From owner-svn-src-all@FreeBSD.ORG  Sat Sep 21 22:13:18 2013
Message-Id: <201309212213.r8LMDHPL007252@svn.freebsd.org>
From: Rick Macklem <rmacklem@FreeBSD.org>
Date: Sat, 21 Sep 2013 22:13:17 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject: svn commit: r255773 - stable/9/sys/nlm
X-SVN-Group: stable-9
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Author: rmacklem
Date: Sat Sep 21 22:13:17 2013
New Revision: 255773
URL: http://svnweb.freebsd.org/changeset/base/255773

Log:
  MFC: r255333

  Intermittent crashes in the NLM (rpc.lockd) code during system shutdown
  were reported via email. The crashes occurred because the client-side
  NLM would attempt to use its socket after it had been destroyed. Looking
  at the code, it would soclose() the socket once the reference count on
  the socket handling structure went to 0. Unfortunately, nlm_host_get_rpc()
  will simply allocate a new socket handling structure when none exists and
  use the now-soclose()d socket. Since there doesn't seem to be a safe way
  to determine when the socket is no longer needed, this patch modifies the
  code so that it never soclose()es the socket. Since only one socket is
  ever created, this does not introduce a leak when rpc.lockd is stopped
  and restarted. The patch also disables unloading of the nfslockd module,
  since doing so is not safe (and never has been, from what I can see).
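
The failure mode described in the log is easier to see in isolation. The
sketch below is not the kernel code itself; it is a minimal userspace model,
and the names rpc_handle, get_rpc_handle() and drop_rpc_handle() are
hypothetical stand-ins for nlm_host_get_rpc() and the NLM socket handling
structure. It shows the same pattern: the shared socket is closed when the
last reference on the handle structure goes away, yet a later lookup happily
builds a new handle around the stale descriptor.

/*
 * Minimal userspace model of the bug described in the log message.
 * Names are hypothetical; they stand in for nlm_host_get_rpc() and the
 * NLM socket handling structure.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>

static int shared_fd = -1;		/* models nlm_socket */

struct rpc_handle {
	int	refs;
	int	fd;			/* borrowed from shared_fd */
};

static struct rpc_handle *
get_rpc_handle(void)
{
	/*
	 * Allocates a new handle around whatever shared_fd holds,
	 * even if an earlier drop already closed that descriptor.
	 */
	struct rpc_handle *h = malloc(sizeof(*h));

	if (h == NULL)
		return (NULL);
	h->refs = 1;
	h->fd = shared_fd;
	return (h);
}

static void
drop_rpc_handle(struct rpc_handle *h)
{
	if (--h->refs == 0) {
		/*
		 * Old behaviour: close the shared socket when the last
		 * reference goes away.  This is what broke later users.
		 */
		close(h->fd);
		free(h);
	}
}

int
main(void)
{
	struct rpc_handle *h1, *h2;

	shared_fd = socket(AF_INET, SOCK_DGRAM, 0);
	h1 = get_rpc_handle();
	drop_rpc_handle(h1);		/* refcount hits 0, socket closed */
	h2 = get_rpc_handle();
	/* h2->fd is now stale; in the kernel the equivalent reuse crashes. */
	printf("reused stale fd %d after close\n", h2->fd);
	free(h2);
	return (0);
}

The diff below removes the close entirely instead of trying to decide when
the last user of the socket is gone.
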
Modified:
  stable/9/sys/nlm/nlm_prot_impl.c
Directory Properties:
  stable/9/sys/   (props changed)

Modified: stable/9/sys/nlm/nlm_prot_impl.c
==============================================================================
--- stable/9/sys/nlm/nlm_prot_impl.c	Sat Sep 21 22:12:09 2013	(r255772)
+++ stable/9/sys/nlm/nlm_prot_impl.c	Sat Sep 21 22:13:17 2013	(r255773)
@@ -133,6 +133,11 @@ static time_t nlm_grace_threshold;
 static time_t nlm_next_idle_check;
 
 /*
+ * A flag to indicate the server is already running.
+ */
+static int nlm_is_running;
+
+/*
  * A socket to use for RPC - shared by all IPv4 RPC clients.
  */
 static struct socket *nlm_socket;
@@ -1526,51 +1531,51 @@ nlm_server_main(int addr_count, char **a
 	struct nlm_waiting_lock *nw;
 	vop_advlock_t *old_nfs_advlock;
 	vop_reclaim_t *old_nfs_reclaim;
-	int v4_used;
-#ifdef INET6
-	int v6_used;
-#endif
 
-	if (nlm_socket) {
+	if (nlm_is_running != 0) {
 		NLM_ERR("NLM: can't start server - "
 		    "it appears to be running already\n");
 		return (EPERM);
 	}
 
-	memset(&opt, 0, sizeof(opt));
+	if (nlm_socket == NULL) {
+		memset(&opt, 0, sizeof(opt));
 
-	nlm_socket = NULL;
-	error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
-	    td->td_ucred, td);
-	if (error) {
-		NLM_ERR("NLM: can't create IPv4 socket - error %d\n", error);
-		return (error);
-	}
-	opt.sopt_dir = SOPT_SET;
-	opt.sopt_level = IPPROTO_IP;
-	opt.sopt_name = IP_PORTRANGE;
-	portlow = IP_PORTRANGE_LOW;
-	opt.sopt_val = &portlow;
-	opt.sopt_valsize = sizeof(portlow);
-	sosetopt(nlm_socket, &opt);
+		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
+		    td->td_ucred, td);
+		if (error) {
+			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
+			    error);
+			return (error);
+		}
+		opt.sopt_dir = SOPT_SET;
+		opt.sopt_level = IPPROTO_IP;
+		opt.sopt_name = IP_PORTRANGE;
+		portlow = IP_PORTRANGE_LOW;
+		opt.sopt_val = &portlow;
+		opt.sopt_valsize = sizeof(portlow);
+		sosetopt(nlm_socket, &opt);
 
 #ifdef INET6
-	nlm_socket6 = NULL;
-	error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
-	    td->td_ucred, td);
-	if (error) {
-		NLM_ERR("NLM: can't create IPv6 socket - error %d\n", error);
-		goto out;
-		return (error);
-	}
-	opt.sopt_dir = SOPT_SET;
-	opt.sopt_level = IPPROTO_IPV6;
-	opt.sopt_name = IPV6_PORTRANGE;
-	portlow = IPV6_PORTRANGE_LOW;
-	opt.sopt_val = &portlow;
-	opt.sopt_valsize = sizeof(portlow);
-	sosetopt(nlm_socket6, &opt);
+		nlm_socket6 = NULL;
+		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
+		    td->td_ucred, td);
+		if (error) {
+			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
+			    error);
+			soclose(nlm_socket);
+			nlm_socket = NULL;
+			return (error);
+		}
+		opt.sopt_dir = SOPT_SET;
+		opt.sopt_level = IPPROTO_IPV6;
+		opt.sopt_name = IPV6_PORTRANGE;
+		portlow = IPV6_PORTRANGE_LOW;
+		opt.sopt_val = &portlow;
+		opt.sopt_valsize = sizeof(portlow);
+		sosetopt(nlm_socket6, &opt);
 #endif
+	}
 
 	nlm_auth = authunix_create(curthread->td_ucred);
 
@@ -1622,6 +1627,7 @@ nlm_server_main(int addr_count, char **a
 		error = EINVAL;
 		goto out;
 	}
+	nlm_is_running = 1;
 
 	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
 	nlm_nsm_state = smstat.state;
@@ -1638,6 +1644,7 @@ nlm_server_main(int addr_count, char **a
 	nfs_reclaim_p = old_nfs_reclaim;
 
 out:
+	nlm_is_running = 0;
 	if (pool)
 		svcpool_destroy(pool);
 
@@ -1661,13 +1668,8 @@ out:
 	 * nlm_hosts to the host (which may remove it from the list
 	 * and free it). After this phase, the only entries in the
 	 * nlm_host list should be from other threads performing
-	 * client lock requests. We arrange to defer closing the
-	 * sockets until the last RPC client handle is released.
+	 * client lock requests.
 	 */
-	v4_used = 0;
-#ifdef INET6
-	v6_used = 0;
-#endif
 	mtx_lock(&nlm_global_lock);
 	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
 		wakeup(nw);
@@ -1678,43 +1680,10 @@ out:
 		nlm_host_release(host);
 		mtx_lock(&nlm_global_lock);
 	}
-	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
-		mtx_lock(&host->nh_lock);
-		if (host->nh_srvrpc.nr_client
-		    || host->nh_clntrpc.nr_client) {
-			if (host->nh_addr.ss_family == AF_INET)
-				v4_used++;
-#ifdef INET6
-			if (host->nh_addr.ss_family == AF_INET6)
-				v6_used++;
-#endif
-			/*
-			 * Note that the rpc over udp code copes
-			 * correctly with the fact that a socket may
-			 * be used by many rpc handles.
-			 */
-			if (host->nh_srvrpc.nr_client)
-				CLNT_CONTROL(host->nh_srvrpc.nr_client,
-				    CLSET_FD_CLOSE, 0);
-			if (host->nh_clntrpc.nr_client)
-				CLNT_CONTROL(host->nh_clntrpc.nr_client,
-				    CLSET_FD_CLOSE, 0);
-		}
-		mtx_unlock(&host->nh_lock);
-	}
 	mtx_unlock(&nlm_global_lock);
 
 	AUTH_DESTROY(nlm_auth);
 
-	if (!v4_used)
-		soclose(nlm_socket);
-	nlm_socket = NULL;
-#ifdef INET6
-	if (!v6_used)
-		soclose(nlm_socket6);
-	nlm_socket6 = NULL;
-#endif
-
 	return (error);
 }
 
@@ -2438,7 +2407,15 @@ static int
 nfslockd_modevent(module_t mod, int type, void *data)
 {
 
-	return (0);
+	switch (type) {
+	case MOD_LOAD:
+		return (0);
+	case MOD_UNLOAD:
+		/* The NLM module cannot be safely unloaded. */
+		/* FALLTHROUGH */
+	default:
+		return (EOPNOTSUPP);
+	}
 }
 static moduledata_t nfslockd_mod = {
 	"nfslockd",
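
Taken together, the change replaces per-reference socket cleanup with a
simple lifetime rule: create the socket once, never close it, and gate
server start/stop on nlm_is_running. A compressed userspace sketch of that
rule follows; the nlm_model_* names are hypothetical stand-ins for the
kernel routines (socreate(), sosetopt(), the NLM locking), not the code
from the diff above.

/*
 * Model of the corrected lifetime rule: create the shared socket once,
 * never close it, and gate server start/stop on an is-running flag.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

static int	model_socket = -1;	/* created once, never closed */
static int	model_is_running;	/* models nlm_is_running */

static int
nlm_model_server_start(void)
{
	if (model_is_running != 0)
		return (EPERM);		/* already running */

	/* Only create the socket on the very first start. */
	if (model_socket == -1) {
		model_socket = socket(AF_INET, SOCK_DGRAM, 0);
		if (model_socket == -1)
			return (errno);
	}
	model_is_running = 1;
	return (0);
}

static void
nlm_model_server_stop(void)
{
	/*
	 * Tear down server state but deliberately keep the socket open;
	 * outstanding client RPC handles may still reference it.
	 */
	model_is_running = 0;
}

int
main(void)
{
	printf("first start: %d\n", nlm_model_server_start());
	nlm_model_server_stop();
	printf("restart reuses fd %d: %d\n", model_socket,
	    nlm_model_server_start());
	return (0);
}

Because the one socket persists across stop/start, restarting rpc.lockd
reuses it rather than racing with client handles that may still reference
it, which is also why the MOD_UNLOAD case now refuses with EOPNOTSUPP.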