From owner-freebsd-current@FreeBSD.ORG Mon Nov 26 20:37:10 2007 Return-Path: Delivered-To: freebsd-current@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2E22E16A419; Mon, 26 Nov 2007 20:37:10 +0000 (UTC) (envelope-from rwatson@FreeBSD.org) Received: from cyrus.watson.org (cyrus.watson.org [209.31.154.42]) by mx1.freebsd.org (Postfix) with ESMTP id D9ECF13C458; Mon, 26 Nov 2007 20:37:09 +0000 (UTC) (envelope-from rwatson@FreeBSD.org) Received: from fledge.watson.org (fledge.watson.org [209.31.154.41]) by cyrus.watson.org (Postfix) with ESMTP id 3C83446F33; Mon, 26 Nov 2007 15:40:46 -0500 (EST) Date: Mon, 26 Nov 2007 20:37:02 +0000 (GMT) From: Robert Watson X-X-Sender: robert@fledge.watson.org To: Max Laier In-Reply-To: <200711231232.04447.max@love2party.net> Message-ID: <20071126203514.X65286@fledge.watson.org> References: <200711231232.04447.max@love2party.net> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed Cc: freebsd-net@freebsd.org, freebsd-current@freebsd.org Subject: Re: Switch pfil(9) to rmlocks X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 26 Nov 2007 20:37:10 -0000 On Fri, 23 Nov 2007, Max Laier wrote: > attached is a diff to switch the pfil(9) subsystem to rmlocks, which are > more suited for the task. I'd like some exposure before doing the switch, > but I don't expect any fallout. This email is going through the patched > pfil already - twice. FYI, since people are experimenting with rmlocks as a substitute for rwlocks, I played with moving the global rwlock used to protect the name space and linkage of UNIX domain sockets to be an rmlock. Kris didn't see any measurable change in performance for his MySQL benchmarks, but I figured I'd post the patches as they give a sense of what change impact things like reader state management have on code. Attached below. I have no current plans to commit these changes as they appear not to offer benefit (either because the rwlock overhead was negigible compared to other costs in the benchmark, or because the read/write blend was too scewed towards writes -- I think probably the former rather than the latter). Robert N M Watson Computer Laboratory University of Cambridge ==== //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c#141 (text+ko) - //depot/user/rwatson/proto/src/sys/kern/uipc_usrreq.c#59 (text+ko) ==== content @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include @@ -196,6 +197,35 @@ * binding, both of which involve dropping UNIX domain socket locks in order * to perform namei() and other file system operations. */ + +#define USE_RMLOCKS +#ifdef USE_RMLOCKS + +static struct rmlock unp_global_rmlock; + +#define UNP_GLOBAL_LOCK_INIT() rm_init(&unp_global_rmlock, \ + "unp_global_rmlock", 0) + +#define UNP_GLOBAL_LOCK_ASSERT() /*rm_assert(&unp_global_rmlock, \ + RA_LOCKED) */ +#define UNP_GLOBAL_UNLOCK_ASSERT() /*rm_assert(&unp_global_rmlock, \ + RA_UNLOCKED) */ + +#define UNP_GLOBAL_WLOCK() rm_wlock(&unp_global_rmlock) +#define UNP_GLOBAL_WUNLOCK() rm_wunlock(&unp_global_rmlock) +#define UNP_GLOBAL_WLOCK_ASSERT() /*rm_assert(&unp_global_rmlock, \ + RA_WLOCKED) */ +#define UNP_GLOBAL_WOWNED() rm_wowned(&unp_global_rmlock) + +#define UNP_GLOBAL_RLOCK() rm_rlock(&unp_global_rmlock, &tr) +#define UNP_GLOBAL_RUNLOCK() rm_runlock(&unp_global_rmlock, &tr) +#define UNP_GLOBAL_RLOCK_ASSERT() /*rm_assert(&unp_global_rmlock, \ + RA_RLOCKED) */ + +#define RMLOCK_DECL struct rm_priotracker tr + +#else /* !USE_RMLOCKS */ + static struct rwlock unp_global_rwlock; #define UNP_GLOBAL_LOCK_INIT() rw_init(&unp_global_rwlock, \ @@ -217,6 +247,10 @@ #define UNP_GLOBAL_RLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ RA_RLOCKED) +#define RMLOCK_DECL __unused struct rm_priotracker tr + +#endif /* !USE_RMLOCKS */ + #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ MTX_DUPOK|MTX_DEF|MTX_RECURSE) @@ -225,9 +259,11 @@ #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) + static int unp_connect(struct socket *, struct sockaddr *, - struct thread *); -static int unp_connect2(struct socket *so, struct socket *so2, int); + struct thread *, struct rm_priotracker *); +static int unp_connect2(struct socket *so, struct socket *so2, int, + struct rm_priotracker *); static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); @@ -295,6 +331,7 @@ { struct unpcb *unp, *unp2; const struct sockaddr *sa; + RMLOCK_DECL; /* * Pass back name of connected socket, if it was bound and we are @@ -493,10 +530,11 @@ uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; + RMLOCK_DECL; KASSERT(td == curthread, ("uipc_connect: td != curthread")); UNP_GLOBAL_WLOCK(); - error = unp_connect(so, nam, td); + error = unp_connect(so, nam, td, &tr); UNP_GLOBAL_WUNLOCK(); return (error); } @@ -526,6 +564,7 @@ { struct unpcb *unp, *unp2; int error; + RMLOCK_DECL; UNP_GLOBAL_WLOCK(); unp = so1->so_pcb; @@ -534,7 +573,7 @@ unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); UNP_PCB_LOCK(unp2); - error = unp_connect2(so1, so2, PRU_CONNECT2); + error = unp_connect2(so1, so2, PRU_CONNECT2, &tr); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); UNP_GLOBAL_WUNLOCK(); @@ -753,6 +792,7 @@ u_int mbcnt, sbcc; u_long newhiwat; int error = 0; + RMLOCK_DECL; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_send: unp == NULL")); @@ -782,7 +822,7 @@ error = EISCONN; break; } - error = unp_connect(so, nam, td); + error = unp_connect(so, nam, td, &tr); if (error) break; unp2 = unp->unp_conn; @@ -836,7 +876,7 @@ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { UNP_GLOBAL_WLOCK_ASSERT(); - error = unp_connect(so, nam, td); + error = unp_connect(so, nam, td, &tr); if (error) break; /* XXX */ } else { @@ -938,6 +978,7 @@ { struct unpcb *unp, *unp2; struct socket *so2; + RMLOCK_DECL; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); @@ -1110,7 +1151,8 @@ } static int -unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td, + struct rm_priotracker *tr) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vnode *vp; @@ -1249,7 +1291,7 @@ KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); - error = unp_connect2(so, so2, PRU_CONNECT); + error = unp_connect2(so, so2, PRU_CONNECT, tr); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: @@ -1273,7 +1315,8 @@ } static int -unp_connect2(struct socket *so, struct socket *so2, int req) +unp_connect2(struct socket *so, struct socket *so2, int req, + struct rm_priotracker *tr) { struct unpcb *unp; struct unpcb *unp2; @@ -1359,6 +1402,7 @@ struct xunpgen *xug; struct unp_head *head; struct xunpcb *xu; + RMLOCK_DECL; head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);