Date:      Sat, 22 Nov 2014 01:28:12 +0100
From:      Mateusz Guzik <mjguzik@gmail.com>
To:        freebsd-fs@freebsd.org
Subject:   atomic v_usecount and v_holdcnt
Message-ID:  <20141122002812.GA32289@dft-labs.eu>

The idea is that we don't need the vnode interlock as long as neither
counter transitions 1->0 or 0->1.
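
Roughly, the fast path is a cmpset loop which only falls back to the
interlock near the 0 <-> 1 boundary. A minimal sketch (the helper name
counter_acquire is made up for illustration; the patch open-codes this
per counter):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/refcount.h>
#include <sys/vnode.h>
#include <machine/atomic.h>

static void
counter_acquire(struct vnode *vp, volatile u_int *countp)
{
	u_int old;

	for (;;) {
		old = *countp;
		if (old == 0)
			break;	/* 0->1; serialize on the interlock */
		if (atomic_cmpset_int(countp, old, old + 1))
			return;	/* lock-free increment succeeded */
		/* lost a race with another thread; reread and retry */
	}
	VI_LOCK(vp);
	refcount_acquire(countp);
	VI_UNLOCK(vp);
}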

The patch itself is more of a PoC, so I didn't rename vholdl & friends
just yet.

It helps during lookups of the same vnodes, since the interlock, which
was taken twice, served as a serialization point; that effect is now
reduced.

There are other places which can avoid the VI_LOCK + vget scheme.
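
The conversion pattern is the same everywhere (sketch below; the
function and lock names are stand-ins for the real consumers):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vnode.h>

static int
lookup_and_lock(struct vnode *vp, int flags, struct mtx *list_mtx,
    struct thread *td)
{
	/*
	 * Before: the interlock pinned the vnode while list_mtx was
	 * dropped:
	 *	VI_LOCK(vp);
	 *	mtx_unlock(list_mtx);
	 *	return (vget(vp, flags | LK_INTERLOCK, td));
	 */
	/* After: a hold reference pins it; no interlock acquisition. */
	vhold(vp);
	mtx_unlock(list_mtx);
	return (vget_held(vp, flags, td));
}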

The patch below survived make -j 40 buildworld, poudriere with 40
workers, etc., on a 2 package(s) x 10 core(s) x 2 SMT threads machine,
both with and without debug options (including DEBUG_VFS_LOCKS).

Perf difference:

In a crap microbenchmark where 40 threads each stat a separate file
/foo/bar/baz/quux${N}, I got over a 4x speedup on tmpfs.
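
The benchmark was along these lines (a reconstruction, not the exact
program; the iteration count is arbitrary):

#include <sys/stat.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define	NTHREADS	40
#define	ITERATIONS	1000000

static void *
worker(void *arg)
{
	struct stat sb;
	char path[64];
	long i, n;

	n = (long)arg;
	snprintf(path, sizeof(path), "/foo/bar/baz/quux%ld", n);
	for (i = 0; i < ITERATIONS; i++) {
		if (stat(path, &sb) != 0) {
			perror("stat");
			exit(1);
		}
	}
	return (NULL);
}

int
main(void)
{
	pthread_t threads[NTHREADS];
	long i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&threads[i], NULL, worker, (void *)i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(threads[i], NULL);
	return (0);
}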

Comments?

diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
index 83f29c1..b587ebd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
@@ -99,6 +99,6 @@ vn_rele_async(vnode_t *vp, taskq_t *taskq)
 		    (task_func_t *)vn_rele_inactive, vp, TQ_SLEEP) != 0);
 		return;
 	}
-	vp->v_usecount--;
+	refcount_release(&vp->v_usecount);
 	vdropl(vp);
 }
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 55e3217..ec26d35 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -665,12 +665,12 @@ success:
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
-	VI_LOCK(*vpp);
+	vholdl(*vpp);
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
-	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
+	error = vget_held(*vpp, cnp->cn_lkflags, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
@@ -1376,9 +1376,9 @@ vn_dir_dd_ino(struct vnode *vp)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
-		VI_LOCK(ddvp);
+		vholdl(ddvp);
 		CACHE_RUNLOCK();
-		if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread))
+		if (vget_held(ddvp, LK_SHARED | LK_NOWAIT, curthread))
 			return (NULL);
 		return (ddvp);
 	}
diff --git a/sys/kern/vfs_hash.c b/sys/kern/vfs_hash.c
index 0271e49..d2fdbba 100644
--- a/sys/kern/vfs_hash.c
+++ b/sys/kern/vfs_hash.c
@@ -83,9 +83,9 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, s
 				continue;
 			if (fn != NULL && fn(vp, arg))
 				continue;
-			VI_LOCK(vp);
+			vhold(vp);
 			mtx_unlock(&vfs_hash_mtx);
-			error = vget(vp, flags | LK_INTERLOCK, td);
+			error = vget_held(vp, flags, td);
 			if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 				break;
 			if (error)
@@ -127,9 +127,9 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, stru
 				continue;
 			if (fn != NULL && fn(vp2, arg))
 				continue;
-			VI_LOCK(vp2);
+			vhold(vp2);
 			mtx_unlock(&vfs_hash_mtx);
-			error = vget(vp2, flags | LK_INTERLOCK, td);
+			error = vget_held(vp2, flags, td);
 			if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 				break;
 			mtx_lock(&vfs_hash_mtx);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 345aad6..53bdf0d 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -854,7 +854,7 @@ vnlru_free(int count)
 		 */
 		freevnodes--;
 		vp->v_iflag &= ~VI_FREE;
-		vp->v_holdcnt++;
+		refcount_acquire(&vp->v_holdcnt);
 
 		mtx_unlock(&vnode_free_list_mtx);
 		VI_UNLOCK(vp);
@@ -2052,12 +2052,7 @@ v_incr_usecount(struct vnode *vp)
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vholdl(vp);
-	vp->v_usecount++;
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		vp->v_rdev->si_usecount++;
-		dev_unlock();
-	}
+	v_upgrade_usecount(vp);
 }
 
 /*
@@ -2067,9 +2062,24 @@ v_incr_usecount(struct vnode *vp)
 static void
 v_upgrade_usecount(struct vnode *vp)
 {
+	int old, locked;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount++;
+retry:
+	old = vp->v_usecount;
+	if (old > 0) {
+		if (atomic_cmpset_int(&vp->v_usecount, old, old + 1))
+			goto dev;
+		goto retry;
+	}
+
+	locked = mtx_owned(VI_MTX(vp));
+	if (!locked)
+		VI_LOCK(vp);
+	refcount_acquire(&vp->v_usecount);
+	if (!locked)
+		VI_UNLOCK(vp);
+dev:
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount++;
@@ -2086,16 +2096,10 @@ static void
 v_decr_usecount(struct vnode *vp)
 {
 
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
 	VNASSERT(vp->v_usecount > 0, vp,
 	    ("v_decr_usecount: negative usecount"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount--;
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		vp->v_rdev->si_usecount--;
-		dev_unlock();
-	}
+	v_decr_useonly(vp);
 	vdropl(vp);
 }
 
@@ -2108,12 +2112,27 @@ v_decr_usecount(struct vnode *vp)
 static void
 v_decr_useonly(struct vnode *vp)
 {
+	int old, locked;
 
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
 	VNASSERT(vp->v_usecount > 0, vp,
 	    ("v_decr_useonly: negative usecount"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	vp->v_usecount--;
+retry:
+	old = vp->v_usecount;
+	if (old > 1) {
+		if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) {
+			goto dev;
+		}
+		goto retry;
+	}
+
+	locked = mtx_owned(VI_MTX(vp));
+	if (!locked)
+		VI_LOCK(vp);
+	refcount_release(&vp->v_usecount);
+	if (!locked)
+		VI_UNLOCK(vp);
+dev:
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount--;
@@ -2129,19 +2148,15 @@ v_decr_useonly(struct vnode *vp)
  * vput try to do it here.
  */
 int
-vget(struct vnode *vp, int flags, struct thread *td)
+vget_held(struct vnode *vp, int flags, struct thread *td)
 {
 	int error;
 
-	error = 0;
 	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
 	    ("vget: invalid lock operation"));
 	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
 
-	if ((flags & LK_INTERLOCK) == 0)
-		VI_LOCK(vp);
-	vholdl(vp);
-	if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) {
+	if ((error = vn_lock(vp, flags)) != 0) {
 		vdrop(vp);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
@@ -2149,7 +2164,6 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	}
 	if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
 		panic("vget: vn_lock failed to return ENOENT\n");
-	VI_LOCK(vp);
 	/* Upgrade our holdcnt to a usecount. */
 	v_upgrade_usecount(vp);
 	/*
@@ -2159,15 +2173,26 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	 * we don't succeed no harm is done.
 	 */
 	if (vp->v_iflag & VI_OWEINACT) {
-		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
-		    (flags & LK_NOWAIT) == 0)
-			vinactive(vp, td);
-		vp->v_iflag &= ~VI_OWEINACT;
+		VI_LOCK(vp);
+		if (vp->v_iflag & VI_OWEINACT) {
+			if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
+			    (flags & LK_NOWAIT) == 0)
+				vinactive(vp, td);
+			vp->v_iflag &= ~VI_OWEINACT;
+		}
+		VI_UNLOCK(vp);
 	}
-	VI_UNLOCK(vp);
 	return (0);
 }
 
+int
+vget(struct vnode *vp, int flags, struct thread *td)
+{
+
+	vhold(vp);
+	return (vget_held(vp, flags, td));
+}
+
 /*
  * Increase the reference count of a vnode.
  */
@@ -2176,9 +2201,7 @@ vref(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	VI_LOCK(vp);
 	v_incr_usecount(vp);
-	VI_UNLOCK(vp);
 }
 
 /*
@@ -2195,9 +2218,7 @@ vrefcnt(struct vnode *vp)
 {
 	int usecnt;
 
-	VI_LOCK(vp);
 	usecnt = vp->v_usecount;
-	VI_UNLOCK(vp);
 
 	return (usecnt);
 }
@@ -2210,6 +2231,7 @@ static void
 vputx(struct vnode *vp, int func)
 {
 	int error;
+	int old;
 
 	KASSERT(vp != NULL, ("vputx: null vp"));
 	if (func == VPUTX_VUNREF)
@@ -2219,11 +2241,26 @@ vputx(struct vnode *vp, int func)
 	else
 		KASSERT(func == VPUTX_VRELE, ("vputx: wrong func"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
+
+retry:
+	old = vp->v_usecount;
+	if (old > 1) {
+		if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) {
+			if (func == VPUTX_VPUT)
+				VOP_UNLOCK(vp, 0);
+			if (vp->v_type == VCHR && vp->v_rdev != NULL) {
+				dev_lock();
+				vp->v_rdev->si_usecount--;
+				dev_unlock();
+			}
+			vdropl(vp);
+			return;
+		}
+		goto retry;
+	}
+
 	VI_LOCK(vp);
 
-	/* Skip this v_writecount check if we're going to panic below. */
-	VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp,
-	    ("vputx: missed vn_close"));
 	error = 0;
 
 	if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) &&
@@ -2320,9 +2357,7 @@ void
 vhold(struct vnode *vp)
 {
 
-	VI_LOCK(vp);
 	vholdl(vp);
-	VI_UNLOCK(vp);
 }
 
 /*
@@ -2332,20 +2367,30 @@ void
 vholdl(struct vnode *vp)
 {
 	struct mount *mp;
+	int old;
+	int locked;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-#ifdef INVARIANTS
-	/* getnewvnode() calls v_incr_usecount() without holding interlock. */
-	if (vp->v_type != VNON || vp->v_data != NULL) {
-		ASSERT_VI_LOCKED(vp, "vholdl");
-		VNASSERT(vp->v_holdcnt > 0 || (vp->v_iflag & VI_FREE) != 0,
-		    vp, ("vholdl: free vnode is held"));
+retry:
+	old = vp->v_holdcnt;
+	if (old > 0) {
+		if (atomic_cmpset_int(&vp->v_holdcnt, old, old + 1)) {
+			VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
+			    ("vholdl: vnode with usecount is free"));
+			return;
+		}
+		goto retry;
 	}
-#endif
-	vp->v_holdcnt++;
-	if ((vp->v_iflag & VI_FREE) == 0)
+	locked = mtx_owned(VI_MTX(vp));
+	if (!locked)
+		VI_LOCK(vp);
+	if ((vp->v_iflag & VI_FREE) == 0) {
+		refcount_acquire(&vp->v_holdcnt);
+		if (!locked)
+			VI_UNLOCK(vp);
 		return;
-	VNASSERT(vp->v_holdcnt == 1, vp, ("vholdl: wrong hold count"));
+	}
+	VNASSERT(vp->v_holdcnt == 0, vp, ("vholdl: wrong hold count"));
 	VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
 	/*
 	 * Remove a vnode from the free list, mark it as in use,
@@ -2362,6 +2407,9 @@ vholdl(struct vnode *vp)
 	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
 	mp->mnt_activevnodelistsize++;
 	mtx_unlock(&vnode_free_list_mtx);
+	refcount_acquire(&vp->v_holdcnt);
+	if (!locked)
+		VI_UNLOCK(vp);
 }
 
 /*
@@ -2372,7 +2420,6 @@ void
 vdrop(struct vnode *vp)
 {
 
-	VI_LOCK(vp);
 	vdropl(vp);
 }
 
@@ -2387,15 +2434,27 @@ vdropl(struct vnode *vp)
 	struct bufobj *bo;
 	struct mount *mp;
 	int active;
+	int old;
+	int locked;
 
-	ASSERT_VI_LOCKED(vp, "vdropl");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt %d", vp->v_holdcnt);
-	vp->v_holdcnt--;
-	VNASSERT(vp->v_holdcnt >= vp->v_usecount, vp,
-	    ("hold count less than use count"));
-	if (vp->v_holdcnt > 0) {
+	locked = mtx_owned(VI_MTX(vp));
+retry:
+	old = vp->v_holdcnt;
+	if (old > 1) {
+		if (atomic_cmpset_int(&vp->v_holdcnt, old, old - 1)) {
+			if (locked)
+				VI_UNLOCK(vp);
+			return;
+		}
+		goto retry;
+	}
+
+	if (!locked)
+		VI_LOCK(vp);
+	if (refcount_release(&vp->v_holdcnt) == 0) {
 		VI_UNLOCK(vp);
 		return;
 	}
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index c78b9d1..87eee4a 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -651,6 +651,7 @@ void	vdrop(struct vnode *);
 void	vdropl(struct vnode *);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int lockflag, struct thread *td);
+int	vget_held(struct vnode *vp, int lockflag, struct thread *td);
 void	vgone(struct vnode *vp);
 void	vhold(struct vnode *);
 void	vholdl(struct vnode *);


