Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 22 Jun 2010 08:09:26 GMT
From:      Gleb Kurtsou <gk@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 180094 for review
Message-ID:  <201006220809.o5M89QsF069614@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@180094?ac=10

Change 180094 by gk@gk_h1 on 2010/06/22 08:09:25

	Change API, dropping dircache_*update(); use granular entry locking

Affected files ...

.. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs.h#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_subr.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vfsops.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vnops.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#3 edit

Differences ...

==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs.h#2 (text+ko) ====

@@ -103,7 +103,10 @@
 };
 
 #define PM_ROOT_CANRECURSE		0x01
-#define PM_DIRCACHE			0x02
+#define PM_DIRCACHE_NAMECACHE		0x02
+#define PM_DIRCACHE_VATTR		0x04
+
+#define PM_DIRCACHE			(PM_DIRCACHE_NAMECACHE | PM_DIRCACHE_VATTR)
 
 struct pefs_mount {
 	struct mount *pm_lowervfs;

==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_subr.c#2 (text+ko) ====

@@ -68,6 +68,7 @@
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/dirent.h>
+#include <sys/dircache.h>
 #include <sys/queue.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
@@ -253,6 +254,11 @@
 	int buflen = *encname_len;
 
 	ASSERT_VOP_LOCKED(lvp, "pefs_node_lookup_name");
+
+	error = dircache_getname(lvp, encname, encname_len);
+	if (error == 0)
+		goto out;
+
 	locked = VOP_ISLOCKED(lvp);
 	if (ldvp) {
 		dlocked = VOP_ISLOCKED(ldvp);
@@ -276,6 +282,7 @@
 	if (error)
 		return (ENOENT);
 
+out:
 	memcpy(encname, encname + *encname_len, buflen - *encname_len);
 	*encname_len = buflen - *encname_len;
 	if (*encname_len < buflen)

==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vfsops.c#2 (text+ko) ====

@@ -46,6 +46,10 @@
 
 #include <fs/pefs/pefs.h>
 
+static const char *pefs_dircache_support_vattr[] = {
+	"zfs", NULL
+};
+
 static MALLOC_DEFINE(M_PEFSMNT, "pefs_mount", "PEFS mount structure");
 
 static const char *pefs_opts[] = {
@@ -55,24 +59,35 @@
 static void
 dircache_init(struct mount *mp, int opt, struct pefs_mount *pm)
 {
+	struct mount *lmp;
 	char *lowerfs;
-	int supported;
+	const char **support_vattr;
+	int support;
+
+	if (opt == 0)
+		return;
 
-	lowerfs = mp->mnt_vnodecovered->v_mount->mnt_vfc->vfc_name;
-	supported = (strcmp(lowerfs, "zfs") == 0 ||
-	    strcmp(lowerfs, "tmpfs") == 0);
-	if (opt < 0)
-		opt = supported;
-	else if (opt > 0 && supported == 0) {
+	support = 0;
+	lmp = mp->mnt_vnodecovered->v_mount;
+	lowerfs = lmp->mnt_vfc->vfc_name;
+	for (support_vattr = pefs_dircache_support_vattr;
+	    *support_vattr != NULL; support_vattr++)
+		if (strcmp(lowerfs, *support_vattr) == 0) {
+			support |= PM_DIRCACHE_VATTR;
+			break;
+		}
+	MNT_ILOCK(lmp);
+	if ((lmp->mnt_kern_flag & MNTK_DIRCACHE) != 0)
+		support |= PM_DIRCACHE_NAMECACHE;
+	MNT_IUNLOCK(lmp);
+	if (opt > 0 && support == 0) {
 		printf("pefs: dircache is not supported by filesystem: %s\n",
 		    lowerfs);
 		opt = 0;
 	}
 
-	if (opt == 0)
-		pm->pm_flags &= ~PM_DIRCACHE;
-	else
-		pm->pm_flags |= PM_DIRCACHE;
+	if (opt != 0)
+		pm->pm_flags |= support;
 	PEFSDEBUG("pefs_mount: dircache %s\n", (opt ? "enabled" : "disabed"));
 }
 

==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vnops.c#2 (text+ko) ====

@@ -62,6 +62,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
+#include <sys/dircache.h>
 #include <sys/ioccom.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
@@ -105,6 +106,13 @@
 	struct vattr va;
 	int error;
 
+	/* Prefer PM_DIRCACHE_VATTR */
+	if ((VFS_TO_PEFS(vp->v_mount)->pm_flags & PM_DIRCACHE) ==
+	    PM_DIRCACHE_NAMECACHE) {
+		va.va_gen = dircache_getgen(vp);
+		if (va.va_gen != 0)
+			return (va.va_gen);
+	}
 	error = VOP_GETATTR(PEFS_LOWERVP(vp), &va, cred);
 	if (error != 0)
 		return (0);

==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#3 (text+ko) ====

@@ -401,6 +401,8 @@
 	error = insmntque(vp, mp);
 	if (error)
 		vp = NULL;
+	else
+		dircache_allocvnode(vp, node->tn_id);
 
 unlock:
 	TMPFS_NODE_LOCK(node);
@@ -520,8 +522,7 @@
 	 * insert the new node into the directory, an operation that
 	 * cannot fail. */
 	tmpfs_dir_attach(dvp, de);
-	dircache_add(dvp, *vpp, cnp, DT_STRONG, node->tn_id,
-	    tmpfs_dircookie(de));
+	dircache_add(dvp, *vpp, cnp, DT_STRONG, node->tn_id);
 
 out:
 
@@ -588,8 +589,7 @@
 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
     struct componentname *cnp)
 {
-	struct dircache_cursor curs;
-	boolean_t found, cache;
+	boolean_t found;
 	struct tmpfs_dirent *de;
 
 	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
@@ -599,25 +599,16 @@
 
 
 	found = 0;
-	cache = dircache_beginupdate(&curs, node->tn_vnode, cnp,
-	    DC_OP_IFPARTIAL) == 0;
 	TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
-		if (cache)
-			dircache_update(&curs, DT_STRONG, de->td_name,
-			    de->td_namelen, de->td_node->tn_id,
-			    tmpfs_dircookie(de));
 		if (f != NULL && de->td_node != f)
 		    continue;
 		MPASS(cnp->cn_namelen < 0xffff);
 		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
 		    bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
 			found = 1;
-			if (!cache)
-				break;
+			break;
 		}
 	}
-	if (cache)
-		dircache_completeupdate(&curs);
 	node->tn_status |= TMPFS_NODE_ACCESSED;
 
 	return found ? de : NULL;

==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#3 (text+ko) ====

@@ -913,7 +913,7 @@
 
 	/* Insert the new directory entry into the appropriate directory. */
 	tmpfs_dir_attach(dvp, de);
-	dircache_add(dvp, vp, cnp, DT_STRONG, node->tn_id, tmpfs_dircookie(de));
+	dircache_add(dvp, vp, cnp, DT_STRONG, node->tn_id);
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
@@ -1138,7 +1138,7 @@
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
 	}
 
-	dircache_rename(fdvp, fvp, fcnp, tdvp, tvp, tcnp);
+	dircache_rename(fdvp, fcnp, tdvp, tcnp);
 
 	error = 0;
 

==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#3 (text+ko) ====

@@ -49,19 +49,16 @@
 
 #include <sys/dircache.h>
 
-#define DCDEBUG(format, args...)					\
+#define	DC_NAMEROUND			16	/* power of 2 */
+
+#define	DC_OP_VLOCK			0x00000001
+
+#define	DCDEBUG(format, args...)					\
 	do {								\
 		if (dircache_debug != 0)				\
 			printf(format ,## args);			\
 	} while (0)
 
-#define DIRCACHE_STAT(n, descr)						\
-	SYSCTL_PROC(_vfs_dircache, OID_AUTO, n,				\
-	CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE,			\
-	NULL, __CONCAT(dps_, n), dps_sysctlhandler, "LU", descr)
-
-#define DC_NAMEROUND		16	/* power of 2 */
-
 enum {
 	dps_interlock_same,
 	dps_interlock_direct,
@@ -70,18 +67,10 @@
 	dps_max
 };
 
-struct nspace;
-
 struct dircache_poolstat {
 	u_long dps_stats[dps_max];
 };
 
-struct dircache_pool {
-	struct mtx dp_mtx;
-	u_long dp_gen;
-	struct dircache_poolstat dp_stat;
-};
-
 struct dircache_root {
 	struct mount *dr_mnt;
 	struct dircache *dr_entry;
@@ -91,120 +80,26 @@
 static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
 static uma_zone_t dircache_zone;
 
-static struct dircache_pool **pool;
-static size_t pool_size;
-static u_long pool_id;
-static char **pool_mtxname;
-static const int pool_mtxnamesz = 20;
-
 static int dircache_debug = 1;
 SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
     "Enable debug");
 
-static int dps_sysctlhandler(SYSCTL_HANDLER_ARGS);
-
-DIRCACHE_STAT(interlock_same,
-    "Same lock hits in interlock");
-DIRCACHE_STAT(interlock_direct,
-    "Direct lock order hits in interlock");
-DIRCACHE_STAT(interlock_reverse,
-    "Reverse lock order hits in interlock");
-DIRCACHE_STAT(interlock_reverse_fast,
-    "Reverse lock order without sleeping hits in interlock");
-
-static int
-ptr_cmp(const void *a, const void *b)
-{
-	return (((uintptr_t)(*(void * const *)a)) -
-	    ((uintptr_t)(*(void * const *)b)));
-}
-
 static void
 dircache_sysinit(void *arg __unused)
 {
-	int i;
-
-	pool_size = 4;
-
 	dircache_zone = uma_zcreate("dircache",
 	    sizeof(struct dircache), NULL, NULL, NULL, NULL,
             UMA_ALIGN_PTR, 0);
-	pool = malloc(sizeof(void *) * pool_size,
-	    M_DIRCACHE, M_WAITOK);
-	pool_mtxname = malloc(sizeof(void *) * pool_size,
-	    M_DIRCACHE, M_WAITOK);
-	/*
-	 * Keep struct dircache_pool size minimal. (and align at cache
-	 * pipeline?)
-	 * Use pool address for lock ordering.
-	 */
-	for (i = 0; i < pool_size; i++) {
-		pool_mtxname[i] = malloc(pool_mtxnamesz,
-		    M_DIRCACHE, M_WAITOK | M_ZERO);
-		pool[i] = malloc(sizeof(struct dircache_pool),
-		    M_DIRCACHE, M_WAITOK | M_ZERO);
-	}
-	qsort(pool, pool_size, sizeof(void *), ptr_cmp);
-	for (i = 0; i < pool_size; i++) {
-		pool[i]->dp_gen = pool_id++;
-		snprintf(pool_mtxname[i], pool_mtxnamesz, "dircache lock %d", i);
-		mtx_init(&pool[i]->dp_mtx, pool_mtxname[i], NULL, MTX_DEF);
-	}
 }
 SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL);
 
 static void
 dircache_sysuninit(void *arg __unused)
 {
-	int i;
-
-	for (i = 0; i < pool_size; i++) {
-		mtx_destroy(&pool[i]->dp_mtx);
-		free(pool[i], M_DIRCACHE);
-		free(pool_mtxname[i], M_DIRCACHE);
-	}
-	free(pool, M_DIRCACHE);
-	free(pool_mtxname, M_DIRCACHE);
-	pool = NULL;
 	uma_zdestroy(dircache_zone);
 }
 SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL);
 
-static void
-pool_getstats(struct dircache_poolstat *ps)
-{
-	struct dircache_poolstat *stat;
-	int i, ind;
-
-	for (i = 0; i < pool_size; i++) {
-		mtx_lock(&pool[i]->dp_mtx);
-		stat = &pool[i]->dp_stat;
-		for (ind = 0; ind < dps_max; ind++)
-			ps->dps_stats[ind] += stat->dps_stats[ind];
-		mtx_unlock(&pool[i]->dp_mtx);
-	}
-}
-
-static int
-dps_sysctlhandler(SYSCTL_HANDLER_ARGS)
-{
-	struct dircache_poolstat st = {};
-	u_long res;
-	int error;
-
-	pool_getstats(&st);
-	res = st.dps_stats[arg2];
-	error = SYSCTL_OUT(req, &res, sizeof(res));
-
-	return (error);
-}
-
-static __inline void
-dp_incstat(int ind, struct dircache_pool *dp, u_long val)
-{
-	dp->dp_stat.dps_stats[ind] += val;
-}
-
 static __inline int
 dc_cmpname(struct dircache *dc, char *name, size_t namelen)
 {
@@ -232,65 +127,18 @@
 	return (r);
 }
 
-RB_GENERATE_STATIC(dircache_tree, dircache, dc_listentry, dc_cmp);
+RB_GENERATE_STATIC(dircache_tree, dircache, dc_tree, dc_cmp);
 
 #define DC_MTX(dc)		(&(dc)->dc_pool->dp_mtx)
 
-#define dc_lock(dc)		mtx_lock(DC_MTX(dc))
+#define dc_lock(dc)		mtx_lock(&(dc)->dc_mtx)
 
-#define dc_trylock(dc)		mtx_trylock(DC_MTX(dc))
+#define dc_trylock(dc)		mtx_trylock(&(dc)->dc_mtx)
 
-#define dc_unlock(dc)		mtx_unlock(DC_MTX(dc))
+#define dc_unlock(dc)		mtx_unlock(&(dc)->dc_mtx)
 
-#define dc_assertlock(dc, w)	mtx_assert(DC_MTX(dc), (w))
-
-static void
-dc_relock(struct dircache *from, struct dircache *to)
-{
-	dc_assertlock(from, MA_OWNED);
-
-	if (from->dc_pool == to->dc_pool)
-		return;
-
-	dc_assertlock(to, MA_NOTOWNED);
-
-	dc_unlock(from);
-	dc_lock(to);
-}
-
-static void
-dc_interlock(struct dircache *from, struct dircache *to)
-{
-	dc_assertlock(from, MA_OWNED);
-
-	if (from->dc_pool == to->dc_pool) {
-		dp_incstat(dps_interlock_same, to->dc_pool, 1);
-		return;
-	}
-
-	dc_assertlock(to, MA_NOTOWNED);
-	if ((uintptr_t)from->dc_pool < (uintptr_t)to->dc_pool) {
-		dc_lock(to);
-		dc_unlock(from);
-		dp_incstat(dps_interlock_direct, to->dc_pool, 1);
-		return;
-	}
+#define dc_assertlock(dc, w)	mtx_assert(&(dc)->dc_mtx, (w))
 
-	critical_enter();
-	if (dc_trylock(to) != 0) {
-		dc_unlock(from);
-		critical_exit();
-		dp_incstat(dps_interlock_reverse_fast, to->dc_pool, 1);
-		return;
-	}
-	critical_exit();
-
-	/* !!!! FIXME !!!! */
-	dc_unlock(from);
-	dc_lock(to);
-	dp_incstat(dps_interlock_reverse, to->dc_pool, 1);
-}
-
 static __inline void
 dc_initname(struct dircache *dc, char *name, size_t namelen)
 {
@@ -306,9 +154,9 @@
 }
 
 static __inline int
-dc_namebuffits(struct dircache *dc, size_t namelen)
+dc_namebuffits(u_int dcnamelen, u_int nnamelen)
 {
-	return (dc_namebuflen(dc->dc_namelen) < namelen + 1);
+	return (dc_namebuflen(dcnamelen) < nnamelen + 1);
 }
 
 static __inline char *
@@ -325,7 +173,8 @@
 {
 	MPASS(name != dc->dc_name);
 
-	if (dc->dc_name == NULL || dc_namebuffits(dc, namelen) == 0) {
+	if (dc->dc_name == NULL ||
+	    dc_namebuffits(dc->dc_namelen, namelen) == 0) {
 		if (dc->dc_name != NULL)
 			free(dc->dc_name, M_DIRCACHE);
 		if (namebuf == NULL)
@@ -342,39 +191,28 @@
 static __inline void
 dc_updategen(struct dircache *dc)
 {
-	u_long *genp;
+	static u_long gen = 1;
+
 	dc_assertlock(dc, MA_OWNED);
-
-	genp = &dc->dc_pool->dp_gen;
-	*genp += pool_size;
-	dc->dc_gen = *genp;
+	do {
+		dc->dc_gen = atomic_fetchadd_long(&gen, 1);
+	} while (__predict_false(dc->dc_gen == 0));
 }
 
 static struct dircache *
-dc_alloc(struct dircache *pdc, enum dircache_type type,
-    char *name, size_t namelen)
+dc_alloc(enum dircache_type type, char *name, size_t namelen)
 {
 	struct dircache *dc;
-	int poolind;
 
 	dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO);
 	DCDEBUG("alloc: %p %s\n", dc, name);
 
 	dc->dc_type = type;
-	dc->dc_flags = DC_CH_PARTIAL;
-	dc->dc_parent = pdc;
 	refcount_init(&dc->dc_refcnt, 1);
-	cv_init(&dc->dc_condvar, "dircache cv");
+	mtx_init(&dc->dc_mtx, "dircache mtx", NULL, MTX_DEF | MTX_DUPOK);
 
-	if (name != NULL && namelen != 0) {
+	if (name != NULL && namelen != 0)
 		dc_setname(dc, name, namelen, NULL);
-		/* cheaper way to get pseudo-random value */
-		poolind = dc->dc_namehash;
-	} else {
-		poolind = arc4random();
-	}
-	poolind %= pool_size;
-	dc->dc_pool = pool[poolind];
 
 	return (dc);
 }
@@ -386,7 +224,7 @@
 	MPASS(dc->dc_parent == NULL);
 
 	DCDEBUG("free: %p %s\n", dc, dc->dc_name);
-	cv_destroy(&dc->dc_condvar);
+	mtx_destroy(&dc->dc_mtx);
 	uma_zfree(dircache_zone, dc);
 }
 
@@ -398,35 +236,28 @@
 }
 
 static __inline int
-dc_rele(struct dircache *dc)
+dc_relel(struct dircache *dc)
 {
-	dc_assertlock(dc, MA_NOTOWNED);
+	dc_assertlock(dc, MA_OWNED);
 
 	if (refcount_release(&dc->dc_refcnt) != 0) {
+		dc_unlock(dc);
 		dc_free(dc);
 		return (1);
 	}
 	return (0);
 }
 
-static struct dircache *
-dc_wait(struct dircache *dc)
+static __inline int
+dc_rele(struct dircache *dc)
 {
-	dc_assertlock(dc, MA_OWNED);
+	dc_assertlock(dc, MA_NOTOWNED);
 
-	if ((dc->dc_flags & DC_CH_UPDATING) == 0)
-		return (NULL);
-
-	dc->dc_refcnt++;
-	cv_wait(&dc->dc_condvar, &dc->dc_pool->dp_mtx);
-	dc->dc_refcnt--;
-	if (dc->dc_refcnt == 0) {
-		dc_unlock(dc);
+	if (refcount_release(&dc->dc_refcnt) != 0) {
 		dc_free(dc);
-		return (NULL);
+		return (1);
 	}
-
-	return (dc);
+	return (0);
 }
 
 static void
@@ -449,7 +280,7 @@
 }
 
 static void
-dc_relevnode(struct dircache *dc)
+dc_relevnode(struct dircache *dc, int flags)
 {
 	MPASS(dc->dc_vnode != NULL);
 	dc_assertlock(dc, MA_OWNED);
@@ -458,12 +289,44 @@
 
 	VI_LOCK(dc->dc_vnode);
 	TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist);
-	VI_UNLOCK(dc->dc_vnode);
+	if ((flags & DC_OP_VLOCK) == 0)
+		VI_UNLOCK(dc->dc_vnode);
 	dc->dc_vnode = NULL;
 	dc_unlock(dc);
 	dc_rele(dc);
 }
 
+static int
+dc_vinterlock(struct vnode *vp, struct dircache *dc)
+{
+	ASSERT_VI_LOCKED(vp, "dc_vinterlock");
+	dc_assertlock(dc, MA_NOTOWNED);
+
+	if (dc_trylock(dc)) {
+		MPASS(dc->dc_vnode == vp);
+		VI_UNLOCK(vp);
+		return (0);
+	}
+
+	dc_ref(dc);
+	VI_UNLOCK(vp);
+	dc_lock(dc);
+
+	if (dc->dc_vnode != vp) {
+		VI_LOCK(vp);
+		dc_unlock(dc);
+		return (1);
+	}
+
+	if (dc_relel(dc) != 0) {
+		VI_LOCK(vp);
+		return (1);
+	}
+
+	MPASS(dc->dc_vnode == vp);
+	return (0);
+}
+
 static struct dircache *
 dc_getentry(struct vnode *vp, struct componentname *cnp, struct vnode *dvp)
 {
@@ -473,14 +336,15 @@
 	VI_LOCK(vp);
 	dc = TAILQ_FIRST(&vp->v_dircache);
 	if (dc == NULL) {
-		VI_UNLOCK(vp);
 		if ((vp->v_vflag & VV_ROOT) != 0) {
 			dc = vp->v_mount->mnt_dircache;
+			VI_UNLOCK(vp);
 			DCDEBUG("getentry: root %p vp=%p\n", dc, vp);
 			MPASS(dc != NULL);
 			dc_lock(dc);
 			dc_refvnode(dc, vp);
 		} else {
+			VI_UNLOCK(vp);
 #if 0
 			DCDEBUG("getentry: not found vp=%p\n", vp);
 #else
@@ -497,19 +361,19 @@
 			    cnp->cn_nameptr[1] == '.'))));
 
 			for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) {
-				VI_UNLOCK(vp);
-				dc_lock(dc);
-				if (dc->dc_vnode != vp) {
-					dc_unlock(dc);
+				if (dc_vinterlock(vp, dc) != 0) {
 					DCDEBUG("getenrty: restart; multiple entries; vp=%p\n",
 					    vp);
 					goto restart;
 				}
-				/* FIXME: dc_parent locking */
+				dc_lock(dc->dc_parent);
 				if (dc_cmpname(dc, cnp->cn_nameptr,
 				    cnp->cn_namelen) == 0 &&
-				    dvp == dc->dc_parent->dc_vnode)
+				    dvp == dc->dc_parent->dc_vnode) {
+					dc_unlock(dc->dc_parent);
 					break;
+				}
+				dc_unlock(dc->dc_parent);
 				dc_unlock(dc);
 				VI_LOCK(vp);
 			}
@@ -522,115 +386,153 @@
 #endif
 			}
 		} else {
-			VI_UNLOCK(vp);
-			dc_lock(dc);
+			if (dc_vinterlock(vp, dc) != 0) {
+				DCDEBUG("getenrty: restart; node removed; vp=%p\n", vp);
+				goto restart;
+			}
 		}
 	}
 
 	dc_assertlock(dc, MA_OWNED);
-	if (dc->dc_vnode != vp) {
-		dc_unlock(dc);
-		DCDEBUG("getenrty: restart; vp=%p\n", vp);
-		goto restart;
-	}
+	MPASS(dc->dc_vnode == vp);
 	return (dc);
 }
 
 static struct dircache *
-dc_find(struct vnode *dvp, struct componentname *cnp, int flags)
+dc_find(struct vnode *dvp, struct componentname *cnp)
 {
 	struct dircache key;
 	struct dircache *pdc, *dc;
+	int pdcref;
 
 	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL)
 		return (NULL);
 	dc_assertlock(pdc, MA_OWNED);
 
+	pdcref = 0;
 	dc_initname(&key, cnp->cn_nameptr, cnp->cn_namelen);
+
+restart:
 	dc = RB_FIND(dircache_tree, &pdc->dc_children, &key);
 	if (dc == NULL) {
 		dc_unlock(pdc);
-		return (NULL);
+		goto out;
 	}
 
-	if ((flags & DC_OP_NOWAIT) == 0)
-		dc_wait(pdc);
-
-	dc_interlock(pdc, dc);
+	if (dc_trylock(dc) != 0)
+		dc_unlock(pdc);
+	else {
+		if (pdcref == 0) {
+			dc_ref(pdc);
+			pdcref++;
+		}
+		dc_ref(dc);
+		dc_unlock(pdc);
+		dc_lock(dc);
+		if (dc->dc_parent != pdc) {
+			dc_unlock(dc);
+			dc_rele(dc);
+			dc_lock(pdc);
+			goto restart;
+		}
+		if (dc_relel(dc) != 0) {
+			dc_lock(pdc);
+			goto restart;
+		}
+	}
+	dc_assertlock(pdc, MA_NOTOWNED);
 	dc_assertlock(dc, MA_OWNED);
 	MPASS(dc->dc_parent == pdc);
 
+out:
+	if (pdcref != 0)
+		dc_rele(pdc);
 	return (dc);
 }
 
 static struct dircache *
-dc_update(struct dircache_cursor *curs, struct vnode *vp,
-    enum dircache_type type, char *name, size_t namelen,
-    ino_t inode, off_t offset, void *fspriv)
+dc_insert(struct dircache *pdc, struct dircache *dc, struct vnode *vp,
+    ino_t inode)
 {
-	struct dircache *dc, *pdc, *col;
+	struct dircache *col;
 
-	pdc = curs->dcr_parent;
-	MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0);
+	DCDEBUG("update: parent=%p name=%s\n", pdc, pdc->dc_name);
 
-	DCDEBUG("update: parent=%p name=%s\n", pdc, name);
+restart:
+	dc_assertlock(dc, MA_OWNED);
+	dc_assertlock(pdc, MA_OWNED);
 
-	dc = dc_alloc(pdc, type, name, namelen);
-
-	if (type == DT_WEAK)
-		curs->dcr_nflags |= DC_CH_HASWEAK;
-	dc->dc_fspriv = fspriv;
 	col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
 	if (col != NULL) {
-		if (type == col->dc_type) {
+		if (dc->dc_type == col->dc_type) {
 			DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
-			MPASS(col->dc_inode == inode && col->dc_offset == offset);
-			dc->dc_fspriv = NULL;
-			dc->dc_parent = NULL;
+			MPASS(col->dc_inode == inode);
+			dc_unlock(pdc);
+			dc_unlock(dc);
 			dc_rele(dc);
 			return (NULL);
 		} else if (col->dc_type == DT_NEGATIVE) {
 			DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name);
-			dc_lock(col);
-			col->dc_type = type;
-			MPASS((col->dc_flags & DC_CH_COMPLETE) == 0);
-			col->dc_flags |= DC_CH_PARTIAL;
+			dc_unlock(dc);
+			if (dc_trylock(col) == 0) {
+				dc_unlock(pdc);
+				dc_lock(col);
+				if (col->dc_parent != pdc) {
+					dc_unlock(col);
+					dc_lock(dc);
+					dc_lock(pdc);
+					goto restart;
+				}
+				dc_lock(pdc);
+			}
+			col->dc_type = dc->dc_type;
 			col->dc_inode = inode;
-			col->dc_offset = offset;
-			MPASS(col->dc_fspriv == NULL);
-			col->dc_fspriv = fspriv;
-			dc->dc_fspriv = NULL;
-			dc_unlock(col);
-			dc->dc_parent = NULL;
+			dc_unlock(pdc);
 			dc_rele(dc);
 			dc = col;
 		} else
 			panic("dircache: update: ivalid entry: %d %s\n",
 			    dc->dc_type, dc->dc_name);
-	} else
+	} else {
+		dc->dc_parent = pdc;
 		dc_ref(pdc);
-	if (vp != NULL) {
-		dc_lock(dc);
+		dc_unlock(pdc);
+	}
+	if (vp != NULL)
 		dc_refvnode(dc, vp);
-		dc_unlock(dc);
+	return (dc);
+}
+
+static __inline void
+dc_assertempty(struct dircache *dc)
+{
+	struct dircache *child;
+
+	RB_FOREACH(child, dircache_tree, &dc->dc_children) {
+		MPASS(child->dc_type == DT_NEGATIVE);
 	}
-	return (dc);
 }
 
 static void
 dc_removeentry(struct dircache *dc)
 {
 	struct dircache *parent;
+
 	MPASS(dc->dc_parent != NULL);
+	dc_assertlock(dc, MA_OWNED);
+	dc_assertlock(dc->dc_parent, MA_OWNED);
+	dc_assertempty(dc);
 
-	dc->dc_fspriv = NULL;
+	parent = dc->dc_parent;
+	if (parent->dc_type != DT_NEGATIVE)
+		dc_updategen(parent);
 	dc->dc_type = DT_INVAL;
-	parent = dc->dc_parent;
 	dc->dc_parent = NULL;
 	RB_REMOVE(dircache_tree, &parent->dc_children, dc);
+	dc_unlock(parent);
 	if (dc->dc_vnode != NULL)
-		dc_relevnode(dc);
+		dc_relevnode(dc, 0);
 	else
 		dc_unlock(dc);
 	dc_rele(parent);
@@ -641,15 +543,14 @@
 dc_marknegative(struct dircache *dc)
 {
 	DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode);
+	dc_lock(dc->dc_parent);
+	dc_assertempty(dc);
+	dc_updategen(dc->dc_parent);
+	dc->dc_type = DT_NEGATIVE;
+	dc_unlock(dc->dc_parent);
 	dc->dc_inode = 0;
-	dc->dc_offset = 0;
-	dc->dc_fspriv = NULL;
-	dc->dc_type = DT_NEGATIVE;
-	dc->dc_flags &= ~DC_CH_COMPLETE;
-	dc->dc_flags |= DC_CH_PARTIAL;
-	dc_updategen(dc);
 	if (dc->dc_vnode != NULL)
-		dc_relevnode(dc);
+		dc_relevnode(dc, 0);
 	else
 		dc_unlock(dc);
 	dc_assertlock(dc, MA_NOTOWNED);
@@ -661,7 +562,7 @@
 	struct dircache *dc;
 
 	MPASS(mp->mnt_dircache == NULL);
-	dc = dc_alloc(NULL, DT_ROOT, NULL, 0);
+	dc = dc_alloc(DT_ROOT, NULL, 0);
 	dc->dc_inode = inode;
 	mp->mnt_dircache = dc;
 	DCDEBUG("init: root=%p %d\n", dc, inode);
@@ -679,119 +580,58 @@
 void
 dircache_purge_negative(struct vnode *vp)
 {
-	struct dircache *dc, *ch, *tmp;
+	TAILQ_HEAD(, dircache) head = TAILQ_HEAD_INITIALIZER(head);
+	struct dircache *dc, *child, *tmp;
+	int r;
 
 restart:
 	VI_LOCK(vp);
-	TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) {
-		DCDEBUG("purge negative: %p %s; vp=%p\n", dc, dc->dc_name, vp);
+	dc = TAILQ_FIRST(&vp->v_dircache);
+	if (dc == NULL) {
 		VI_UNLOCK(vp);
-		dc_lock(dc);
-		if (dc->dc_vnode != vp) {
-			dc_unlock(dc);
+		return;
+	}
+	if (vp->v_type == VDIR) {
+		MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL);
+		if (dc_vinterlock(vp, dc) != 0)
 			goto restart;
-		}
-		RB_FOREACH_SAFE(ch, dircache_tree, &dc->dc_children, tmp) {
-			if (ch->dc_type == DT_NEGATIVE)
+		dc_ref(dc);
+		RB_FOREACH_SAFE(child, dircache_tree, &dc->dc_children, tmp) {
+			if (child->dc_type == DT_NEGATIVE) {
 				RB_REMOVE(dircache_tree, &dc->dc_children,
-				    ch);
+				    child);
+				if (dc_trylock(child) != 0) {
+					child->dc_parent = NULL;
+					dc_unlock(child);
+					dc_rele(child);
+					r = dc_relel(dc);
+					MPASS(r == 0);
+				} else
+					TAILQ_INSERT_HEAD(&head, child,
+					    dc_tmplist);
+			}
 		}
-		VI_LOCK(vp);
 		dc_unlock(dc);
+		while(!TAILQ_EMPTY(&head)) {
+			child = TAILQ_FIRST(&head);
+			dc_lock(child);
+			MPASS(child->dc_parent == dc);
+			dc_lock(dc);
+			child->dc_parent = NULL;
+			dc_unlock(dc);
+			dc_rele(child);
+			dc_rele(dc);
+		}
+		dc_rele(dc);
+	} else {
+		/* Check invariants */
+		TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) {
+			MPASS(dc->dc_type != DT_NEGATIVE);
+		}
+		VI_UNLOCK(vp);
 	}
-	VI_UNLOCK(vp);
 }
 
-void
-dircache_update(struct dircache_cursor *curs, enum dircache_type type,
-    char *name, size_t namelen, ino_t inode, off_t offset)
-{
-	dc_update(curs, NULL, type, name, namelen, inode, offset, NULL);
-}
-
-int
-dircache_beginupdate(struct dircache_cursor *curs, struct vnode *dvp,
-    struct componentname *cnp, int flags)
-{

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006220809.o5M89QsF069614>