From owner-p4-projects@FreeBSD.ORG Tue Jun 22 08:09:26 2010 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 613891065674; Tue, 22 Jun 2010 08:09:26 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 243B81065672 for ; Tue, 22 Jun 2010 08:09:26 +0000 (UTC) (envelope-from gk@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 10F498FC15 for ; Tue, 22 Jun 2010 08:09:26 +0000 (UTC) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id o5M89QpQ069616 for ; Tue, 22 Jun 2010 08:09:26 GMT (envelope-from gk@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id o5M89QsF069614 for perforce@freebsd.org; Tue, 22 Jun 2010 08:09:26 GMT (envelope-from gk@FreeBSD.org) Date: Tue, 22 Jun 2010 08:09:26 GMT Message-Id: <201006220809.o5M89QsF069614@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to gk@FreeBSD.org using -f From: Gleb Kurtsou To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 180094 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 22 Jun 2010 08:09:26 -0000 http://p4web.freebsd.org/@@180094?ac=10 Change 180094 by gk@gk_h1 on 2010/06/22 08:09:25 Change API dropping dircache_*update(), use granular entry locking Affected files ... .. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs.h#2 edit .. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_subr.c#2 edit .. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vfsops.c#2 edit .. //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vnops.c#2 edit .. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#3 edit .. //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#3 edit .. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#3 edit .. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#3 edit Differences ... ==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs.h#2 (text+ko) ==== @@ -103,7 +103,10 @@ }; #define PM_ROOT_CANRECURSE 0x01 -#define PM_DIRCACHE 0x02 +#define PM_DIRCACHE_NAMECACHE 0x02 +#define PM_DIRCACHE_VATTR 0x04 + +#define PM_DIRCACHE (PM_DIRCACHE_NAMECACHE | PM_DIRCACHE_VATTR) struct pefs_mount { struct mount *pm_lowervfs; ==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_subr.c#2 (text+ko) ==== @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -253,6 +254,11 @@ int buflen = *encname_len; ASSERT_VOP_LOCKED(lvp, "pefs_node_lookup_name"); + + error = dircache_getname(lvp, encname, encname_len); + if (error == 0) + goto out; + locked = VOP_ISLOCKED(lvp); if (ldvp) { dlocked = VOP_ISLOCKED(ldvp); @@ -276,6 +282,7 @@ if (error) return (ENOENT); +out: memcpy(encname, encname + *encname_len, buflen - *encname_len); *encname_len = buflen - *encname_len; if (*encname_len < buflen) ==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vfsops.c#2 (text+ko) ==== @@ -46,6 +46,10 @@ #include +static const char *pefs_dircache_support_vattr[] = { + "zfs", NULL +}; + static MALLOC_DEFINE(M_PEFSMNT, "pefs_mount", "PEFS mount structure"); static const char *pefs_opts[] = { @@ -55,24 +59,35 @@ static void dircache_init(struct mount *mp, int opt, struct pefs_mount *pm) { + struct mount *lmp; char *lowerfs; - int supported; + const char **support_vattr; + int support; + + if (opt == 0) + return; - lowerfs = mp->mnt_vnodecovered->v_mount->mnt_vfc->vfc_name; - supported = (strcmp(lowerfs, "zfs") == 0 || - strcmp(lowerfs, "tmpfs") == 0); - if (opt < 0) - opt = supported; - else if (opt > 0 && supported == 0) { + support = 0; + lmp = mp->mnt_vnodecovered->v_mount; + lowerfs = lmp->mnt_vfc->vfc_name; + for (support_vattr = pefs_dircache_support_vattr; + *support_vattr != NULL; support_vattr++) + if (strcmp(lowerfs, *support_vattr) == 0) { + support |= PM_DIRCACHE_VATTR; + break; + } + MNT_ILOCK(lmp); + if ((lmp->mnt_kern_flag & MNTK_DIRCACHE) != 0) + support |= PM_DIRCACHE_NAMECACHE; + MNT_IUNLOCK(lmp); + if (opt > 0 && support == 0) { printf("pefs: dircache is not supported by filesystem: %s\n", lowerfs); opt = 0; } - if (opt == 0) - pm->pm_flags &= ~PM_DIRCACHE; - else - pm->pm_flags |= PM_DIRCACHE; + if (opt != 0) + pm->pm_flags |= support; PEFSDEBUG("pefs_mount: dircache %s\n", (opt ? "enabled" : "disabed")); } ==== //depot/projects/soc2010/gk_namecache/sys/fs/pefs/pefs_vnops.c#2 (text+ko) ==== @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +106,13 @@ struct vattr va; int error; + /* Prefer PM_DIRCACHE_VATTR */ + if ((VFS_TO_PEFS(vp->v_mount)->pm_flags & PM_DIRCACHE) == + PM_DIRCACHE_NAMECACHE) { + va.va_gen = dircache_getgen(vp); + if (va.va_gen != 0) + return (va.va_gen); + } error = VOP_GETATTR(PEFS_LOWERVP(vp), &va, cred); if (error != 0) return (0); ==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_subr.c#3 (text+ko) ==== @@ -401,6 +401,8 @@ error = insmntque(vp, mp); if (error) vp = NULL; + else + dircache_allocvnode(vp, node->tn_id); unlock: TMPFS_NODE_LOCK(node); @@ -520,8 +522,7 @@ * insert the new node into the directory, an operation that * cannot fail. */ tmpfs_dir_attach(dvp, de); - dircache_add(dvp, *vpp, cnp, DT_STRONG, node->tn_id, - tmpfs_dircookie(de)); + dircache_add(dvp, *vpp, cnp, DT_STRONG, node->tn_id); out: @@ -588,8 +589,7 @@ tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, struct componentname *cnp) { - struct dircache_cursor curs; - boolean_t found, cache; + boolean_t found; struct tmpfs_dirent *de; MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); @@ -599,25 +599,16 @@ found = 0; - cache = dircache_beginupdate(&curs, node->tn_vnode, cnp, - DC_OP_IFPARTIAL) == 0; TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { - if (cache) - dircache_update(&curs, DT_STRONG, de->td_name, - de->td_namelen, de->td_node->tn_id, - tmpfs_dircookie(de)); if (f != NULL && de->td_node != f) continue; MPASS(cnp->cn_namelen < 0xffff); if (de->td_namelen == (uint16_t)cnp->cn_namelen && bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) { found = 1; - if (!cache) - break; + break; } } - if (cache) - dircache_completeupdate(&curs); node->tn_status |= TMPFS_NODE_ACCESSED; return found ? de : NULL; ==== //depot/projects/soc2010/gk_namecache/sys/fs/tmpfs/tmpfs_vnops.c#3 (text+ko) ==== @@ -913,7 +913,7 @@ /* Insert the new directory entry into the appropriate directory. */ tmpfs_dir_attach(dvp, de); - dircache_add(dvp, vp, cnp, DT_STRONG, node->tn_id, tmpfs_dircookie(de)); + dircache_add(dvp, vp, cnp, DT_STRONG, node->tn_id); /* vp link count has changed, so update node times. */ node->tn_status |= TMPFS_NODE_CHANGED; @@ -1138,7 +1138,7 @@ tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE); } - dircache_rename(fdvp, fvp, fcnp, tdvp, tvp, tcnp); + dircache_rename(fdvp, fcnp, tdvp, tcnp); error = 0; ==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#3 (text+ko) ==== @@ -49,19 +49,16 @@ #include -#define DCDEBUG(format, args...) \ +#define DC_NAMEROUND 16 /* power of 2 */ + +#define DC_OP_VLOCK 0x00000001 + +#define DCDEBUG(format, args...) \ do { \ if (dircache_debug != 0) \ printf(format ,## args); \ } while (0) -#define DIRCACHE_STAT(n, descr) \ - SYSCTL_PROC(_vfs_dircache, OID_AUTO, n, \ - CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, \ - NULL, __CONCAT(dps_, n), dps_sysctlhandler, "LU", descr) - -#define DC_NAMEROUND 16 /* power of 2 */ - enum { dps_interlock_same, dps_interlock_direct, @@ -70,18 +67,10 @@ dps_max }; -struct nspace; - struct dircache_poolstat { u_long dps_stats[dps_max]; }; -struct dircache_pool { - struct mtx dp_mtx; - u_long dp_gen; - struct dircache_poolstat dp_stat; -}; - struct dircache_root { struct mount *dr_mnt; struct dircache *dr_entry; @@ -91,120 +80,26 @@ static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers"); static uma_zone_t dircache_zone; -static struct dircache_pool **pool; -static size_t pool_size; -static u_long pool_id; -static char **pool_mtxname; -static const int pool_mtxnamesz = 20; - static int dircache_debug = 1; SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0, "Enable debug"); -static int dps_sysctlhandler(SYSCTL_HANDLER_ARGS); - -DIRCACHE_STAT(interlock_same, - "Same lock hits in interlock"); -DIRCACHE_STAT(interlock_direct, - "Direct lock order hits in interlock"); -DIRCACHE_STAT(interlock_reverse, - "Reverse lock order hits in interlock"); -DIRCACHE_STAT(interlock_reverse_fast, - "Reverse lock order without sleeping hits in interlock"); - -static int -ptr_cmp(const void *a, const void *b) -{ - return (((uintptr_t)(*(void * const *)a)) - - ((uintptr_t)(*(void * const *)b))); -} - static void dircache_sysinit(void *arg __unused) { - int i; - - pool_size = 4; - dircache_zone = uma_zcreate("dircache", sizeof(struct dircache), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - pool = malloc(sizeof(void *) * pool_size, - M_DIRCACHE, M_WAITOK); - pool_mtxname = malloc(sizeof(void *) * pool_size, - M_DIRCACHE, M_WAITOK); - /* - * Keep struct dircache_pool size minimal. (and align at cache - * pipeline?) - * Use pool address for lock ordering. - */ - for (i = 0; i < pool_size; i++) { - pool_mtxname[i] = malloc(pool_mtxnamesz, - M_DIRCACHE, M_WAITOK | M_ZERO); - pool[i] = malloc(sizeof(struct dircache_pool), - M_DIRCACHE, M_WAITOK | M_ZERO); - } - qsort(pool, pool_size, sizeof(void *), ptr_cmp); - for (i = 0; i < pool_size; i++) { - pool[i]->dp_gen = pool_id++; - snprintf(pool_mtxname[i], pool_mtxnamesz, "dircache lock %d", i); - mtx_init(&pool[i]->dp_mtx, pool_mtxname[i], NULL, MTX_DEF); - } } SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL); static void dircache_sysuninit(void *arg __unused) { - int i; - - for (i = 0; i < pool_size; i++) { - mtx_destroy(&pool[i]->dp_mtx); - free(pool[i], M_DIRCACHE); - free(pool_mtxname[i], M_DIRCACHE); - } - free(pool, M_DIRCACHE); - free(pool_mtxname, M_DIRCACHE); - pool = NULL; uma_zdestroy(dircache_zone); } SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL); -static void -pool_getstats(struct dircache_poolstat *ps) -{ - struct dircache_poolstat *stat; - int i, ind; - - for (i = 0; i < pool_size; i++) { - mtx_lock(&pool[i]->dp_mtx); - stat = &pool[i]->dp_stat; - for (ind = 0; ind < dps_max; ind++) - ps->dps_stats[ind] += stat->dps_stats[ind]; - mtx_unlock(&pool[i]->dp_mtx); - } -} - -static int -dps_sysctlhandler(SYSCTL_HANDLER_ARGS) -{ - struct dircache_poolstat st = {}; - u_long res; - int error; - - pool_getstats(&st); - res = st.dps_stats[arg2]; - error = SYSCTL_OUT(req, &res, sizeof(res)); - - return (error); -} - -static __inline void -dp_incstat(int ind, struct dircache_pool *dp, u_long val) -{ - dp->dp_stat.dps_stats[ind] += val; -} - static __inline int dc_cmpname(struct dircache *dc, char *name, size_t namelen) { @@ -232,65 +127,18 @@ return (r); } -RB_GENERATE_STATIC(dircache_tree, dircache, dc_listentry, dc_cmp); +RB_GENERATE_STATIC(dircache_tree, dircache, dc_tree, dc_cmp); #define DC_MTX(dc) (&(dc)->dc_pool->dp_mtx) -#define dc_lock(dc) mtx_lock(DC_MTX(dc)) +#define dc_lock(dc) mtx_lock(&(dc)->dc_mtx) -#define dc_trylock(dc) mtx_trylock(DC_MTX(dc)) +#define dc_trylock(dc) mtx_trylock(&(dc)->dc_mtx) -#define dc_unlock(dc) mtx_unlock(DC_MTX(dc)) +#define dc_unlock(dc) mtx_unlock(&(dc)->dc_mtx) -#define dc_assertlock(dc, w) mtx_assert(DC_MTX(dc), (w)) - -static void -dc_relock(struct dircache *from, struct dircache *to) -{ - dc_assertlock(from, MA_OWNED); - - if (from->dc_pool == to->dc_pool) - return; - - dc_assertlock(to, MA_NOTOWNED); - - dc_unlock(from); - dc_lock(to); -} - -static void -dc_interlock(struct dircache *from, struct dircache *to) -{ - dc_assertlock(from, MA_OWNED); - - if (from->dc_pool == to->dc_pool) { - dp_incstat(dps_interlock_same, to->dc_pool, 1); - return; - } - - dc_assertlock(to, MA_NOTOWNED); - if ((uintptr_t)from->dc_pool < (uintptr_t)to->dc_pool) { - dc_lock(to); - dc_unlock(from); - dp_incstat(dps_interlock_direct, to->dc_pool, 1); - return; - } +#define dc_assertlock(dc, w) mtx_assert(&(dc)->dc_mtx, (w)) - critical_enter(); - if (dc_trylock(to) != 0) { - dc_unlock(from); - critical_exit(); - dp_incstat(dps_interlock_reverse_fast, to->dc_pool, 1); - return; - } - critical_exit(); - - /* !!!! FIXME !!!! */ - dc_unlock(from); - dc_lock(to); - dp_incstat(dps_interlock_reverse, to->dc_pool, 1); -} - static __inline void dc_initname(struct dircache *dc, char *name, size_t namelen) { @@ -306,9 +154,9 @@ } static __inline int -dc_namebuffits(struct dircache *dc, size_t namelen) +dc_namebuffits(u_int dcnamelen, u_int nnamelen) { - return (dc_namebuflen(dc->dc_namelen) < namelen + 1); + return (dc_namebuflen(dcnamelen) < nnamelen + 1); } static __inline char * @@ -325,7 +173,8 @@ { MPASS(name != dc->dc_name); - if (dc->dc_name == NULL || dc_namebuffits(dc, namelen) == 0) { + if (dc->dc_name == NULL || + dc_namebuffits(dc->dc_namelen, namelen) == 0) { if (dc->dc_name != NULL) free(dc->dc_name, M_DIRCACHE); if (namebuf == NULL) @@ -342,39 +191,28 @@ static __inline void dc_updategen(struct dircache *dc) { - u_long *genp; + static u_long gen = 1; + dc_assertlock(dc, MA_OWNED); - - genp = &dc->dc_pool->dp_gen; - *genp += pool_size; - dc->dc_gen = *genp; + do { + dc->dc_gen = atomic_fetchadd_long(&gen, 1); + } while (__predict_false(dc->dc_gen == 0)); } static struct dircache * -dc_alloc(struct dircache *pdc, enum dircache_type type, - char *name, size_t namelen) +dc_alloc(enum dircache_type type, char *name, size_t namelen) { struct dircache *dc; - int poolind; dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO); DCDEBUG("alloc: %p %s\n", dc, name); dc->dc_type = type; - dc->dc_flags = DC_CH_PARTIAL; - dc->dc_parent = pdc; refcount_init(&dc->dc_refcnt, 1); - cv_init(&dc->dc_condvar, "dircache cv"); + mtx_init(&dc->dc_mtx, "dircache mtx", NULL, MTX_DEF | MTX_DUPOK); - if (name != NULL && namelen != 0) { + if (name != NULL && namelen != 0) dc_setname(dc, name, namelen, NULL); - /* cheaper way to get pseudo-random value */ - poolind = dc->dc_namehash; - } else { - poolind = arc4random(); - } - poolind %= pool_size; - dc->dc_pool = pool[poolind]; return (dc); } @@ -386,7 +224,7 @@ MPASS(dc->dc_parent == NULL); DCDEBUG("free: %p %s\n", dc, dc->dc_name); - cv_destroy(&dc->dc_condvar); + mtx_destroy(&dc->dc_mtx); uma_zfree(dircache_zone, dc); } @@ -398,35 +236,28 @@ } static __inline int -dc_rele(struct dircache *dc) +dc_relel(struct dircache *dc) { - dc_assertlock(dc, MA_NOTOWNED); + dc_assertlock(dc, MA_OWNED); if (refcount_release(&dc->dc_refcnt) != 0) { + dc_unlock(dc); dc_free(dc); return (1); } return (0); } -static struct dircache * -dc_wait(struct dircache *dc) +static __inline int +dc_rele(struct dircache *dc) { - dc_assertlock(dc, MA_OWNED); + dc_assertlock(dc, MA_NOTOWNED); - if ((dc->dc_flags & DC_CH_UPDATING) == 0) - return (NULL); - - dc->dc_refcnt++; - cv_wait(&dc->dc_condvar, &dc->dc_pool->dp_mtx); - dc->dc_refcnt--; - if (dc->dc_refcnt == 0) { - dc_unlock(dc); + if (refcount_release(&dc->dc_refcnt) != 0) { dc_free(dc); - return (NULL); + return (1); } - - return (dc); + return (0); } static void @@ -449,7 +280,7 @@ } static void -dc_relevnode(struct dircache *dc) +dc_relevnode(struct dircache *dc, int flags) { MPASS(dc->dc_vnode != NULL); dc_assertlock(dc, MA_OWNED); @@ -458,12 +289,44 @@ VI_LOCK(dc->dc_vnode); TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist); - VI_UNLOCK(dc->dc_vnode); + if ((flags & DC_OP_VLOCK) == 0) + VI_UNLOCK(dc->dc_vnode); dc->dc_vnode = NULL; dc_unlock(dc); dc_rele(dc); } +static int +dc_vinterlock(struct vnode *vp, struct dircache *dc) +{ + ASSERT_VI_LOCKED(vp, "dc_vinterlock"); + dc_assertlock(dc, MA_NOTOWNED); + + if (dc_trylock(dc)) { + MPASS(dc->dc_vnode == vp); + VI_UNLOCK(vp); + return (0); + } + + dc_ref(dc); + VI_UNLOCK(vp); + dc_lock(dc); + + if (dc->dc_vnode != vp) { + VI_LOCK(vp); + dc_unlock(dc); + return (1); + } + + if (dc_relel(dc) != 0) { + VI_LOCK(vp); + return (1); + } + + MPASS(dc->dc_vnode == vp); + return (0); +} + static struct dircache * dc_getentry(struct vnode *vp, struct componentname *cnp, struct vnode *dvp) { @@ -473,14 +336,15 @@ VI_LOCK(vp); dc = TAILQ_FIRST(&vp->v_dircache); if (dc == NULL) { - VI_UNLOCK(vp); if ((vp->v_vflag & VV_ROOT) != 0) { dc = vp->v_mount->mnt_dircache; + VI_UNLOCK(vp); DCDEBUG("getentry: root %p vp=%p\n", dc, vp); MPASS(dc != NULL); dc_lock(dc); dc_refvnode(dc, vp); } else { + VI_UNLOCK(vp); #if 0 DCDEBUG("getentry: not found vp=%p\n", vp); #else @@ -497,19 +361,19 @@ cnp->cn_nameptr[1] == '.')))); for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) { - VI_UNLOCK(vp); - dc_lock(dc); - if (dc->dc_vnode != vp) { - dc_unlock(dc); + if (dc_vinterlock(vp, dc) != 0) { DCDEBUG("getenrty: restart; multiple entries; vp=%p\n", vp); goto restart; } - /* FIXME: dc_parent locking */ + dc_lock(dc->dc_parent); if (dc_cmpname(dc, cnp->cn_nameptr, cnp->cn_namelen) == 0 && - dvp == dc->dc_parent->dc_vnode) + dvp == dc->dc_parent->dc_vnode) { + dc_unlock(dc->dc_parent); break; + } + dc_unlock(dc->dc_parent); dc_unlock(dc); VI_LOCK(vp); } @@ -522,115 +386,153 @@ #endif } } else { - VI_UNLOCK(vp); - dc_lock(dc); + if (dc_vinterlock(vp, dc) != 0) { + DCDEBUG("getenrty: restart; node removed; vp=%p\n", vp); + goto restart; + } } } dc_assertlock(dc, MA_OWNED); - if (dc->dc_vnode != vp) { - dc_unlock(dc); - DCDEBUG("getenrty: restart; vp=%p\n", vp); - goto restart; - } + MPASS(dc->dc_vnode == vp); return (dc); } static struct dircache * -dc_find(struct vnode *dvp, struct componentname *cnp, int flags) +dc_find(struct vnode *dvp, struct componentname *cnp) { struct dircache key; struct dircache *pdc, *dc; + int pdcref; pdc = dc_getentry(dvp, NULL, NULL); if (pdc == NULL) return (NULL); dc_assertlock(pdc, MA_OWNED); + pdcref = 0; dc_initname(&key, cnp->cn_nameptr, cnp->cn_namelen); + +restart: dc = RB_FIND(dircache_tree, &pdc->dc_children, &key); if (dc == NULL) { dc_unlock(pdc); - return (NULL); + goto out; } - if ((flags & DC_OP_NOWAIT) == 0) - dc_wait(pdc); - - dc_interlock(pdc, dc); + if (dc_trylock(dc) != 0) + dc_unlock(pdc); + else { + if (pdcref == 0) { + dc_ref(pdc); + pdcref++; + } + dc_ref(dc); + dc_unlock(pdc); + dc_lock(dc); + if (dc->dc_parent != pdc) { + dc_unlock(dc); + dc_rele(dc); + dc_lock(pdc); + goto restart; + } + if (dc_relel(dc) != 0) { + dc_lock(pdc); + goto restart; + } + } + dc_assertlock(pdc, MA_NOTOWNED); dc_assertlock(dc, MA_OWNED); MPASS(dc->dc_parent == pdc); +out: + if (pdcref != 0) + dc_rele(pdc); return (dc); } static struct dircache * -dc_update(struct dircache_cursor *curs, struct vnode *vp, - enum dircache_type type, char *name, size_t namelen, - ino_t inode, off_t offset, void *fspriv) +dc_insert(struct dircache *pdc, struct dircache *dc, struct vnode *vp, + ino_t inode) { - struct dircache *dc, *pdc, *col; + struct dircache *col; - pdc = curs->dcr_parent; - MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0); + DCDEBUG("update: parent=%p name=%s\n", pdc, pdc->dc_name); - DCDEBUG("update: parent=%p name=%s\n", pdc, name); +restart: + dc_assertlock(dc, MA_OWNED); + dc_assertlock(pdc, MA_OWNED); - dc = dc_alloc(pdc, type, name, namelen); - - if (type == DT_WEAK) - curs->dcr_nflags |= DC_CH_HASWEAK; - dc->dc_fspriv = fspriv; col = RB_INSERT(dircache_tree, &pdc->dc_children, dc); if (col != NULL) { - if (type == col->dc_type) { + if (dc->dc_type == col->dc_type) { DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name); - MPASS(col->dc_inode == inode && col->dc_offset == offset); - dc->dc_fspriv = NULL; - dc->dc_parent = NULL; + MPASS(col->dc_inode == inode); + dc_unlock(pdc); + dc_unlock(dc); dc_rele(dc); return (NULL); } else if (col->dc_type == DT_NEGATIVE) { DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name); - dc_lock(col); - col->dc_type = type; - MPASS((col->dc_flags & DC_CH_COMPLETE) == 0); - col->dc_flags |= DC_CH_PARTIAL; + dc_unlock(dc); + if (dc_trylock(col) == 0) { + dc_unlock(pdc); + dc_lock(col); + if (col->dc_parent != pdc) { + dc_unlock(col); + dc_lock(dc); + dc_lock(pdc); + goto restart; + } + dc_lock(pdc); + } + col->dc_type = dc->dc_type; col->dc_inode = inode; - col->dc_offset = offset; - MPASS(col->dc_fspriv == NULL); - col->dc_fspriv = fspriv; - dc->dc_fspriv = NULL; - dc_unlock(col); - dc->dc_parent = NULL; + dc_unlock(pdc); dc_rele(dc); dc = col; } else panic("dircache: update: ivalid entry: %d %s\n", dc->dc_type, dc->dc_name); - } else + } else { + dc->dc_parent = pdc; dc_ref(pdc); - if (vp != NULL) { - dc_lock(dc); + dc_unlock(pdc); + } + if (vp != NULL) dc_refvnode(dc, vp); - dc_unlock(dc); + return (dc); +} + +static __inline void +dc_assertempty(struct dircache *dc) +{ + struct dircache *child; + + RB_FOREACH(child, dircache_tree, &dc->dc_children) { + MPASS(child->dc_type == DT_NEGATIVE); } - return (dc); } static void dc_removeentry(struct dircache *dc) { struct dircache *parent; + MPASS(dc->dc_parent != NULL); + dc_assertlock(dc, MA_OWNED); + dc_assertlock(dc->dc_parent, MA_OWNED); + dc_assertempty(dc); - dc->dc_fspriv = NULL; + parent = dc->dc_parent; + if (parent->dc_type != DT_NEGATIVE) + dc_updategen(parent); dc->dc_type = DT_INVAL; - parent = dc->dc_parent; dc->dc_parent = NULL; RB_REMOVE(dircache_tree, &parent->dc_children, dc); + dc_unlock(parent); if (dc->dc_vnode != NULL) - dc_relevnode(dc); + dc_relevnode(dc, 0); else dc_unlock(dc); dc_rele(parent); @@ -641,15 +543,14 @@ dc_marknegative(struct dircache *dc) { DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode); + dc_lock(dc->dc_parent); + dc_assertempty(dc); + dc_updategen(dc->dc_parent); + dc->dc_type = DT_NEGATIVE; + dc_unlock(dc->dc_parent); dc->dc_inode = 0; - dc->dc_offset = 0; - dc->dc_fspriv = NULL; - dc->dc_type = DT_NEGATIVE; - dc->dc_flags &= ~DC_CH_COMPLETE; - dc->dc_flags |= DC_CH_PARTIAL; - dc_updategen(dc); if (dc->dc_vnode != NULL) - dc_relevnode(dc); + dc_relevnode(dc, 0); else dc_unlock(dc); dc_assertlock(dc, MA_NOTOWNED); @@ -661,7 +562,7 @@ struct dircache *dc; MPASS(mp->mnt_dircache == NULL); - dc = dc_alloc(NULL, DT_ROOT, NULL, 0); + dc = dc_alloc(DT_ROOT, NULL, 0); dc->dc_inode = inode; mp->mnt_dircache = dc; DCDEBUG("init: root=%p %d\n", dc, inode); @@ -679,119 +580,58 @@ void dircache_purge_negative(struct vnode *vp) { - struct dircache *dc, *ch, *tmp; + TAILQ_HEAD(, dircache) head = TAILQ_HEAD_INITIALIZER(head); + struct dircache *dc, *child, *tmp; + int r; restart: VI_LOCK(vp); - TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) { - DCDEBUG("purge negative: %p %s; vp=%p\n", dc, dc->dc_name, vp); + dc = TAILQ_FIRST(&vp->v_dircache); + if (dc == NULL) { VI_UNLOCK(vp); - dc_lock(dc); - if (dc->dc_vnode != vp) { - dc_unlock(dc); + return; + } + if (vp->v_type == VDIR) { + MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL); + if (dc_vinterlock(vp, dc) != 0) goto restart; - } - RB_FOREACH_SAFE(ch, dircache_tree, &dc->dc_children, tmp) { - if (ch->dc_type == DT_NEGATIVE) + dc_ref(dc); + RB_FOREACH_SAFE(child, dircache_tree, &dc->dc_children, tmp) { + if (child->dc_type == DT_NEGATIVE) { RB_REMOVE(dircache_tree, &dc->dc_children, - ch); + child); + if (dc_trylock(child) != 0) { + child->dc_parent = NULL; + dc_unlock(child); + dc_rele(child); + r = dc_relel(dc); + MPASS(r == 0); + } else + TAILQ_INSERT_HEAD(&head, child, + dc_tmplist); + } } - VI_LOCK(vp); dc_unlock(dc); + while(!TAILQ_EMPTY(&head)) { + child = TAILQ_FIRST(&head); + dc_lock(child); + MPASS(child->dc_parent == dc); + dc_lock(dc); + child->dc_parent = NULL; + dc_unlock(dc); + dc_rele(child); + dc_rele(dc); + } + dc_rele(dc); + } else { + /* Check invariants */ + TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) { + MPASS(dc->dc_type != DT_NEGATIVE); + } + VI_UNLOCK(vp); } - VI_UNLOCK(vp); } -void -dircache_update(struct dircache_cursor *curs, enum dircache_type type, - char *name, size_t namelen, ino_t inode, off_t offset) -{ - dc_update(curs, NULL, type, name, namelen, inode, offset, NULL); -} - -int -dircache_beginupdate(struct dircache_cursor *curs, struct vnode *dvp, - struct componentname *cnp, int flags) -{ >>> TRUNCATED FOR MAIL (1000 lines) <<<