Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 14 Nov 2002 11:29:26 -0800 (PST)
From:      Matthew Dillon <dillon@apollo.backplane.com>
To:        "Cameron Grant" <gandalf@vilnya.demon.co.uk>, freebsd-hackers@freebsd.org
Cc:        Terry Lambert <tlambert2@mindspring.com>, "Daniel O'Connor" <doconnor@gsoft.com.au>, Hans Zaunere <zaunere@yahoo.com>
Subject:   Patch #6 (Re: Shared files within a jail)
Message-ID:  <200211141929.gAEJTQcl067196@apollo.backplane.com>
References:  <20021113034726.75787.qmail@web12801.mail.yahoo.com> <1037159767.66058.34.camel@chowder.localdomain> <200211130530.gAD5UxNt067928@apollo.backplane.com> <3DD1FAB9.82607C41@mindspring.com> <200211131114.gADBE3lM069566@apollo.backplane.com> <3DD2DF3A.18489E80@mindspring.com> <200211132358.gADNwAVP012795@apollo.backplane.com>

next in thread | previous in thread | raw e-mail | index | archive | help
    Cameron and I have been working through some of the more blatent bugs.

    Here is an intermediate patch for -stable, for both unionfs and nullfs.
    There are still plenty of bugs left but this patch should fix the 
    major issues with devices.

    Basically what is going on is that special vnode types like VBLK and VCHR
    are also assumed to have special fields filled in which nullfs and unionfs
    do not fill in when they synthesized a vnode.  Unfortunately, some of
    these fields *CAN'T* be filled in.  For example, take a VCHR vnode.  
    The system expects v_rdev to be filled in.  v_rdev cannot be safely 
    filled in without aliasing the device.  The device cannot be safely 
    aliased because the system makes major assumptions in regards to the
    alias/vnode-ref counts in order to determine whether a device close
    or a revoke() can be done.  If we alias the device, everything breaks.
    I spent four hours trying to alias the device and couldn't get it to
    work.  Either it caused specfs to call d_close without the device first
    being opened, or it caused revoke() to fail, or it through the device
    was opened multiple times when it wasn't, or it thought the device
    was opened when it wasn't (that was why sshd hung, because the child
    process closed the tty side of the pty and the pty side still thought
    the tty side was open because it the vnode was being cached by nullfs
    or unionfs).

    In short, FreeBSD's device tracking code needs to be seriously 
    rewritten.  FreeBSD cannot distinguish between vnodes which have
    d_open()'d (VOP_OPEN()'d) the device and vnodes which have not.

    So this patch is a hack.  It returns special devices directly whenever
    possible but must still synthesize temporary vnodes for them for
    RENAME and DELETE operations.  But short of rewriting a big chunk of
    the device tracking infrastructure there is no other solution.

						-Matt

Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.249.2.29
diff -u -r1.249.2.29 vfs_subr.c
--- kern/vfs_subr.c	13 Oct 2002 16:19:12 -0000	1.249.2.29
+++ kern/vfs_subr.c	14 Nov 2002 18:01:43 -0000
@@ -2115,10 +2115,12 @@
 	int count;
 
 	count = 0;
-	simple_lock(&spechash_slock);
-	SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
-		count += vq->v_usecount;
-	simple_unlock(&spechash_slock);
+	if (vp->v_rdev) {
+		simple_lock(&spechash_slock);
+		SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
+			count += vq->v_usecount;
+		simple_unlock(&spechash_slock);
+	}
 	return (count);
 }
 
Index: miscfs/nullfs/null_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/Attic/null_subr.c,v
retrieving revision 1.21.2.4
diff -u -r1.21.2.4 null_subr.c
--- miscfs/nullfs/null_subr.c	26 Jun 2001 04:20:09 -0000	1.21.2.4
+++ miscfs/nullfs/null_subr.c	14 Nov 2002 17:55:09 -0000
@@ -181,6 +181,7 @@
 	xp->null_vnode = vp;
 	vp->v_data = xp;
 	xp->null_lowervp = lowervp;
+
 	/*
 	 * Before we insert our new node onto the hash chains,
 	 * check to see if someone else has beaten us to it.
Index: miscfs/nullfs/null_vfsops.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/Attic/null_vfsops.c,v
retrieving revision 1.35.2.3
diff -u -r1.35.2.3 null_vfsops.c
--- miscfs/nullfs/null_vfsops.c	26 Jul 2001 20:37:11 -0000	1.35.2.3
+++ miscfs/nullfs/null_vfsops.c	14 Nov 2002 17:55:09 -0000
@@ -246,6 +246,7 @@
 	 */
 	mntdata = mp->mnt_data;
 	mp->mnt_data = 0;
+	mp->mnt_flag &= ~MNT_LOCAL;
 	free(mntdata, M_NULLFSMNT);
 	return 0;
 }
Index: miscfs/nullfs/null_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/Attic/null_vnops.c,v
retrieving revision 1.38.2.6
diff -u -r1.38.2.6 null_vnops.c
--- miscfs/nullfs/null_vnops.c	31 Jul 2002 00:32:28 -0000	1.38.2.6
+++ miscfs/nullfs/null_vnops.c	14 Nov 2002 18:38:28 -0000
@@ -194,6 +194,7 @@
 static int	null_destroyvobject(struct vop_destroyvobject_args *ap);
 static int	null_getattr(struct vop_getattr_args *ap);
 static int	null_getvobject(struct vop_getvobject_args *ap);
+static int	null_revoke(struct vop_revoke_args *ap);
 static int	null_inactive(struct vop_inactive_args *ap);
 static int	null_islocked(struct vop_islocked_args *ap);
 static int	null_lock(struct vop_lock_args *ap);
@@ -388,14 +389,39 @@
 	if (cnp->cn_flags & PDIRUNLOCK)
 		VOP_UNLOCK(dvp, LK_THISLAYER, p);
 	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
+		/*
+		 * Return an appropriately synthesized node.   Special
+		 * file types (e.g. VBLK, VCHR, and others) are a real
+		 * problem because the system makes assumptions about
+		 * special fields in the vnode which we cannot safely
+		 * duplicate.  Unfortunately we have to synthesize nodes if
+		 * we are going to do a deletion or rename to avoid
+		 * confusing the bypass code.
+		 *
+		 * VCHR and VBLK are particularly difficult, because the
+		 * rest of the system makes some bad assumptions on whether
+		 * to close a device or whether the device is 'opened' multiple
+		 * times simply based on the number of vnodes aliased to it
+		 * and theri ref counts.
+		 */
+		int can_synthesize = 0;
+
+		if (cnp->cn_nameiop != LOOKUP && cnp->cn_nameiop != CREATE) {
+			can_synthesize = 1;
+		} else if (lvp->v_type == VDIR || lvp->v_type == VREG ||
+		     lvp->v_type == VLNK) {
+			can_synthesize = 1;
+		}
 		if (ldvp == lvp) {
 			*ap->a_vpp = dvp;
 			VREF(dvp);
 			vrele(lvp);
-		} else {
+		} else if (can_synthesize) {
 			error = null_node_create(dvp->v_mount, lvp, &vp);
 			if (error == 0)
 				*ap->a_vpp = vp;
+		} else {
+			*ap->a_vpp = lvp;
 		}
 	}
 	return (error);
@@ -726,6 +752,7 @@
 		VOP_UNLOCK(vp, LK_THISLAYER, p);
 
 	vput(lowervp);
+
 	/*
 	 * Now it is safe to drop references to the lower vnode.
 	 * VOP_INACTIVE() will be called by vrele() if necessary.
@@ -829,11 +856,31 @@
 }
 
 /*
+ * Revoke - just vgone the node.  Device nodes are passed to the
+ * 	    caller layer directly.
+ */
+static int      
+null_revoke(ap)
+        struct vop_revoke_args /* {
+                struct vnode *a_vp;
+                int a_flags;
+        } */ *ap;
+{
+	struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
+
+	if (lvp == NULL)
+		return EINVAL;
+	vgone(ap->a_vp);
+        return (0);
+}
+
+/*
  * Global vfs data structures
  */
 vop_t **null_vnodeop_p;
 static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) null_bypass },
+	{ &vop_revoke_desc,		(vop_t *) null_revoke },
 	{ &vop_access_desc,		(vop_t *) null_access },
 	{ &vop_createvobject_desc,	(vop_t *) null_createvobject },
 	{ &vop_destroyvobject_desc,	(vop_t *) null_destroyvobject },
Index: miscfs/union/union_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/union/Attic/union_subr.c,v
retrieving revision 1.43.2.2
diff -u -r1.43.2.2 union_subr.c
--- miscfs/union/union_subr.c	25 Dec 2001 01:44:45 -0000	1.43.2.2
+++ miscfs/union/union_subr.c	14 Nov 2002 19:02:30 -0000
@@ -369,6 +369,52 @@
 		vflag = VROOT;
 	}
 
+	/*
+	 * We have to synthesize special nodes under certain circumstances..
+	 * when a DELETE or RENAME is to be performed.  But for anything
+	 * that will open the vnode (LOOKUP, CREATE), we cannot safely return
+	 * a synthesized vnode and must instead return the actual vnode.
+	 * This is because the system makes assumptions not only about
+	 * special fields in the vnode when non-normal vnodes are returned,
+	 * but also makes assumptions based on the ref count in special vnodes.
+	 * (see revoke() and the miscfs/specfs code for examples).
+	 *
+	 * (The docache flag is ignored in the direct case).
+	 */
+	if (cnp && (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
+		if (uppervp && uppervp->v_type != VREG && 
+		    uppervp->v_type != VDIR && uppervp->v_type != VLNK) {
+			*vpp = uppervp;
+			if (upperdvp)
+				vrele(upperdvp);
+			if (lowervp)
+				vrele(lowervp);
+			vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+			return(0);
+		} else if (lowervp && lowervp->v_type != VREG &&
+		    lowervp->v_type != VDIR && lowervp->v_type != VLNK) {
+			*vpp = lowervp;
+			if (upperdvp)
+				vrele(upperdvp);
+			if (uppervp)
+				vrele(uppervp);
+			vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+			return(0);
+		}
+	}
+
+	/*
+	 * Do not cache special situations
+	 */
+	if (uppervp && uppervp->v_type != VREG && 
+	    uppervp->v_type != VDIR && uppervp->v_type != VLNK) {
+		docache = 0;
+	}
+	if (lowervp && lowervp->v_type != VREG && 
+	    lowervp->v_type != VDIR && lowervp->v_type != VLNK) {
+		docache = 0;
+	}
+
 loop:
 	if (!docache) {
 		un = 0;
@@ -538,7 +584,6 @@
 	/*
 	 * Create new node rather then replace old node
 	 */
-
 	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
 	if (error) {
 		/*
Index: miscfs/union/union_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/union/Attic/union_vnops.c,v
retrieving revision 1.72
diff -u -r1.72 union_vnops.c
--- miscfs/union/union_vnops.c	15 Dec 1999 23:02:14 -0000	1.72
+++ miscfs/union/union_vnops.c	14 Nov 2002 18:02:50 -0000
@@ -98,6 +98,7 @@
 static int	union_revoke __P((struct vop_revoke_args *ap));
 static int	union_rmdir __P((struct vop_rmdir_args *ap));
 static int	union_poll __P((struct vop_poll_args *ap));
+static int	union_kqfilter __P((struct vop_kqfilter_args *ap));
 static int	union_setattr __P((struct vop_setattr_args *ap));
 static int	union_strategy __P((struct vop_strategy_args *ap));
 static int	union_getpages __P((struct vop_getpages_args *ap));
@@ -1189,6 +1190,26 @@
 }
 
 static int
+union_kqfilter(ap)
+	struct vop_kqfilter_args /* {
+		struct vnode *a_vp;
+		...
+	} */ *ap;
+{
+	struct vnode *ovp = OTHERVP(ap->a_vp);
+
+	ap->a_vp = ovp;
+	return (VCALL(ovp, VOFFSET(vop_kqfilter), ap));
+}
+
+/*
+ * Revoke access
+ *
+ *	Note that if this is a device node, the lower or upper vp is already
+ *	on the vnode alias list for the device and revoke will be called on it,
+ *	so a duplicate call here would panic the box.
+ */
+static int
 union_revoke(ap)
 	struct vop_revoke_args /* {
 		struct vnode *a_vp;
@@ -1198,9 +1219,9 @@
 {
 	struct vnode *vp = ap->a_vp;
 
-	if (UPPERVP(vp))
+	if (UPPERVP(vp) && vcount(UPPERVP(vp)) > 1)
 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
-	if (LOWERVP(vp))
+	if (LOWERVP(vp) && vcount(LOWERVP(vp)) > 1)
 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
 	vgone(vp);
 	return (0);
@@ -1958,6 +1979,7 @@
 	{ &vop_open_desc,		(vop_t *) union_open },
 	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
 	{ &vop_poll_desc,		(vop_t *) union_poll },
+	{ &vop_kqfilter_desc,		(vop_t *) union_kqfilter },
 	{ &vop_print_desc,		(vop_t *) union_print },
 	{ &vop_read_desc,		(vop_t *) union_read },
 	{ &vop_readdir_desc,		(vop_t *) union_readdir },

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200211141929.gAEJTQcl067196>