Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 30 Apr 2016 03:19:07 +0000 (UTC)
From:      Jamie Gritton <jamie@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r298833 - in stable/10/sys: kern sys
Message-ID:  <201604300319.u3U3J7QH039611@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jamie
Date: Sat Apr 30 03:19:07 2016
New Revision: 298833
URL: https://svnweb.freebsd.org/changeset/base/298833

Log:
  MFC r298565:
  
    Add a new jail OSD method, PR_METHOD_REMOVE.  It's called when a jail is
    removed from the user perspective, i.e. when the last pr_uref goes away,
    even though the jail mail still exist in the dying state.  It will also
    be called if either PR_METHOD_CREATE or PR_METHOD_SET fail.
  
  MFC r298683:
  
    Delay removing the last jail reference in prison_proc_free, and instead
    put it off into the pr_task.  This is similar to prison_free, and in fact
    uses the same task even though they do something slightly different.
  
  MFC r298566:
  
    Pass the current/new jail to PR_METHOD_CHECK, which pushes the call
    until after the jail is found or created.  This requires unlocking the
    jail for the call and re-locking it afterward, but that works because
    nothing in the jail has been changed yet, and other processes won't
    change the important fields as long as allprison_lock remains held.
  
    Keep better track of name vs namelc in kern_jail_set.  Name should
    always be the hierarchical name (relative to the caller), and namelc
    the last component.
  
  MFC r298668:
  
    Use crcopysafe in jail_attach.
  
  PR:		48471

Modified:
  stable/10/sys/kern/kern_jail.c
  stable/10/sys/sys/jail.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/kern/kern_jail.c
==============================================================================
--- stable/10/sys/kern/kern_jail.c	Sat Apr 30 03:05:36 2016	(r298832)
+++ stable/10/sys/kern/kern_jail.c	Sat Apr 30 03:19:07 2016	(r298833)
@@ -560,8 +560,9 @@ kern_jail_set(struct thread *td, struct 
 	void *op;
 #endif
 	unsigned long hid;
-	size_t namelen, onamelen;
-	int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
+	size_t namelen, onamelen, pnamelen;
+	int born, created, cuflags, descend, enforce;
+	int error, errmsg_len, errmsg_pos;
 	int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
 	int fi, jid, jsys, len, level;
 	int childmax, osreldt, rsnum, slevel;
@@ -584,7 +585,7 @@ kern_jail_set(struct thread *td, struct 
 		error = priv_check(td, PRIV_JAIL_ATTACH);
 	if (error)
 		return (error);
-	mypr = ppr = td->td_ucred->cr_prison;
+	mypr = td->td_ucred->cr_prison;
 	if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
 		return (EPERM);
 	if (flags & ~JAIL_SET_MASK)
@@ -611,6 +612,13 @@ kern_jail_set(struct thread *td, struct 
 #endif
 	g_path = NULL;
 
+	cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
+	if (!cuflags) {
+		error = EINVAL;
+		vfs_opterror(opts, "no valid operation (create or update)");
+		goto done_errmsg;
+	}
+
 	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
 	if (error == ENOENT)
 		jid = 0;
@@ -1020,42 +1028,18 @@ kern_jail_set(struct thread *td, struct 
 	}
 
 	/*
-	 * Grab the allprison lock before letting modules check their
-	 * parameters.  Once we have it, do not let go so we'll have a
-	 * consistent view of the OSD list.
-	 */
-	sx_xlock(&allprison_lock);
-	error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
-	if (error)
-		goto done_unlock_list;
-
-	/* By now, all parameters should have been noted. */
-	TAILQ_FOREACH(opt, opts, link) {
-		if (!opt->seen && strcmp(opt->name, "errmsg")) {
-			error = EINVAL;
-			vfs_opterror(opts, "unknown parameter: %s", opt->name);
-			goto done_unlock_list;
-		}
-	}
-
-	/*
-	 * See if we are creating a new record or updating an existing one.
+	 * Find the specified jail, or at least its parent.
 	 * This abuses the file error codes ENOENT and EEXIST.
 	 */
-	cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
-	if (!cuflags) {
-		error = EINVAL;
-		vfs_opterror(opts, "no valid operation (create or update)");
-		goto done_unlock_list;
-	}
 	pr = NULL;
-	namelc = NULL;
+	ppr = mypr;
 	if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
 		namelc = strrchr(name, '.');
 		jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
 		if (*p != '\0')
 			jid = 0;
 	}
+	sx_xlock(&allprison_lock);
 	if (jid != 0) {
 		/*
 		 * See if a requested jid already exists.  There is an
@@ -1121,6 +1105,7 @@ kern_jail_set(struct thread *td, struct 
 	 * and updates keyed by the name itself (where the name must exist
 	 * because that is the jail being updated).
 	 */
+	namelc = NULL;
 	if (name != NULL) {
 		namelc = strrchr(name, '.');
 		if (namelc == NULL)
@@ -1131,7 +1116,6 @@ kern_jail_set(struct thread *td, struct 
 			 * parent and child names, and make sure the parent
 			 * exists or matches an already found jail.
 			 */
-			*namelc = '\0';
 			if (pr != NULL) {
 				if (strncmp(name, ppr->pr_name, namelc - name)
 				    || ppr->pr_name[namelc - name] != '\0') {
@@ -1142,6 +1126,7 @@ kern_jail_set(struct thread *td, struct 
 					goto done_unlock_list;
 				}
 			} else {
+				*namelc = '\0';
 				ppr = prison_find_name(mypr, name);
 				if (ppr == NULL) {
 					error = ENOENT;
@@ -1150,17 +1135,18 @@ kern_jail_set(struct thread *td, struct 
 					goto done_unlock_list;
 				}
 				mtx_unlock(&ppr->pr_mtx);
+				*namelc = '.';
 			}
-			name = ++namelc;
+			namelc++;
 		}
-		if (name[0] != '\0') {
-			namelen =
+		if (namelc[0] != '\0') {
+			pnamelen =
 			    (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
  name_again:
 			deadpr = NULL;
 			FOREACH_PRISON_CHILD(ppr, tpr) {
 				if (tpr != pr && tpr->pr_ref > 0 &&
-				    !strcmp(tpr->pr_name + namelen, name)) {
+				    !strcmp(tpr->pr_name + pnamelen, namelc)) {
 					if (pr == NULL &&
 					    cuflags != JAIL_CREATE) {
 						mtx_lock(&tpr->pr_mtx);
@@ -1237,7 +1223,8 @@ kern_jail_set(struct thread *td, struct 
 		if (ppr->pr_ref == 0) {
 			mtx_unlock(&ppr->pr_mtx);
 			error = ENOENT;
-			vfs_opterror(opts, "parent jail went away!");
+			vfs_opterror(opts, "jail \"%s\" not found",
+			    prison_name(mypr, ppr));
 			goto done_unlock_list;
 		}
 		ppr->pr_ref++;
@@ -1291,8 +1278,8 @@ kern_jail_set(struct thread *td, struct 
 		pr->pr_id = jid;
 
 		/* Set some default values, and inherit some from the parent. */
-		if (name == NULL)
-			name = "";
+		if (namelc == NULL)
+			namelc = "";
 		if (path == NULL) {
 			path = "/";
 			root = mypr->pr_root;
@@ -1355,6 +1342,7 @@ kern_jail_set(struct thread *td, struct 
 
 		LIST_INIT(&pr->pr_children);
 		mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
+		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 
 #ifdef VIMAGE
 		/* Allocate a new vnet if specified. */
@@ -1374,7 +1362,7 @@ kern_jail_set(struct thread *td, struct 
 		mtx_lock(&pr->pr_mtx);
 		/*
 		 * New prisons do not yet have a reference, because we do not
-		 * want other to see the incomplete prison once the
+		 * want others to see the incomplete prison once the
 		 * allprison_lock is downgraded.
 		 */
 	} else {
@@ -1588,13 +1576,13 @@ kern_jail_set(struct thread *td, struct 
 	}
 #endif
 	onamelen = namelen = 0;
-	if (name != NULL) {
+	if (namelc != NULL) {
 		/* Give a default name of the jid.  Also allow the name to be
 		 * explicitly the jid - but not any other number, and only in
 		 * normal form (no leading zero/etc).
 		 */
-		if (name[0] == '\0')
-			snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
+		if (namelc[0] == '\0')
+			snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
 		else if ((strtoul(namelc, &p, 10) != jid ||
 			  namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
 			error = EINVAL;
@@ -1606,9 +1594,10 @@ kern_jail_set(struct thread *td, struct 
 		 * Make sure the name isn't too long for the prison or its
 		 * children.
 		 */
-		onamelen = strlen(pr->pr_name);
-		namelen = strlen(name);
-		if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
+		pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
+		onamelen = strlen(pr->pr_name + pnamelen);
+		namelen = strlen(namelc);
+		if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
 			error = ENAMETOOLONG;
 			goto done_deref_locked;
 		}
@@ -1625,6 +1614,30 @@ kern_jail_set(struct thread *td, struct 
 		goto done_deref_locked;
 	}
 
+	/*
+	 * Let modules check their parameters.  This requires unlocking and
+	 * then re-locking the prison, but this is still a valid state as long
+	 * as allprison_lock remains xlocked.
+	 */
+	mtx_unlock(&pr->pr_mtx);
+	error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
+	if (error != 0) {
+		prison_deref(pr, created
+		    ? PD_LIST_XLOCKED
+		    : PD_DEREF | PD_LIST_XLOCKED);
+		goto done_releroot;
+	}
+	mtx_lock(&pr->pr_mtx);
+
+	/* At this point, all valid parameters should have been noted. */
+	TAILQ_FOREACH(opt, opts, link) {
+		if (!opt->seen && strcmp(opt->name, "errmsg")) {
+			error = EINVAL;
+			vfs_opterror(opts, "unknown parameter: %s", opt->name);
+			goto done_deref_locked;
+		}
+	}
+
 	/* Set the parameters of the prison. */
 #ifdef INET
 	redo_ip4 = 0;
@@ -1698,12 +1711,12 @@ kern_jail_set(struct thread *td, struct 
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 			tpr->pr_devfs_rsnum = rsnum;
 	}
-	if (name != NULL) {
+	if (namelc != NULL) {
 		if (ppr == &prison0)
-			strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
+			strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
 		else
 			snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
-			    ppr->pr_name, name);
+			    ppr->pr_name, namelc);
 		/* Change this component of child names. */
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 			bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
@@ -1781,6 +1794,7 @@ kern_jail_set(struct thread *td, struct 
 	 * for now, so new ones will remain unseen until after the module
 	 * handlers have completed.
 	 */
+	born = pr->pr_uref == 0;
 	if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
 		if (pr_flags & PR_PERSIST) {
 			pr->pr_ref++;
@@ -1850,15 +1864,20 @@ kern_jail_set(struct thread *td, struct 
 
 	/* Let the modules do their work. */
 	sx_downgrade(&allprison_lock);
-	if (created) {
+	if (born) {
 		error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
 		if (error) {
-			prison_deref(pr, PD_LIST_SLOCKED);
+			(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+			prison_deref(pr, created
+			    ? PD_LIST_SLOCKED
+			    : PD_DEREF | PD_LIST_SLOCKED);
 			goto done_errmsg;
 		}
 	}
 	error = osd_jail_call(pr, PR_METHOD_SET, opts);
 	if (error) {
+		if (born)
+			(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
 		prison_deref(pr, created
 		    ? PD_LIST_SLOCKED
 		    : PD_DEREF | PD_LIST_SLOCKED);
@@ -1910,7 +1929,7 @@ kern_jail_set(struct thread *td, struct 
 			sx_sunlock(&allprison_lock);
 	}
 
-	goto done_errmsg;
+	goto done_free;
 
  done_deref_locked:
 	prison_deref(pr, created
@@ -2404,7 +2423,6 @@ sys_jail_attach(struct thread *td, struc
 static int
 do_jail_attach(struct thread *td, struct prison *pr)
 {
-	struct prison *ppr;
 	struct proc *p;
 	struct ucred *newcred, *oldcred;
 	int error;
@@ -2432,7 +2450,6 @@ do_jail_attach(struct thread *td, struct
 	/*
 	 * Reparent the newly attached process to this jail.
 	 */
-	ppr = td->td_ucred->cr_prison;
 	p = td->td_proc;
 	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
 	if (error)
@@ -2451,23 +2468,23 @@ do_jail_attach(struct thread *td, struct
 
 	newcred = crget();
 	PROC_LOCK(p);
-	oldcred = p->p_ucred;
-	setsugid(p);
-	crcopy(newcred, oldcred);
+	oldcred = crcopysafe(p, newcred);
 	newcred->cr_prison = pr;
 	p->p_ucred = newcred;
+	setsugid(p);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
 #endif
+	prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
 	crfree(oldcred);
-	prison_deref(ppr, PD_DEREF | PD_DEUREF);
 	return (0);
+
  e_unlock:
 	VOP_UNLOCK(pr->pr_root, 0);
  e_revert_osd:
 	/* Tell modules this thread is still in its old jail after all. */
-	(void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
+	(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
 	prison_deref(pr, PD_DEREF | PD_DEUREF);
 	return (error);
 }
@@ -2576,16 +2593,13 @@ prison_allow(struct ucred *cred, unsigne
 void
 prison_free_locked(struct prison *pr)
 {
+	int ref;
 
 	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	pr->pr_ref--;
-	if (pr->pr_ref == 0) {
-		mtx_unlock(&pr->pr_mtx);
-		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
-		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
-		return;
-	}
+	ref = --pr->pr_ref;
 	mtx_unlock(&pr->pr_mtx);
+	if (ref == 0)
+		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 }
 
 void
@@ -2596,11 +2610,17 @@ prison_free(struct prison *pr)
 	prison_free_locked(pr);
 }
 
+/*
+ * Complete a call to either prison_free or prison_proc_free.
+ */
 static void
 prison_complete(void *context, int pending)
 {
+	struct prison *pr = context;
 
-	prison_deref((struct prison *)context, 0);
+	mtx_lock(&pr->pr_mtx);
+	prison_deref(pr, pr->pr_uref
+	    ? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED);
 }
 
 /*
@@ -2613,19 +2633,53 @@ static void
 prison_deref(struct prison *pr, int flags)
 {
 	struct prison *ppr, *tpr;
+	int ref, lasturef;
 
 	if (!(flags & PD_LOCKED))
 		mtx_lock(&pr->pr_mtx);
 	for (;;) {
 		if (flags & PD_DEUREF) {
+			KASSERT(pr->pr_uref > 0,
+			    ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+			     pr->pr_id));
 			pr->pr_uref--;
+			lasturef = pr->pr_uref == 0;
+			if (lasturef)
+				pr->pr_ref++;
 			KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
-		}
-		if (flags & PD_DEREF)
+		} else
+			lasturef = 0;
+		if (flags & PD_DEREF) {
+			KASSERT(pr->pr_ref > 0,
+			    ("prison_deref PD_DEREF on a dead prison (jid=%d)",
+			     pr->pr_id));
 			pr->pr_ref--;
-		/* If the prison still has references, nothing else to do. */
-		if (pr->pr_ref > 0) {
+		}
+		ref = pr->pr_ref;
+		mtx_unlock(&pr->pr_mtx);
+
+		/*
+		 * Tell the modules if the last user reference was removed
+		 * (even it sticks around in dying state).
+		 */
+		if (lasturef) {
+			if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
+				if (ref > 1) {
+					sx_slock(&allprison_lock);
+					flags |= PD_LIST_SLOCKED;
+				} else {
+					sx_xlock(&allprison_lock);
+					flags |= PD_LIST_XLOCKED;
+				}
+			}
+			(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+			mtx_lock(&pr->pr_mtx);
+			ref = --pr->pr_ref;
 			mtx_unlock(&pr->pr_mtx);
+		}
+
+		/* If the prison still has references, nothing else to do. */
+		if (ref > 0) {
 			if (flags & PD_LIST_SLOCKED)
 				sx_sunlock(&allprison_lock);
 			else if (flags & PD_LIST_XLOCKED)
@@ -2633,7 +2687,6 @@ prison_deref(struct prison *pr, int flag
 			return;
 		}
 
-		mtx_unlock(&pr->pr_mtx);
 		if (flags & PD_LIST_SLOCKED) {
 			if (!sx_try_upgrade(&allprison_lock)) {
 				sx_sunlock(&allprison_lock);
@@ -2715,7 +2768,20 @@ prison_proc_free(struct prison *pr)
 	mtx_lock(&pr->pr_mtx);
 	KASSERT(pr->pr_uref > 0,
 	    ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
-	prison_deref(pr, PD_DEUREF | PD_LOCKED);
+	if (pr->pr_uref > 1)
+		pr->pr_uref--;
+	else {
+		/*
+		 * Don't remove the last user reference in this context, which
+		 * is expected to be a process that is not only locked, but
+		 * also half dead.
+		 */
+		pr->pr_ref++;
+		mtx_unlock(&pr->pr_mtx);
+		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
+		return;
+	}
+	mtx_unlock(&pr->pr_mtx);
 }
 
 

Modified: stable/10/sys/sys/jail.h
==============================================================================
--- stable/10/sys/sys/jail.h	Sat Apr 30 03:05:36 2016	(r298832)
+++ stable/10/sys/sys/jail.h	Sat Apr 30 03:19:07 2016	(r298833)
@@ -149,7 +149,6 @@ struct prison_racct;
  *   (p) locked by pr_mtx
  *   (c) set only during creation before the structure is shared, no mutex
  *       required to read
- *   (d) set only during destruction of jail, no mutex needed
  */
 struct prison {
 	TAILQ_ENTRY(prison) pr_list;			/* (a) all prisons */
@@ -161,7 +160,7 @@ struct prison {
 	LIST_ENTRY(prison) pr_sibling;			/* (a) next in parent's list */
 	struct prison	*pr_parent;			/* (c) containing jail */
 	struct mtx	 pr_mtx;
-	struct task	 pr_task;			/* (d) destroy task */
+	struct task	 pr_task;			/* (c) destroy task */
 	struct osd	 pr_osd;			/* (p) additional data */
 	struct cpuset	*pr_cpuset;			/* (p) cpuset */
 	struct vnet	*pr_vnet;			/* (c) network stack */
@@ -243,7 +242,8 @@ struct prison_racct {
 #define	PR_METHOD_SET		2
 #define	PR_METHOD_CHECK		3
 #define	PR_METHOD_ATTACH	4
-#define	PR_MAXMETHOD		5
+#define	PR_METHOD_REMOVE	5
+#define	PR_MAXMETHOD		6
 
 /*
  * Lock/unlock a prison.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201604300319.u3U3J7QH039611>