Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 15 Aug 2004 17:51:36 -0700 (PDT)
From:      Don Lewis <truckman@FreeBSD.org>
To:        julian@elischer.org
Cc:        cvs-all@FreeBSD.org
Subject:   Re: cvs commit: src/sys/kern kern_proc.c
Message-ID:  <200408160051.i7G0pamD001343@gw.catspoiler.org>
In-Reply-To: <411E7CD4.7010504@elischer.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 14 Aug, Julian Elischer wrote:
> Robert Watson wrote:
>> rwatson     2004-08-14 17:15:16 UTC
>> 
>>   FreeBSD src repository
>> 
>>   Modified files:
>>     sys/kern             kern_proc.c 
>>   Log:
>>   Cause pfind() not to return processes in the PRS_NEW state.  As a result,
>>   threads consuming the result of pfind() will not need to check for a NULL
>>   credential pointer or other signs of an incompletely created process.
>>   However, this also means that pfind() cannot be used to test for the
>>   existence or find such a process.  Annotate pfind() to indicate that this
>>   is the case.  A review of curent consumers seems to indicate that this is
>>   not a problem for any of them.  This closes a number of race conditions
>>   that could result in NULL pointer dereferences and related failure modes.
>>   Other related races continue to exist, especially during iteration of the
>>   allproc list without due caution.
> 
> possibly part of the answer would be to not put the proc on any queues until it 
> is more set up..

That's been my opinion for quite a while.

I spent some time this morning rototilling fork1() to move pid selection
and adding the child process to the allproc and other lists to one
location in the code.  I also gathered all the scheduler stuff in one
spot.

I think there are more things that should be done, such as moving the
block of code projected by Giant that beings with vm_forkproc() to a
spot above where we grab proctree_lock the final time.  I think the
_PHOLD() and _PRELE() calls could be moved with it.  Maybe also move the
EVENTHANDLER_INVOKE() call a bit later, near the KNOTE_LOCKED() call.

This patch has survived a "make -j10 buildworld" and building some ports
on a UP machine.  Unfortunately this is not a good time for making
extensive changes to the fork code.


Index: sys/kern/kern_fork.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.234
diff -u -r1.234 kern_fork.c
--- sys/kern/kern_fork.c	15 Aug 2004 06:24:41 -0000	1.234
+++ sys/kern/kern_fork.c	15 Aug 2004 21:17:15 -0000
@@ -194,9 +194,8 @@
 	int pages;
 	struct proc **procp;
 {
-	struct proc *p1, *p2, *pptr;
+	struct proc *p1, *p2, *p3, *pptr;
 	uid_t uid;
-	struct proc *newproc;
 	int ok, trypid;
 	static int curfail, pidchecked = 0;
 	static struct timeval lastfail;
@@ -283,14 +282,11 @@
 	}
 
 	/* Allocate new proc. */
-	newproc = uma_zalloc(proc_zone, M_WAITOK);
+	p2 = uma_zalloc(proc_zone, M_WAITOK);
 #ifdef MAC
-	mac_init_proc(newproc);
+	mac_init_proc(p2);
 #endif
-	knlist_init(&newproc->p_klist, &newproc->p_mtx);
-
-	/* We have to lock the process tree while we look for a pid. */
-	sx_slock(&proctree_lock);
+	knlist_init(&p2->p_klist, &p2->p_mtx);
 
 	/*
 	 * Although process entries are dynamically created, we still keep
@@ -326,92 +322,6 @@
 	 * are hard-limits as to the number of processes that can run.
 	 */
 	nprocs++;
-
-	/*
-	 * Find an unused process ID.  We remember a range of unused IDs
-	 * ready to use (from lastpid+1 through pidchecked-1).
-	 *
-	 * If RFHIGHPID is set (used during system boot), do not allocate
-	 * low-numbered pids.
-	 */
-	trypid = lastpid + 1;
-	if (flags & RFHIGHPID) {
-		if (trypid < 10)
-			trypid = 10;
-	} else {
-		if (randompid)
-			trypid += arc4random() % randompid;
-	}
-retry:
-	/*
-	 * If the process ID prototype has wrapped around,
-	 * restart somewhat above 0, as the low-numbered procs
-	 * tend to include daemons that don't exit.
-	 */
-	if (trypid >= PID_MAX) {
-		trypid = trypid % PID_MAX;
-		if (trypid < 100)
-			trypid += 100;
-		pidchecked = 0;
-	}
-	if (trypid >= pidchecked) {
-		int doingzomb = 0;
-
-		pidchecked = PID_MAX;
-		/*
-		 * Scan the active and zombie procs to check whether this pid
-		 * is in use.  Remember the lowest pid that's greater
-		 * than trypid, so we can avoid checking for a while.
-		 */
-		p2 = LIST_FIRST(&allproc);
-again:
-		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
-			PROC_LOCK(p2);
-			while (p2->p_pid == trypid ||
-			    (p2->p_pgrp != NULL &&
-			    (p2->p_pgrp->pg_id == trypid ||
-			    (p2->p_session != NULL &&
-			    p2->p_session->s_sid == trypid)))) {
-				trypid++;
-				if (trypid >= pidchecked) {
-					PROC_UNLOCK(p2);
-					goto retry;
-				}
-			}
-			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
-				pidchecked = p2->p_pid;
-			if (p2->p_pgrp != NULL) {
-				if (p2->p_pgrp->pg_id > trypid &&
-				    pidchecked > p2->p_pgrp->pg_id)
-					pidchecked = p2->p_pgrp->pg_id;
-				if (p2->p_session != NULL &&
-				    p2->p_session->s_sid > trypid &&
-				    pidchecked > p2->p_session->s_sid)
-					pidchecked = p2->p_session->s_sid;
-			}
-			PROC_UNLOCK(p2);
-		}
-		if (!doingzomb) {
-			doingzomb = 1;
-			p2 = LIST_FIRST(&zombproc);
-			goto again;
-		}
-	}
-	sx_sunlock(&proctree_lock);
-
-	/*
-	 * RFHIGHPID does not mess with the lastpid counter during boot.
-	 */
-	if (flags & RFHIGHPID)
-		pidchecked = 0;
-	else
-		lastpid = trypid;
-
-	p2 = newproc;
-	p2->p_state = PRS_NEW;		/* protect against others */
-	p2->p_pid = trypid;
-	LIST_INSERT_HEAD(&allproc, p2, p_list);
-	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
 	sx_xunlock(&allproc_lock);
 
 	/*
@@ -511,15 +421,7 @@
 	p2->p_flag = 0;
 	if (p1->p_flag & P_PROFIL)
 		startprofclock(p2);
-	mtx_lock_spin(&sched_lock);
-	p2->p_sflag = PS_INMEM;
-	/*
-	 * Allow the scheduler to adjust the priority of the child and
-	 * parent while we hold the sched_lock.
-	 */
-	sched_fork(td, p2);
 
-	mtx_unlock_spin(&sched_lock);
 	p2->p_ucred = crhold(td->td_ucred);
 	td2->td_ucred = crhold(p2->p_ucred);	/* XXXKSE */
 
@@ -551,42 +453,109 @@
 	if (p2->p_textvp)
 		vref(p2->p_textvp);
 
+	sx_xlock(&proctree_lock);
+	/* We have to lock the allproc list while we look for a pid. */
+	sx_xlock(&allproc_lock);
+
 	/*
-	 * Set up linkage for kernel based threading.
+	 * Find an unused process ID.  We remember a range of unused IDs
+	 * ready to use (from lastpid+1 through pidchecked-1).
+	 *
+	 * If RFHIGHPID is set (used during system boot), do not allocate
+	 * low-numbered pids.
 	 */
-	if ((flags & RFTHREAD) != 0) {
-		mtx_lock(&ppeers_lock);
-		p2->p_peers = p1->p_peers;
-		p1->p_peers = p2;
-		p2->p_leader = p1->p_leader;
-		mtx_unlock(&ppeers_lock);
-		PROC_LOCK(p1->p_leader);
-		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
-			PROC_UNLOCK(p1->p_leader);
-			/*
-			 * The task leader is exiting, so process p1 is
-			 * going to be killed shortly.  Since p1 obviously
-			 * isn't dead yet, we know that the leader is either
-			 * sending SIGKILL's to all the processes in this
-			 * task or is sleeping waiting for all the peers to
-			 * exit.  We let p1 complete the fork, but we need
-			 * to go ahead and kill the new process p2 since
-			 * the task leader may not get a chance to send
-			 * SIGKILL to it.  We leave it on the list so that
-			 * the task leader will wait for this new process
-			 * to commit suicide.
-			 */
-			PROC_LOCK(p2);
-			psignal(p2, SIGKILL);
-			PROC_UNLOCK(p2);
-		} else
-			PROC_UNLOCK(p1->p_leader);
+	trypid = lastpid + 1;
+	if (flags & RFHIGHPID) {
+		if (trypid < 10)
+			trypid = 10;
 	} else {
-		p2->p_peers = NULL;
-		p2->p_leader = p2;
+		if (randompid)
+			trypid += arc4random() % randompid;
+	}
+retry:
+	/*
+	 * If the process ID prototype has wrapped around,
+	 * restart somewhat above 0, as the low-numbered procs
+	 * tend to include daemons that don't exit.
+	 */
+	if (trypid >= PID_MAX) {
+		trypid = trypid % PID_MAX;
+		if (trypid < 100)
+			trypid += 100;
+		pidchecked = 0;
 	}
+	if (trypid >= pidchecked) {
+		int doingzomb = 0;
+
+		pidchecked = PID_MAX;
+		/*
+		 * Scan the active and zombie procs to check whether this pid
+		 * is in use.  Remember the lowest pid that's greater
+		 * than trypid, so we can avoid checking for a while.
+		 */
+		p3 = LIST_FIRST(&allproc);
+again:
+		for (; p3 != NULL; p3 = LIST_NEXT(p3, p_list)) {
+			PROC_LOCK(p3);
+			while (p3->p_pid == trypid ||
+			    (p3->p_pgrp != NULL &&
+			    (p3->p_pgrp->pg_id == trypid ||
+			    (p3->p_session != NULL &&
+			    p3->p_session->s_sid == trypid)))) {
+				trypid++;
+				if (trypid >= pidchecked) {
+					PROC_UNLOCK(p3);
+					goto retry;
+				}
+			}
+			if (p3->p_pid > trypid && pidchecked > p3->p_pid)
+				pidchecked = p3->p_pid;
+			if (p3->p_pgrp != NULL) {
+				if (p3->p_pgrp->pg_id > trypid &&
+				    pidchecked > p3->p_pgrp->pg_id)
+					pidchecked = p3->p_pgrp->pg_id;
+				if (p3->p_session != NULL &&
+				    p3->p_session->s_sid > trypid &&
+				    pidchecked > p3->p_session->s_sid)
+					pidchecked = p3->p_session->s_sid;
+			}
+			PROC_UNLOCK(p3);
+		}
+		if (!doingzomb) {
+			doingzomb = 1;
+			p3 = LIST_FIRST(&zombproc);
+			goto again;
+		}
+	}
+
+	/*
+	 * RFHIGHPID does not mess with the lastpid counter during boot.
+	 */
+	if (flags & RFHIGHPID)
+		pidchecked = 0;
+	else
+		lastpid = trypid;
+
+	p2->p_state = PRS_NEW;		/* protect against others */
+	p2->p_pid = trypid;
+	LIST_INSERT_HEAD(&allproc, p2, p_list);
+	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
+	sx_xunlock(&allproc_lock);
+
+	/*
+	 * Attach the new process to its parent.
+	 *
+	 * If RFNOWAIT is set, the newly created process becomes a child
+	 * of init.  This effectively disassociates the child from the
+	 * parent.
+	 */
+	if (flags & RFNOWAIT)
+		pptr = initproc;
+	else
+		pptr = p1;
+	p2->p_pptr = pptr;
+	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
 
-	sx_xlock(&proctree_lock);
 	PGRP_LOCK(p1->p_pgrp);
 	PROC_LOCK(p2);
 	PROC_LOCK(p1);
@@ -645,20 +614,42 @@
 	_PHOLD(p1);
 	PROC_UNLOCK(p1);
 
+	sx_xunlock(&proctree_lock);
+
 	/*
-	 * Attach the new process to its parent.
-	 *
-	 * If RFNOWAIT is set, the newly created process becomes a child
-	 * of init.  This effectively disassociates the child from the
-	 * parent.
+	 * Set up linkage for kernel based threading.
 	 */
-	if (flags & RFNOWAIT)
-		pptr = initproc;
-	else
-		pptr = p1;
-	p2->p_pptr = pptr;
-	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
-	sx_xunlock(&proctree_lock);
+	if ((flags & RFTHREAD) != 0) {
+		mtx_lock(&ppeers_lock);
+		p2->p_peers = p1->p_peers;
+		p1->p_peers = p2;
+		p2->p_leader = p1->p_leader;
+		mtx_unlock(&ppeers_lock);
+		PROC_LOCK(p1->p_leader);
+		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
+			PROC_UNLOCK(p1->p_leader);
+			/*
+			 * The task leader is exiting, so process p1 is
+			 * going to be killed shortly.  Since p1 obviously
+			 * isn't dead yet, we know that the leader is either
+			 * sending SIGKILL's to all the processes in this
+			 * task or is sleeping waiting for all the peers to
+			 * exit.  We let p1 complete the fork, but we need
+			 * to go ahead and kill the new process p2 since
+			 * the task leader may not get a chance to send
+			 * SIGKILL to it.  We leave it on the list so that
+			 * the task leader will wait for this new process
+			 * to commit suicide.
+			 */
+			PROC_LOCK(p2);
+			psignal(p2, SIGKILL);
+			PROC_UNLOCK(p2);
+		} else
+			PROC_UNLOCK(p1->p_leader);
+	} else {
+		p2->p_peers = NULL;
+		p2->p_leader = p2;
+	}
 
 	/* Inform accounting that we have forked. */
 	p2->p_acflag = AFORK;
@@ -700,10 +691,18 @@
 	/*
 	 * Set the child start time and mark the process as being complete.
 	 */
+	PROC_LOCK(p2);
+	PROC_LOCK(p1);
 	microuptime(&p2->p_stats->p_start);
 	mtx_lock_spin(&sched_lock);
+	p2->p_sflag = PS_INMEM;
 	p2->p_state = PRS_NORMAL;
-
+	/*
+	 * Allow the scheduler to adjust the priority of the child and
+	 * parent while we hold the sched_lock.
+	 */
+	sched_fork(td, p2);
+	PROC_UNLOCK(p2);
 	/*
 	 * If RFSTOPPED not requested, make child runnable and add to
 	 * run queue.
@@ -717,7 +716,6 @@
 	/*
 	 * Now can be swapped.
 	 */
-	PROC_LOCK(p1);
 	_PRELE(p1);
 
 	/*
@@ -752,15 +750,14 @@
 	*procp = p2;
 	return (0);
 fail:
-	sx_sunlock(&proctree_lock);
 	if (ppsratecheck(&lastfail, &curfail, 1))
 		printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
 			uid);
 	sx_xunlock(&allproc_lock);
 #ifdef MAC
-	mac_destroy_proc(newproc);
+	mac_destroy_proc(p2);
 #endif
-	uma_zfree(proc_zone, newproc);
+	uma_zfree(proc_zone, p2);
 	if (p1->p_flag & P_SA) {
 		PROC_LOCK(p1);
 		thread_single_end();



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200408160051.i7G0pamD001343>