Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 02 Feb 2011 00:30:20 +0600
From:      Eugene Grosbein <egrosbein@rdtc.ru>
To:        Julian Elischer <julian@freebsd.org>
Cc:        freebsd-net@freebsd.org, John Baldwin <jhb@freebsd.org>
Subject:   Re: panic: bufwrite: buffer is not busy???
Message-ID:  <4D48513C.40503@rdtc.ru>
In-Reply-To: <4D4670C2.4050500@freebsd.org>
References:  <4D3011DB.9050900@frasunek.com>	<4D30458D.30007@sentex.net>	<4D309983.70709@rdtc.ru>	<201101141437.55421.jhb@freebsd.org>	<4D46575A.802@rdtc.ru> <4D4670C2.4050500@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 31.01.2011 14:20, Julian Elischer wrote:

> replace with:
> 
> 3504            if ((hook == NULL) ||
> 3505                NG_HOOK_NOT_VALID(hook) ||
>                      ((peer = NG_HOOK_PEER(hook)) == NULL) ||
> 3506                NG_HOOK_NOT_VALID(peer) ||
>                      ((peernode = NG_PEER_NODE(hook)) == NULL) ||
> 3507                NG_NODE_NOT_VALID(peernode)) {
>                          if (peer)
>                                kassert((peernode != NULL), ("peer node NULL wile peer hook exists"));
> 3508                    NG_FREE_ITEM(item);

This day I have updated panicing router to RELENG_8 and combined changes supposed
by Julian and Gleb. After 8 hours it has just paniced again and could not finish
to write crashdump again:

Fatal trap 12: page fault while in kernel mode
cpuid = 3; apic id = 06
fault virtual address   = 0x63
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff803d4ccd
stack pointer           = 0x28:0xffffff80ebffc600
frame pointer           = 0x28:0xffffff80ebffc680
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 2390 (mpd5)
trap number             = 12
panic: page fault
cpuid = 3
Uptime: 8h3m51s
Dumping 4087 MB (3 chunks)
  chunk 0: 1MB (150 pages) ... ok
  chunk 1: 3575MB (915088 pages) 3559 3543panic: bufwrite: buffer is not busy???
cpuid = 3
Uptime: 8h3m52s
Automatic reboot in 15 seconds - press a key on the console to abort

# gdb kernel
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...
(gdb) l *0xffffffff803d4ccd
0xffffffff803d4ccd is in ng_pppoe_disconnect (netgraph.h:191).
186                                     int line);
187
188     static __inline void
189     _chkhook(hook_p hook, char *file, int line)
190     {
191             if (hook->hk_magic != HK_MAGIC) {
192                     printf("Accessing freed hook ");
193                     dumphook(hook, file, line);
194             }
195             hook->lastline = line;
(gdb) x/i 0xffffffff803d4ccd
0xffffffff803d4ccd <ng_pppoe_disconnect+301>:   cmpl   $0x78573011,0x64(%rbx)


Here is a patch I've applied to the tree:

--- sys/netgraph/ng_base.c.orig	2011-02-01 12:34:09.000000000 +0600
+++ sys/netgraph/ng_base.c	2011-02-01 12:00:17.000000000 +0600
@@ -1643,10 +1643,8 @@
 				node_p *destp, hook_p *lasthook)
 {
 	char    fullpath[NG_PATHSIZ];
-	char   *nodename, *path, pbuf[2];
+	char   *nodename, *path;
 	node_p  node, oldnode;
-	char   *cp;
-	hook_p hook = NULL;
 
 	/* Initialize */
 	if (destp == NULL) {
@@ -1664,11 +1662,6 @@
 		TRAP_ERROR();
 		return EINVAL;
 	}
-	if (path == NULL) {
-		pbuf[0] = '.';	/* Needs to be writable */
-		pbuf[1] = '\0';
-		path = pbuf;
-	}
 
 	/*
 	 * For an absolute address, jump to the starting node.
@@ -1690,41 +1683,41 @@
 		NG_NODE_REF(node);
 	}
 
+	if (path == NULL) {
+		if (lasthook != NULL)
+			*lasthook = NULL;
+		*destp = node;
+		return (0);
+	}
+
 	/*
 	 * Now follow the sequence of hooks
-	 * XXX
-	 * We actually cannot guarantee that the sequence
-	 * is not being demolished as we crawl along it
-	 * without extra-ordinary locking etc.
-	 * So this is a bit dodgy to say the least.
-	 * We can probably hold up some things by holding
-	 * the nodelist mutex for the time of this
-	 * crawl if we wanted.. At least that way we wouldn't have to
-	 * worry about the nodes disappearing, but the hooks would still
-	 * be a problem.
+	 *
+	 * XXXGL: The path may demolish as we go the sequence, but if
+	 * we hold the topology mutex at critical places, then, I hope,
+	 * we would always have valid pointers in hand, although the
+	 * path behind us may no longer exist.
 	 */
-	for (cp = path; node != NULL && *cp != '\0'; ) {
+	for (;;) {
+		hook_p hook;
 		char *segment;
 
 		/*
 		 * Break out the next path segment. Replace the dot we just
-		 * found with a NUL; "cp" points to the next segment (or the
+		 * found with a NUL; "path" points to the next segment (or the
 		 * NUL at the end).
 		 */
-		for (segment = cp; *cp != '\0'; cp++) {
-			if (*cp == '.') {
-				*cp++ = '\0';
+		for (segment = path; *path != '\0'; path++) {
+			if (*path == '.') {
+				*path++ = '\0';
 				break;
 			}
 		}
 
-		/* Empty segment */
-		if (*segment == '\0')
-			continue;
-
 		/* We have a segment, so look for a hook by that name */
 		hook = ng_findhook(node, segment);
 
+		mtx_lock(&ng_topo_mtx);
 		/* Can't get there from here... */
 		if (hook == NULL
 		    || NG_HOOK_PEER(hook) == NULL
@@ -1732,15 +1725,7 @@
 		    || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) {
 			TRAP_ERROR();
 			NG_NODE_UNREF(node);
-#if 0
-			printf("hooknotvalid %s %s %d %d %d %d ",
-					path,
-					segment,
-					hook == NULL,
-					NG_HOOK_PEER(hook) == NULL,
-					NG_HOOK_NOT_VALID(hook),
-					NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook)));
-#endif
+			mtx_unlock(&ng_topo_mtx);
 			return (ENOENT);
 		}
 
@@ -1757,21 +1742,25 @@
 		NG_NODE_UNREF(oldnode);	/* XXX another race */
 		if (NG_NODE_NOT_VALID(node)) {
 			NG_NODE_UNREF(node);	/* XXX more races */
-			node = NULL;
+			mtx_unlock(&ng_topo_mtx);
+			TRAP_ERROR();
+			return (ENXIO);
 		}
-	}
 
-	/* If node somehow missing, fail here (probably this is not needed) */
-	if (node == NULL) {
-		TRAP_ERROR();
-		return (ENXIO);
+		if (*path == '\0') {
+			if (lasthook != NULL) {
+				if (hook != NULL) {
+					*lasthook = NG_HOOK_PEER(hook);
+					NG_HOOK_REF(*lasthook);
+				} else
+					*lasthook = NULL;
+			}
+			mtx_unlock(&ng_topo_mtx);
+			*destp = node;
+			return (0);
+		}
+		mtx_unlock(&ng_topo_mtx);
 	}
-
-	/* Done */
-	*destp = node;
-	if (lasthook != NULL)
-		*lasthook = (hook ? NG_HOOK_PEER(hook) : NULL);
-	return (0);
 }
 
 /***************************************************************\
@@ -3501,12 +3490,18 @@
 	 * that the peer is still connected (even if invalid,) we know
 	 * that the peer node is present, though maybe invalid.
 	 */
+	mtx_lock(&ng_topo_mtx);
 	if ((hook == NULL) ||
 	    NG_HOOK_NOT_VALID(hook) ||
-	    NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) ||
-	    NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) {
+	     ((peer = NG_HOOK_PEER(hook)) == NULL) ||
+	    NG_HOOK_NOT_VALID(peer) ||
+	     ((peernode = NG_PEER_NODE(hook)) == NULL) ||
+	    NG_NODE_NOT_VALID(peernode)) {
+		if (peer)
+		    KASSERT((peernode != NULL), ("peer node NULL while peer hook exists"));
 		NG_FREE_ITEM(item);
 		TRAP_ERROR();
+ 		mtx_unlock(&ng_topo_mtx);
 		return (ENETDOWN);
 	}
 
@@ -3518,6 +3513,9 @@
 	NGI_SET_HOOK(item, peer);
 	NGI_SET_NODE(item, peernode);
 	SET_RETADDR(item, here, retaddr);
+
+	mtx_unlock(&ng_topo_mtx);
+
 	return (0);
 }
 
@@ -3539,10 +3537,9 @@
 		return (error);
 	}
 	NGI_SET_NODE(item, dest);
-	if ( hook) {
-		NG_HOOK_REF(hook);	/* don't let it go while on the queue */
+	if (hook)
 		NGI_SET_HOOK(item, hook);
-	}
+
 	SET_RETADDR(item, here, retaddr);
 	return (0);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4D48513C.40503>