Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 14 Jan 2011 10:05:31 +0100
From:      Przemyslaw Frasunek <przemyslaw@frasunek.com>
To:        freebsd-net@freebsd.org
Subject:   Netgraph/mpd5 stability issues
Message-ID:  <4D3011DB.9050900@frasunek.com>

next in thread | raw e-mail | index | archive | help
Hello,

I'm using mpd 5.5 on three PPPoE routers, each servicing about 300 concurrent
PPPoE sessions. The routers are based on Intel SR1630GP hardware platforms and
run FreeBSD 7.3-RELEASE.

I'm experiencing stability issues related to Netgraph. None of the above routers
can survive more than 20-30 days of uptime under typical load. There are
different flavors of kernel panics, but all are somehow related to Netgraph.
Typical backtraces follow:

(kgdb) bt
#1  0xc0836ac7 in boot (howto=260) at ../../../kern/kern_shutdown.c:418
#2  0xc0836d99 in panic (fmt=Variable "fmt" is not available.
) at ../../../kern/kern_shutdown.c:574
#3  0xc0b5ef1c in trap_fatal (frame=0xe7ce6820, eva=152)
    at ../../../i386/i386/trap.c:950
#4  0xc0b5f1a0 in trap_pfault (frame=0xe7ce6820, usermode=0, eva=152)
    at ../../../i386/i386/trap.c:863
#5  0xc0b5fb95 in trap (frame=0xe7ce6820) at ../../../i386/i386/trap.c:541
#6  0xc0b42e7b in calltrap () at ../../../i386/i386/exception.s:166
#7  0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
#8  0xc5f488cc in ng_path2noderef (here=0xc62a0b80,
    address=0xcc4c2110 "ng366:", destp=0xe7ce6ac8, lasthook=0xe7ce6ac4)
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:1673
#9  0xc5f48cc0 in ng_address_path (here=0xc62a0b80, item=0xc5e42ae0,
    address=0xcc4c2110 "ng366:", retaddr=0)
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3488
#10 0xc5f431d3 in ngc_send (so=0xc5b53340, flags=0, m=0xd4c6cb00,
    addr=0xccac9780, control=0x0, td=0xc65a2b40)
    at /usr/src/sys/modules/netgraph/socket/../../../netgraph/ng_socket.c:288
#11 0xc0894bfa in sosend_generic (so=0xc5b53340, addr=0xccac9780,
    uio=0xe7ce6be8, top=0xd4c6cb00, control=0x0, flags=0, td=0xc65a2b40)
    at ../../../kern/uipc_socket.c:1243
#12 0xc0890a3f in sosend (so=0xc5b53340, addr=0xccac9780, uio=0xe7ce6be8,
    top=0x0, control=0x0, flags=0, td=0xc65a2b40)
    at ../../../kern/uipc_socket.c:1285
#13 0xc0897fa6 in kern_sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0,
    control=0x0, segflg=UIO_USERSPACE) at ../../../kern/uipc_syscalls.c:805
#14 0xc089b181 in sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0)
    at ../../../kern/uipc_syscalls.c:742
#15 0xc089b298 in sendto (td=0xc65a2b40, uap=0xe7ce6cfc)
    at ../../../kern/uipc_syscalls.c:857
#16 0xc0b5f4f5 in syscall (frame=0xe7ce6d38) at ../../../i386/i386/trap.c:1101
#17 0xc0b42ee0 in Xint0x80_syscall () at ../../../i386/i386/exception.s:262
#18 0x00000033 in ?? ()
(kgdb) frame 7
#7  0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
896             LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
(kgdb) list
891             }
892
893             /* Find node by name */
894             NG_NAMEHASH(name, hash);
895             mtx_lock(&ng_namehash_mtx);
896             LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
897                     if (NG_NODE_IS_VALID(node) &&
898                         (strcmp(NG_NODE_NAME(node), name) == 0)) {
899                             break;
900                     }
(kgdb) print node
$1 = 0x74
(kgdb) print ng_name_hash
$3 = {{lh_first = 0xcbab6200}, {lh_first = 0x0}, {lh_first = 0xc6538300}, {
    lh_first = 0xc67e6400}, {lh_first = 0xc6538700}, {lh_first = 0xca2abc00}, {
    lh_first = 0xc66d5000}, {lh_first = 0xca8f9200}, {lh_first = 0xca815580}, {
    lh_first = 0xc62a2180}, {lh_first = 0xca2ab180}, {lh_first = 0xc6af7d00}, {
    lh_first = 0xcbe09a00}, {lh_first = 0xca81b800}, {lh_first = 0xc5b4e980}, {
    lh_first = 0xcbc1f080}, {lh_first = 0xca2a5480}, {lh_first = 0xc672b580}, {
    lh_first = 0xcbdb1e80}, {lh_first = 0xcc772c00}, {lh_first = 0xc6a99980}, {
    lh_first = 0xc629d600}, {lh_first = 0xc6733000}, {lh_first = 0xca967800}, {
    lh_first = 0xc5b3b780}, {lh_first = 0xc629c280}, {lh_first = 0xc6396980}, {
    lh_first = 0xc6a5f300}, {lh_first = 0xc5bf2280}, {lh_first = 0xcc5ebe80}, {
    lh_first = 0xc5e0a400}, {lh_first = 0xc6608100}, {lh_first = 0xc6520e00}, {
    lh_first = 0xc6642680}, {lh_first = 0xca8f7b80}, {lh_first = 0xcbd9ce80}, {
    lh_first = 0xca81b380}, {lh_first = 0x0} <repeats 13 times>, {
    lh_first = 0xc67b8080}, {lh_first = 0xc6455c80}, {lh_first = 0xc652a380}, {
    lh_first = 0xc6a74780}, {lh_first = 0xc62d8400}, {lh_first = 0xcc154400}, {
    lh_first = 0xca852b80}, {lh_first = 0xcc351580}, {lh_first = 0xc6396a80}, {
    lh_first = 0xc66f9580}, {lh_first = 0xc58c8e00}, {lh_first = 0xcc01a000}, {
    lh_first = 0xc6614e80}, {lh_first = 0xc6750800}, {lh_first = 0xcc154e80}, {
    lh_first = 0xcc32f080}, {lh_first = 0xcbb10e80}, {lh_first = 0xcc1e3700}, {
    lh_first = 0xcc020280}, {lh_first = 0xcc75ad00}, {lh_first = 0xca901b00}, {
    lh_first = 0xcc3c8380}, {lh_first = 0xcbd90580}, {lh_first = 0xcbb0c480}, {
    lh_first = 0xcbed1300}, {lh_first = 0xc6644480}, {lh_first = 0xcc02ca80}, {
    lh_first = 0xcc0d1980}, {lh_first = 0xcc35e200}, {lh_first = 0xcc0dc200}, {
    lh_first = 0xca9dc200}, {lh_first = 0xcbecf880}, {lh_first = 0xcc065080}, {
    lh_first = 0xcc47b280}, {lh_first = 0xcc722a80}, {lh_first = 0xcc28cd80}, {
    lh_first = 0xcbd73400}, {lh_first = 0xcbf76b00}, {lh_first = 0xcbbfc280}, {
    lh_first = 0xc629c800}, {lh_first = 0xc6700200}, {lh_first = 0x0}, {
    lh_first = 0x0}, {lh_first = 0xc5e0b700}, {lh_first = 0xc672a200}, {
    lh_first = 0xc62a2080}, {lh_first = 0x0}, {lh_first = 0xc673fc80}, {
    lh_first = 0xc5bf2600}, {lh_first = 0xca969800}, {lh_first = 0xc6aa6700}, {
    lh_first = 0xc6750b80}, {lh_first = 0xcc0bc200}, {lh_first = 0xcbeead80}, {
    lh_first = 0xcc484e00}, {lh_first = 0xcbae6900}, {lh_first = 0xcbbef800}, {
    lh_first = 0xcc797500}, {lh_first = 0xc65f3d80}, {lh_first = 0xcbe95900}, {
    lh_first = 0xcba8fb80}, {lh_first = 0xcbdb1580}, {lh_first = 0xcc75b080}, {
    lh_first = 0xcbd7fb80}, {lh_first = 0xcc75db80}, {lh_first = 0xc5e59500}, {
    lh_first = 0xcbd6fb00}, {lh_first = 0xc6a7ed00}, {lh_first = 0xcbe0bc80}, {
    lh_first = 0xcc3c1180}, {lh_first = 0xc7486d00}, {lh_first = 0xcba93880}, {
    lh_first = 0xcc0c6000}, {lh_first = 0x0}, {lh_first = 0x0}, {
    lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}}

Another one:

(kgdb) bt
#0  doadump () at pcpu.h:196
#1  0xc0836ac7 in boot (howto=260) at ../../../kern/kern_shutdown.c:418
#2  0xc0836d99 in panic (fmt=Variable "fmt" is not available.
) at ../../../kern/kern_shutdown.c:574
#3  0xc0b5ef1c in trap_fatal (frame=0xc53dbaac, eva=36)
    at ../../../i386/i386/trap.c:950
#4  0xc0b5f1a0 in trap_pfault (frame=0xc53dbaac, usermode=0, eva=36)
    at ../../../i386/i386/trap.c:863
#5  0xc0b5fb95 in trap (frame=0xc53dbaac) at ../../../i386/i386/trap.c:541
#6  0xc0b42e7b in calltrap () at ../../../i386/i386/exception.s:166
#7  0xc5f39d95 in ng_address_hook (here=0x0, item=0xc66619f0, hook=0xcc87f680,
    retaddr=0)
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3456
#8  0xc5f339ff in ngd_send (so=0xc5b68680, flags=0, m=0xd58aec00,
    addr=0xc666d870, control=0x0, td=0xc5910000)
    at /usr/src/sys/modules/netgraph/socket/../../../netgraph/ng_socket.c:445
#9  0xc0894bfa in sosend_generic (so=0xc5b68680, addr=0xc666d870,
    uio=0xc53dbbe8, top=0xd58aec00, control=0x0, flags=0, td=0xc5910000)
    at ../../../kern/uipc_socket.c:1243
#10 0xc0890a3f in sosend (so=0xc5b68680, addr=0xc666d870, uio=0xc53dbbe8,
    top=0x0, control=0x0, flags=0, td=0xc5910000)
    at ../../../kern/uipc_socket.c:1285
#11 0xc0897fa6 in kern_sendit (td=0xc5910000, s=6, mp=0xc53dbc64, flags=0,
    control=0x0, segflg=UIO_USERSPACE) at ../../../kern/uipc_syscalls.c:805
#12 0xc089b181 in sendit (td=0xc5910000, s=6, mp=0xc53dbc64, flags=0)
    at ../../../kern/uipc_syscalls.c:742
#13 0xc089b298 in sendto (td=0xc5910000, uap=0xc53dbcfc)
    at ../../../kern/uipc_syscalls.c:857
#14 0xc0b5f4f5 in syscall (frame=0xc53dbd38) at ../../../i386/i386/trap.c:1101
#15 0xc0b42ee0 in Xint0x80_syscall () at ../../../i386/i386/exception.s:262
#16 0x00000033 in ?? ()
(kgdb) frame 7
#7  0xc5f39d95 in ng_address_hook (here=0x0, item=0xc66619f0, hook=0xcc87f680,
    retaddr=0)
    at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3456
3456            if ((hook == NULL) ||
(kgdb) list
3451             * Quick sanity check..
3452             * Since a hook holds a reference on it's node, once we know
3453             * that the peer is still connected (even if invalid,) we know
3454             * that the peer node is present, though maybe invalid.
3455             */
3456            if ((hook == NULL) ||
3457                NG_HOOK_NOT_VALID(hook) ||
3458                NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) ||
3459                NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) {
3460                    NG_FREE_ITEM(item);
(kgdb) x/i $eip
0xc5f39d95 <ng_address_hook+69>:        testb  $0x1,0x24(%edi)
(kgdb) info reg edi
edi            0x0      0
(kgdb) print *hook
$2 = {hk_name = "b99", '\0' <repeats 28 times>, hk_private = 0xc5b27140,
  hk_flags = 0, hk_refs = 2, hk_type = 0, hk_peer = 0xc647bc00,
  hk_node = 0xc592d500, hk_hooks = {le_next = 0xc69a1b00,
    le_prev = 0xc6991238}, hk_rcvmsg = 0, hk_rcvdata = 0}


Besides that, I had an interesting issue when one customer's misconfigured
router tried to establish several PPPoE sessions per second. This "stress test"
caused multiple kernel panics, each occurring after a few minutes of uptime. I
have no backtrace, but I remember that it was similar to one of the above.

I'll be grateful for any advice.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4D3011DB.9050900>