Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 3 Oct 2008 08:58:42 -0500
From:      "Weldon Godfrey" <wgodfrey@ena.com>
To:        "Volker Werth" <vwe@freebsd.org>
Cc:        freebsd-bugs@freebsd.org
Subject:   RE: kern/125149: [zfs][nfs] changing into .zfs dir from nfs client causes endless panic loop
Message-ID:  <A7B0A9F02975A74A845FE85D0B95B8FA0A1107A9@misex01.ena.com>
In-Reply-To: <48E535D8.4030101@freebsd.org>
References:  <200810012106.m91L6jq2007417@freefall.freebsd.org> <A7B0A9F02975A74A845FE85D0B95B8FA0A1107A6@misex01.ena.com> <48E535D8.4030101@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help

No problem, here is the result.  Thanks!
Weldon


store1# kgdb /usr/obj/usr/src/sys/GENERIC/kernel.debug vmcore.27
[GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd".

Unread portion of the kernel message buffer:


Fatal trap 12: page fault while in kernel mode
cpuid = 5; apic id = 05
fault virtual address   = 0x108
fault code              = supervisor write data, page not present
instruction pointer     = 0x8:0xffffffff804f06fa
stack pointer           = 0x10:0xffffffffdf761590
frame pointer           = 0x10:0x4
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 807 (nfsd)
trap number             = 12
panic: page fault
cpuid = 5
Uptime: 1m19s
Physical memory: 16367 MB
Dumping 891 MB: 876 860 844 828 812 796 780 764 748 732 716 700 684 668 652 636 620 604 588 572 556 540 524 508 492 476 460 444 428 412 396 380 364 348 332 316 300 284 268 252 236 220 204 188 172 156 140 124 108 92 76 60 44 28 12

#0  doadump () at pcpu.h:194
194     pcpu.h: No such file or directory.
        in pcpu.h
(kgdb) frame 9
#9  0xffffffff8060670d in nfsrv_readdirplus (nfsd=0xffffff000584f100, slp=0xffffff0005725900,
    td=0xffffff00059a0340, mrq=0xffffffffdf761af0) at /usr/src/sys/nfsserver/nfs_serv.c:3613
3613            vput(nvp);
(kgdb) list
3608                    nfsm_reply(NFSX_V3POSTOPATTR);
3609                    nfsm_srvpostop_attr(getret, &at);
3610                    error = 0;
3611                    goto nfsmout;
3612            }
3613            vput(nvp);
3614            nvp = NULL;
3615
3616            dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3617                2 * NFSX_UNSIGNED;
(kgdb) p *vp
$1 = {v_type = VDIR, v_tag = 0xffffffffdf8a7647 "zfs", v_op = 0xffffffffdf8ab4e0, v_data = 0xffffff0005958d00,
  v_mount = 0xffffff0005908978, v_nmntvnodes = {tqe_next = 0xffffff0005aed1f0, tqe_prev = 0xffffff0005a117e8},
  v_un = {vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0}, v_hashlist = {le_next = 0x0,
    le_prev = 0x0}, v_hash = 0, v_cache_src = {lh_first = 0x0}, v_cache_dst = {tqh_first = 0x0,
    tqh_last = 0xffffff0005aed440}, v_dd = 0x0, v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_lock = {
    lk_object = {lo_name = 0xffffffffdf8a7647 "zfs", lo_type = 0xffffffffdf8a7647 "zfs", lo_flags = 70844416,
      lo_witness_data = {lod_list = {stqe_next = 0x0}, lod_witness = 0x0}}, lk_interlock = 0xffffffff80a49ed0,
    lk_flags = 128, lk_sharecount = 0, lk_waitcount = 0, lk_exclusivecount = 0, lk_prio = 80, lk_timo = 51,
    lk_lockholder = 0xffffffffffffffff, lk_newlock = 0x0}, v_interlock = {lock_object = {
      lo_name = 0xffffffff807ee47a "vnode interlock", lo_type = 0xffffffff807ee47a "vnode interlock",
      lo_flags = 16973824, lo_witness_data = {lod_list = {stqe_next = 0x0}, lod_witness = 0x0}}, mtx_lock = 4,
    mtx_recurse = 0}, v_vnlock = 0xffffff0005aed478, v_holdcnt = 2, v_usecount = 2, v_iflag = 0, v_vflag = 0,
  v_writecount = 0, v_freelist = {tqe_next = 0x0, tqe_prev = 0x0}, v_bufobj = {bo_mtx = 0xffffff0005aed4c8,
    bo_clean = {bv_hd = {tqh_first = 0x0, tqh_last = 0xffffff0005aed538}, bv_root = 0x0, bv_cnt = 0}, bo_dirty = {
      bv_hd = {tqh_first = 0x0, tqh_last = 0xffffff0005aed558}, bv_root = 0x0, bv_cnt = 0}, bo_numoutput = 0,
    bo_flag = 0, bo_ops = 0xffffffff809cc320, bo_bsize = 0, bo_object = 0x0, bo_synclist = {le_next = 0x0,
      le_prev = 0x0}, bo_private = 0xffffff0005aed3e0, __bo_vnode = 0xffffff0005aed3e0}, v_pollinfo = 0x0,
  v_label = 0x0}
(kgdb) p *dp
$2 = {d_fileno = 1, d_reclen = 12, d_type = 4 '\004', d_namlen = 1 '\001',
  d_name =3D =
".\000\000\000\001\000\000\000\f\000\004\002..\000\000\002\000\000\000\02=
4\000\004\bsnapshot\000\000\000\000\000\000\000\000@s'\n\000=FF=FF=FF\004=
\000\000\000\003\000\000\000\022\000\000\000\000\000\000\000|D~\200=FF=FF=
=FF=FF|D~\200=FF=FF=FF=FF\000\000:\002", '\0' <repeats 12 times>, =
"\006", '\0' <repeats 32 times>, =
"=E0\224\005\000=FF=FF=FF\000=E0\224\005\000=FF=FF=FF\000=E0\224\005\000=FF=
=FF=FF\000\000\000\000\000\000\000\000\030=D6\224\005\000=FF=FF=FF", =
'\0' <repeats 87 times>}
(kgdb) frame 8
#8  0xffffffff804f06fa in vput (vp=0x0) at atomic.h:142
142     atomic.h: No such file or directory.
        in atomic.h
(kgdb) list
137     in atomic.h
(kgdb)

Weldon


-----Original Message-----
From: Volker Werth [mailto:vwe@freebsd.org]
Sent: Thursday, October 02, 2008 3:58 PM
To: Weldon Godfrey
Cc: freebsd-bugs@freebsd.org
Subject: Re: kern/125149: [zfs][nfs] changing into .zfs dir from nfs client causes endless panic loop

On 10/02/08 21:05, Weldon Godfrey wrote:
> Yes, I can replicate statting .zfs dir from NFS client causes FreeBSD to
> panic and reboot, this time from CentOS 5.0 box.  ...
>
>
> Replicate:
>=20
> [root@asmtp2 ~]# df
> Filesystem           1K-blocks      Used Available Use% Mounted on
> /dev/mapper/VolGroup00-LogVol00
>                       60817412   2814548  54863692   5% /
> /dev/sda1               101086     28729     67138  30% /boot
> tmpfs                  2008628         0   2008628   0% /dev/shm
> 192.168.2.22:/vol/enamail
>                      1286702144 1032758816 253943328  81%
> /var/spool/mail
> 192.168.2.21:/vol/exports/gaggle
>                      400959408 144327584 256631824  36%
> /var/spool/mail/archive/gaggle
> 192.168.2.36:/export/store1-1
>                      1413955712   4619136 1409336576   1%
> /var/spool/mail/store1-1
> [root@asmtp2 ~]#=20
> [root@asmtp2 ~]#=20
> [root@asmtp2 ~]# cd /var/spool/mail/store1-1
> [root@asmtp2 store1-1]# ls
> 1  2  3  4  5  6  7  8  9  crap
> [root@asmtp2 store1-1]# cd .zfs
> [root@asmtp2 .zfs]# ls
> (FreeBSD ZFS server panics here)
>=20
> Weldon
>=20
> Backtrace:
>=20
> store1# kgdb /usr/obj/usr/src/sys/GENERIC/kernel.debug vmcore.27
> [GDB will not be able to debug user-mode threads:
> /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
> GNU gdb 6.1.1 [FreeBSD]
> Copyright 2004 Free Software Foundation, Inc.
> GDB is free software, covered by the GNU General Public License, and you
> are
> welcome to change it and/or distribute copies of it under certain
> conditions.
> Type "show copying" to see the conditions.
> There is absolutely no warranty for GDB.  Type "show warranty" for
> details.
> This GDB was configured as "amd64-marcel-freebsd".
>=20
> Unread portion of the kernel message buffer:
>=20
>=20
> Fatal trap 12: page fault while in kernel mode
> cpuid = 5; apic id = 05
> fault virtual address   = 0x108
> fault code              = supervisor write data, page not present
> instruction pointer     = 0x8:0xffffffff804f06fa
> stack pointer           = 0x10:0xffffffffdf761590
> frame pointer           = 0x10:0x4
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, long 1, def32 0, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 807 (nfsd)
> trap number             = 12
> panic: page fault
> cpuid = 5
> Uptime: 1m19s
> Physical memory: 16367 MB
> Dumping 891 MB: 876 860 844 828 812 796 780 764 748 732 716 700 684 668
> 652 636 620 604 588 572 556 540 524 508 492 476 460 444 428 412 396 380
> 364 348 332 316 300 284 268 252 236 220 204 188 172 156 140 124 108 92
> 76 60 44 28 12
>=20
> #0  doadump () at pcpu.h:194
> 194     pcpu.h: No such file or directory.
>         in pcpu.h
> (kgdb) vt
> Undefined command: "vt".  Try "help".
> (kgdb) bt
> #0  doadump () at pcpu.h:194
> #1  0x0000000000000004 in ?? ()
> #2  0xffffffff80477699 in boot (howto=260) at
> /usr/src/sys/kern/kern_shutdown.c:409
> #3  0xffffffff80477a9d in panic (fmt=0x104 <Address 0x104 out of
> bounds>) at /usr/src/sys/kern/kern_shutdown.c:563
> #4  0xffffffff8072ed24 in trap_fatal (frame=0xffffff00059a0340,
> eva=18446742974291977320)
>     at /usr/src/sys/amd64/amd64/trap.c:724
> #5  0xffffffff8072f0f5 in trap_pfault (frame=0xffffffffdf7614e0,
> usermode=0) at /usr/src/sys/amd64/amd64/trap.c:641
> #6  0xffffffff8072fa38 in trap (frame=0xffffffffdf7614e0) at
> /usr/src/sys/amd64/amd64/trap.c:410
> #7  0xffffffff807156ae in calltrap () at
> /usr/src/sys/amd64/amd64/exception.S:169
> #8  0xffffffff804f06fa in vput (vp=0x0) at atomic.h:142
> #9  0xffffffff8060670d in nfsrv_readdirplus (nfsd=0xffffff000584f100,
> slp=0xffffff0005725900,
>     td=0xffffff00059a0340, mrq=0xffffffffdf761af0) at
> /usr/src/sys/nfsserver/nfs_serv.c:3613
> #10 0xffffffff80615a5d in nfssvc (td=Variable "td" is not available.
> ) at /usr/src/sys/nfsserver/nfs_syscalls.c:461
> #11 0xffffffff8072f377 in syscall (frame=0xffffffffdf761c70) at
> /usr/src/sys/amd64/amd64/trap.c:852
> #12 0xffffffff807158bb in Xfast_syscall () at
> /usr/src/sys/amd64/amd64/exception.S:290
> #13 0x000000080068746c in ?? ()
> Previous frame inner to this frame (corrupt stack?)
>=20
>=20

Weldon,

can you please try the following from kgdb and send the output:

(kgdb) frame 9
(kgdb) list
(kgdb) p *vp
(kgdb) p *dp
(kgdb) frame 8
(kgdb) list

Please keep the core dump as we might need to check some variable values
later.

I think the problem is the NULL pointer passed to vput. A maintainer needs
to check how nvp can end up NULL at that point (assuming my fresh
codebase is not too different from yours).

Thanks

Volker



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?A7B0A9F02975A74A845FE85D0B95B8FA0A1107A9>