Skip site navigation (1) | Skip section navigation (2)
Date:      Sun, 9 Mar 2014 14:16:57 -0400
From:      Glen Barber <gjb@FreeBSD.org>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        freebsd-current@FreeBSD.org
Subject:   Re: panic: vm_fault: fault on nofault entry
Message-ID:  <20140309181657.GI1776@glenbarber.us>
In-Reply-To: <20140309180132.GO24664@kib.kiev.ua>
References:  <20140309165648.GF1776@glenbarber.us> <20140309180132.GO24664@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help

--ZY5CS28jBCfb727c
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Sun, Mar 09, 2014 at 08:01:32PM +0200, Konstantin Belousov wrote:
> On Sun, Mar 09, 2014 at 12:56:48PM -0400, Glen Barber wrote:
> > We are having regular panics on several machines in the cluster.
> >
> > Below follows the script from the kgdb(1) session, hopefully providing
> > enough information.  This machine runs 11.0-CURRENT #2 r262892, from
> > 2 days ago.
> >
> > It uses tmpfs(5) for the port build workspace.  I have an unconfirmed
> > suspicion that use of sysutils/lsof is involved somehow, but cannot be
> > sure.  (In my experience with panics with port building, removing lsof
> > from the system did have an effect, but I may be going down the wrong
> > rabbit hole.)
> >
>
> This is very similar to an issue reported some time ago.
> Try this patch.  I never got any feedback on it.
>
> diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
> index abbbb21..fd9c5df 100644
> --- a/sys/amd64/amd64/mem.c
> +++ b/sys/amd64/amd64/mem.c
> @@ -98,7 +98,13 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
>  kmemphys:
>  			o = v & PAGE_MASK;
>  			c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
> -			error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
> +			v = PHYS_TO_DMAP(v);
> +			if (v < DMAP_MIN_ADDRESS ||
> +			    (v > DMAP_MIN_ADDRESS + dmaplimit &&
> +			    v <= DMAP_MAX_ADDRESS) ||
> +			    pmap_kextract(v) == 0)
> +				return (EFAULT);
> +			error = uiomove((void *)v, (int)c, uio);
>  			continue;
>  		}
>  		else if (dev2unit(dev) == CDEV_MINOR_KMEM) {

There is a very similar patch on one of these machines.

  Index: sys/amd64/amd64/mem.c
  ===================================================================
  --- sys/amd64/amd64/mem.c	(revision 262298)
  +++ sys/amd64/amd64/mem.c	(working copy)
  @@ -98,6 +98,12 @@
   kmemphys:
   			o = v & PAGE_MASK;
   			c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
  +			v = PHYS_TO_DMAP(v);
  +			if (v < DMAP_MIN_ADDRESS ||
  +			    (v > DMAP_MIN_ADDRESS + dmaplimit &&
  +			    v <= DMAP_MAX_ADDRESS) ||
  +			    pmap_kextract(v) == 0)
  +				return (EFAULT);
   			error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
   			continue;
   		}
  Index: sys/amd64/amd64/pmap.c
  ===================================================================
  --- sys/amd64/amd64/pmap.c	(revision 262298)
  +++ sys/amd64/amd64/pmap.c	(working copy)
  @@ -321,7 +321,7 @@
       "Number of kernel page table pages allocated on bootup");
   
   static int ndmpdp;
  -static vm_paddr_t dmaplimit;
  +vm_paddr_t dmaplimit;
   vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
   pt_entry_t pg_nx;
   
  Index: sys/amd64/include/pmap.h
  ===================================================================
  --- sys/amd64/include/pmap.h	(revision 262298)
  +++ sys/amd64/include/pmap.h	(working copy)
  @@ -369,6 +369,7 @@
   extern vm_paddr_t dump_avail[];
   extern vm_offset_t virtual_avail;
   extern vm_offset_t virtual_end;
  +extern vm_paddr_t dmaplimit;
   
   #define	pmap_page_get_memattr(m)	((vm_memattr_t)(m)->md.pat_mode)
   #define	pmap_page_is_write_mapped(m)	(((m)->aflags & PGA_WRITEABLE) != 0)

The machine this change is on panicked today as well.  That machine runs
r262298M, and I have a vmcore from Feb 24 (there was not enough
available space to get a crash dump today).

The backtrace from Feb 24 follows.

Script started on Sun Mar  9 18:14:41 2014
root@redbuild04.nyi:/usr/obj/usr/src/sys/REDBUILD # sh
# kgdb ./kernel.debug /var/crash/vmcore.3
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "amd64-marcel-freebsd"...

Unread portion of the kernel message buffer:
panic: vm_fault: fault on nofault entry, addr: fffffe03becbc000
cpuid = 23
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe1838ec1180
kdb_backtrace() at kdb_backtrace+0x39/frame 0xfffffe1838ec1230
panic() at panic+0x155/frame 0xfffffe1838ec12b0
vm_fault_hold() at vm_fault_hold+0x1e7a/frame 0xfffffe1838ec1500
vm_fault() at vm_fault+0x77/frame 0xfffffe1838ec1540
trap_pfault() at trap_pfault+0x199/frame 0xfffffe1838ec15e0
trap() at trap+0x4a0/frame 0xfffffe1838ec17f0
calltrap() at calltrap+0x8/frame 0xfffffe1838ec17f0
--- trap 0xc, rip = 0xffffffff80d971fb, rsp = 0xfffffe1838ec18b0, rbp = 0xfffffe1838ec1910 ---
copyout() at copyout+0x3b/frame 0xfffffe1838ec1910
memrw() at memrw+0x1ef/frame 0xfffffe1838ec1950
giant_read() at giant_read+0xa4/frame 0xfffffe1838ec1990
devfs_read_f() at devfs_read_f+0xeb/frame 0xfffffe1838ec19f0
dofileread() at dofileread+0x95/frame 0xfffffe1838ec1a40
kern_readv() at kern_readv+0x68/frame 0xfffffe1838ec1a90
sys_read() at sys_read+0x63/frame 0xfffffe1838ec1ae0
amd64_syscall() at amd64_syscall+0x3fb/frame 0xfffffe1838ec1bf0
Xfast_syscall() at Xfast_syscall+0xfb/frame 0xfffffe1838ec1bf0
--- syscall (3, FreeBSD ELF64, sys_read), rip = 0x800b8343a, rsp = 0x7fffffffcfe8, rbp = 0x7fffffffd030 ---
KDB: enter: panic

Reading symbols from /boot/kernel/zfs.ko.symbols...done.
Loaded symbols for /boot/kernel/zfs.ko.symbols
Reading symbols from /boot/kernel/opensolaris.ko.symbols...done.
Loaded symbols for /boot/kernel/opensolaris.ko.symbols
Reading symbols from /boot/kernel/ums.ko.symbols...done.
Loaded symbols for /boot/kernel/ums.ko.symbols
Reading symbols from /boot/kernel/tmpfs.ko.symbols...done.
Loaded symbols for /boot/kernel/tmpfs.ko.symbols
Reading symbols from /boot/kernel/nullfs.ko.symbols...done.
Loaded symbols for /boot/kernel/nullfs.ko.symbols
Reading symbols from /boot/kernel/linprocfs.ko.symbols...done.
Loaded symbols for /boot/kernel/linprocfs.ko.symbols
Reading symbols from /boot/kernel/linux.ko.symbols...done.
Loaded symbols for /boot/kernel/linux.ko.symbols
#0  doadump (textdump=-954994000) at pcpu.h:219
219		__asm("movq %%gs:%1,%0" : "=r" (td)
(kgdb) bt
#0  doadump (textdump=-954994000) at pcpu.h:219
#1  0xffffffff8034a175 in db_fncall (dummy1=<value optimized out>,
    dummy2=<value optimized out>, dummy3=<value optimized out>, dummy4=<value optimized out>)
    at /usr/src/sys/ddb/db_command.c:578
#2  0xffffffff80349e5d in db_command (cmd_table=0x0) at /usr/src/sys/ddb/db_command.c:449
#3  0xffffffff80349bd4 in db_command_loop () at /usr/src/sys/ddb/db_command.c:502
#4  0xffffffff8034c630 in db_trap (type=<value optimized out>, code=0)
    at /usr/src/sys/ddb/db_main.c:231
#5  0xffffffff80987329 in kdb_trap (type=3, code=0, tf=<value optimized out>)
    at /usr/src/sys/kern/subr_kdb.c:656
#6  0xffffffff80d99009 in trap (frame=0xfffffe1838ec1160)
    at /usr/src/sys/amd64/amd64/trap.c:571
#7  0xffffffff80d7dd12 in calltrap () at /usr/src/sys/amd64/amd64/exception.S:231
#8  0xffffffff80986a8e in kdb_enter (why=0xffffffff8100ed4f "panic", msg=<value optimized out>)
    at cpufunc.h:63
#9  0xffffffff809462b5 in panic (fmt=<value optimized out>)
    at /usr/src/sys/kern/kern_shutdown.c:752
#10 0xffffffff80c0981a in vm_fault_hold (map=<value optimized out>,
    vaddr=<value optimized out>, fault_type=<value optimized out>,
    fault_flags=<value optimized out>, m_hold=<value optimized out>)
    at /usr/src/sys/vm/vm_fault.c:272
#11 0xffffffff80c07957 in vm_fault (map=0xfffff80002000000, vaddr=<value optimized out>,
    fault_type=1 '\001', fault_flags=128) at /usr/src/sys/vm/vm_fault.c:217
#12 0xffffffff80d997f9 in trap_pfault (frame=0xfffffe1838ec1800, usermode=0)
    at /usr/src/sys/amd64/amd64/trap.c:767
#13 0xffffffff80d99020 in trap (frame=0xfffffe1838ec1800)
    at /usr/src/sys/amd64/amd64/trap.c:455
#14 0xffffffff80d7dd12 in calltrap () at /usr/src/sys/amd64/amd64/exception.S:231
#15 0xffffffff80d971fb in copyout () at /usr/src/sys/amd64/amd64/support.S:246
#16 0xffffffff8099bb35 in uiomove_faultflag (cp=<value optimized out>,
    n=<value optimized out>, uio=0xfffffe1838ec1ab0, nofault=<value optimized out>)
    at /usr/src/sys/kern/subr_uio.c:192
#17 0xffffffff80d8576f in memrw (dev=<value optimized out>, uio=<value optimized out>,
    flags=<value optimized out>) at /usr/src/sys/amd64/amd64/mem.c:107
---Type <return> to continue, or q <return> to quit---
#18 0xffffffff808ec764 in giant_read (dev=0xfffff80011347c00, uio=0xfffffe1838ec1ab0, ioflag=0)
    at /usr/src/sys/kern/kern_conf.c:442
#19 0xffffffff80817e2b in devfs_read_f (fp=0xfffff80854be3140, uio=0xfffffe1838ec1ab0,
    cred=<value optimized out>, flags=0, td=0xfffff801f52c5490)
    at /usr/src/sys/fs/devfs/devfs_vnops.c:1193
#20 0xffffffff809a0e25 in dofileread (td=0xfffff801f52c5490, fd=4, fp=0xfffff80854be3140,
    auio=0xfffffe1838ec1ab0, offset=<value optimized out>, flags=1172307968) at file.h:299
#21 0xffffffff809a0b48 in kern_readv (td=0xfffff801f52c5490, fd=4, auio=0xfffffe1838ec1ab0)
    at /usr/src/sys/kern/sys_generic.c:256
#22 0xffffffff809a0ad3 in sys_read (td=<value optimized out>, uap=<value optimized out>)
    at /usr/src/sys/kern/sys_generic.c:171
#23 0xffffffff80d9a04b in amd64_syscall (td=0xfffff801f52c5490, traced=0) at subr_syscall.c:133
#24 0xffffffff80d7dffb in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:390
#25 0x0000000800b8343a in ?? ()
Previous frame inner to this frame (corrupt stack?)
Current language:  auto; currently minimal
(kgdb) quit

Script done on Sun Mar  9 18:14:59 2014

Glen


--ZY5CS28jBCfb727c
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (FreeBSD)

iQIcBAEBCAAGBQJTHLAZAAoJELls3eqvi17QbxAP/31rC/3qptag074ujxnGgGim
3roBfVTbYQzAIhyWUex2pteByRL1ZQMB8tVftSmvVxm9QBhkNeGmuMiK5t6qfAC9
/aXaGDAGI/ieYki9nS29uSlmJaRCFxd6iN/kaHaJuhTajRB4dsK83E5dKV7TUFAy
kqs23jPCaFjYtIXeegk8YRFXyfkwNsBqXp4qvHD4l5eiXSwGPD9oIXJuRuMfYZOR
90pZLgpcjy1THVnshPC29RHA5ppr49QRVgu/haEQMKscK5JgekLF69XZ3H/n+ILb
poDk9/e+Ypyq1fBIs6gvC/y9IfTQjGdPQVBfvMPR7IAgLr5St0qQvFAEdTTbdOkb
vDt5PZL+nZNPH0g8N/HUB4DEnEDtI7DUkoM7WrM916Le77SBvvG37VCVyrExhu4r
uio5pYlSteD5hll4eh0o1g0tpzbfPtnO0W7cx0ZDQteImD/J5ybFbVGk5ERHWeHY
jXuq+JRPcmTeCvaPHRCL+3ceIsTnHNnzW/RwilgaOpLKH0CNY9rMpsX5gwkEZuaK
P+2dK5w9vOYTuoCBHZYfJyp68C9R6x+pbu0QUMEqGwit7ZhAngRgDHxNJiVhpr1N
dMTOoNh9adKRR2umXe3M0Mpt68kMuPqQBbSZdU9IsEs6JYjEAN+BlHLrHzrIGsNK
J1Ed7Xlf7PMLqFHVcfs8
=oYZe
-----END PGP SIGNATURE-----

--ZY5CS28jBCfb727c--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140309181657.GI1776>