Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 19 Nov 2004 14:06:11 +0100
From:      Herve Boulouis <amon@sockar.homeip.net>
To:        Robert Watson <rwatson@freebsd.org>
Cc:        freebsd-stable@freebsd.org
Subject:   Re: fast data access mmu miss with 5.3-STABLE from today
Message-ID:  <20041119140611.C91236@ra.aabs>
In-Reply-To: <Pine.NEB.3.96L.1041119104427.66045O-100000@fledge.watson.org>; from rwatson@freebsd.org on Fri, Nov 19, 2004 at 10:45:43AM +0000
References:  <20041117143450.A91236@ra.aabs> <Pine.NEB.3.96L.1041119104427.66045O-100000@fledge.watson.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Le 19/11/2004 à 10:45, Robert Watson a écrit:
> 
> On Wed, 17 Nov 2004, Herve Boulouis wrote:
> 
> > just cvsuped my 5.3-STABLE 3 hours ago (dual ultra sparc with SMP) and
> > got the following panic : 
> 
> This is starting to look familiar, I saw a similar report today also.
> Could you convert the below symbol+offset addresses to file+line using a
> copy of the kernel with symbol tables and gdb?
> 
> If you could also grab the results of "show pcpu" and "ps", that would be
> helpful.

Here they are (from the last panic)

I tried testing both with and without mpsafenet and got the panic every time
(the box is doing ftp traffic). This particular panic was with mpsafenet=0.

Relevant portions of the kernel config :

machine         sparc64
cpu             SUN4U
ident           NETRA
maxusers        512
makeoptions     DEBUG=-g                #Build kernel with gdb(1) debug symbols
options         KDB                     # Enable kernel debugger support.
options         DDB                     # Support DDB.
options         GDB                     # Support remote GDB.
options         ADAPTIVE_GIANT
options		INET
options		INET6
device          ofw_console     # OpenBoot firmware console device
device          genclock        # Generic clock interface
device          eeprom          # eeprom (really an ebus driver for the MK48Txx)
device          "mk48txx"       # Mostek MK48T02, MK48T08, MK48T59 clock
options         ZERO_COPY_SOCKETS
options         SMP                     # Symmetric MultiProcessor Kernel

No invariants, no witness.

db> trace
panic() at panic+0x168
trap() at trap+0x3ac
-- fast data access mmu miss tar=0 %o7=0xc02ba5c0 --
m_copym() at m_copym+0x2c
tcp_output() at tcp_output+0xeec
tcp_input() at tcp_input+0x2484
ip_input() at ip_input+0x934
netisr_processqueue() at netisr_processqueue+0xc0
swi_net() at swi_net+0x104
ithread_loop() at ithread_loop+0x1dc
fork_exit() at fork_exit+0x9c
fork_trampoline() at fork_trampoline+0x8
db> show pcpu
cpuid        = 0
curthread    = 0xfffff800af9c5480: pid 15 "swi1: net"
curpcb       = 0xdb071980
fpcurthread  = none
idlethread   = 0xfffff800af9c4cd0: pid 12 "idle: cpu0"
db> ps
  pid   proc     uarea   uid  ppid  pgrp  flag   stat  wmesg    wchan  cmd
17922 fffff800866b2be0 e20f2000    0   408   408 0000101 [SLPQ sbwait 0xfffff800a2897dd0][SLP] pure-ftpd
17916 fffff800866b2ed8 e20f4000    0   408   408 0000101 [SLPQ select 0xc03e0350][SLP] pure-ftpd
17915 fffff800867ba2f8 e2100000    0   408   408 0000101 [SLPQ select 0xc03e0350][SLP] pure-ftpd
17907 fffff800a38185f0 e232e000    0   408   408 0000101 [SLPQ sbwait 0xfffff8009d35ea68][SLP] pure-ftpd
17895 fffff800af5eb1d0 e118e000    0   408   408 0000101 [SLPQ sbwait 0xfffff80084808a68][SLP] pure-ftpd
17877 fffff800866b3ab8 e20fc000    0   408   408 0000101 [SLPQ sbwait 0xfffff8009d35ec90][SLP] pure-ftpd
17864 fffff800a38191d0 e2336000    0   408   408 0000101 [SLPQ sbwait 0xfffff800a4733530][SLP] pure-ftpd
17863 fffff800af640000 e11ae000    0   408   408 0000101 [SLPQ sbwait 0xfffff80084809308][SLP] pure-ftpd
17858 fffff800866b2000 e1ffa000    0   408   408 0000101 [SLPQ select 0xc03e0350][SLP] pure-ftpd
17856 fffff800a35de2f8 e2430000    0   408   408 0000101 [SLPQ select 0xc03e0350][SLP] pure-ftpd
17843 fffff800a35debe0 e2436000    0   408   408 0000101 [SLPQ sbwait 0xfffff8009d35e618][SLP] pure-ftpd
17826 fffff800a3819ab8 e242c000    0   408   408 0000101 [SLPQ sbwait 0xfffff80084809758][SLP] pure-ftpd
17822 fffff8009c3f8ed8 e27a0000    0   408   408 0000101 [SLPQ sbwait 0xfffff800ad6090e0][SLP] pure-ftpd
17820 fffff800a35de5f0 e2432000    0   408   408 0000101 [SLPQ sbwait 0xfffff800a2897758][SLP] pure-ftpd
  447 fffff800867ba8e8 e2104000    0   446   447 0004002 [SLPQ ttyin 0xfffff800806a6410][SLP] bash
  446 fffff800866b37c0 e20fa000 1528   445   446 0004102 [SLPQ wait 0xfffff800866b37c0][SLP] su
  445 fffff800867ba000 e20fe000 1528   444   445 0004002 [SLPQ wait 0xfffff800867ba000][SLP] bash
  444 fffff800867bb4c8 e210c000 1528   441   441 0000100 [SLPQ select 0xc03e0350][SLP] sshd
  441 fffff800866b31d0 e20f6000    0   373   441 0000100 [SLPQ sbwait 0xfffff800a2896340][SLP] sshd
  438 fffff800866b34c8 e20f8000    0   408   408 0000101 [SLPQ sbwait 0xfffff800a4775308][SLP] pure-ftpd
  437 fffff800866b28e8 e20f0000    0     1   437 0004002 [SLPQ ttyin 0xfffff800806a6010][SLP] getty
  436 fffff800a38188e8 e2330000    0     1   436 0004002 [SLPQ ttyin 0xfffff800806ad810][SLP] getty
  408 fffff800af640be0 e122e000    0     1   408 0000000 [SLPQ select 0xc03e0350][SLP] pure-ftpd
  397 fffff800af6405f0 e11b2000    0     1   397 0000000 [SLPQ nanslp 0xc03d59c0][SLP] cron
  385 fffff800867ba5f0 e2102000   25     1   385 0000100 [SLPQ pause 0xfffff800867ba660][SLP] sendmail
  379 fffff800af5eb4c8 e1190000    0     1   379 0000100 [SLPQ select 0xc03e0350][SLP] sendmail
  373 fffff800a38197c0 e233a000    0     1   373 0000100 [SLPQ select 0xc03e0350][SLP] sshd
  269 fffff800866b25f0 e20ee000    0     1   269 0000000 [SLPQ select 0xc03e0350][SLP] syslogd
  249 fffff800af6402f8 e11b0000    0     1   249 0000000 [SLPQ select 0xc03e0350][SLP] devd
  168 fffff800af5ebab8 e1194000    0     1   168 0000000 [SLPQ nanslp 0xc03d59c0][SLP] ipmon
   41 fffff800af640ed8 e1230000    0     0     0 0000204 [SLPQ - 0xe11f96ec][SLP] schedcpu
   40 fffff800af6411d0 e1232000    0     0     0 0000204 [SLPQ - 0xc03e91f0][SLP] nfsiod 3
   39 fffff800af6414c8 e1234000    0     0     0 0000204 [SLPQ - 0xc03e91e8][SLP] nfsiod 2
   38 fffff800af6417c0 e1236000    0     0     0 0000204 [SLPQ - 0xc03e91e0][SLP] nfsiod 1
   37 fffff800af641ab8 e1238000    0     0     0 0000204 [SLPQ - 0xc03e91d8][SLP] nfsiod 0
   36 fffff800af6088e8 df8ec000    0     0     0 0000204 [SLPQ syncer 0xc03d5648][SLP] syncer
   35 fffff800af608be0 df8ee000    0     0     0 0000204 [SLPQ vlruwt 0xfffff800af608be0][SLP] vnlru
   34 fffff800af608ed8 df8f0000    0     0     0 0000204 [SLPQ psleep 0xc03e0b0c][SLP] bufdaemon
    9 fffff800af6091d0 df8f2000    0     0     0 000020c [SLPQ pgzero 0xc03f19d4][SLP] pagezero
    8 fffff800af6094c8 df8f4000    0     0     0 0000204 [SLPQ psleep 0xc03f1a3c][SLP] vmdaemon
    7 fffff800af6097c0 df96e000    0     0     0 0000204 [SLPQ psleep 0xc03f1a24][SLP] pagedaemon
   33 fffff800af609ab8 df970000    0     0     0 0000204 [IWAIT] intr1985: sym3
   32 fffff800af5ea000 e1182000    0     0     0 0000204 [IWAIT] intr1984: sym2
   31 fffff800af5ea2f8 e1184000    0     0     0 0000204 [IWAIT] intr2022: sym1
   30 fffff800af5ea5f0 e1186000    0     0     0 0000204 [IWAIT] intr2016: sym0
   29 fffff800af5ea8e8 e1188000    0     0     0 0000204 [CPU 1] intr2017: hme0
   28 fffff800af5eabe0 e118a000    0     0     0 0000204 [IWAIT] swi0: tty:sab
   27 fffff800af5eaed8 e118c000    0     0     0 0000204 [IWAIT] intr2027: sab0
   26 fffff800af9d22f8 db11c000    0     0     0 0000204 [IWAIT] intr2033: pcib0
   25 fffff800af9d25f0 db11e000    0     0     0 0000204 [IWAIT] intr2021: pcib0
   24 fffff800af9d28e8 db120000    0     0     0 0000204 [IWAIT] intr2032: pcib0
   23 fffff800af9d2be0 db19a000    0     0     0 0000204 [IWAIT] intr2031: pcib0
   22 fffff800af9d2ed8 db19c000    0     0     0 0000204 [IWAIT] intr2030: pcib0
   21 fffff800af9d31d0 db19e000    0     0     0 0000204 [IWAIT] swi6:+
    6 fffff800af9d34c8 db1a0000    0     0     0 0000204 [SLPQ - 0xfffff8008048a880][SLP] thread taskq
   20 fffff800af9d37c0 db1a2000    0     0     0 0000204 [IWAIT] swi6:+
   19 fffff800af9d3ab8 db1a4000    0     0     0 0000204 [IWAIT] swi6: task queue
    5 fffff800af608000 df8e6000    0     0     0 0000204 [SLPQ - 0xfffff8008048ab00][SLP] kqueue taskq
   18 fffff800af6082f8 df8e8000    0     0     0 0000204 [IWAIT] swi3: cambio
   17 fffff800af6085f0 df8ea000    0     0     0 0000204 [IWAIT] swi2: camnet
   16 fffff800af982000 db016000    0     0     0 0000204 [SLPQ - 0xc03c83e8][SLP] yarrow
    4 fffff800af9822f8 db108000    0     0     0 0000204 [SLPQ - 0xc03cc650][SLP] g_down
    3 fffff800af9825f0 db10a000    0     0     0 0000204 [SLPQ - 0xc03cc648][SLP] g_up
    2 fffff800af9828e8 db10c000    0     0     0 0000204 [SLPQ - 0xc03cc638][SLP] g_event
   15 fffff800af982be0 db10e000    0     0     0 0000204 [CPU 0] swi1: net
   14 fffff800af982ed8 db110000    0     0     0 0000204 [IWAIT] swi4: vm
   13 fffff800af9831d0 db112000    0     0     0 000020c [RUNQ] swi5: clock
   12 fffff800af9834c8 db114000    0     0     0 000020c [Can run] idle: cpu0
   11 fffff800af9837c0 db116000    0     0     0 000020c [Can run] idle: cpu1
    1 fffff800af983ab8 db118000    0     0     1 0004200 [SLPQ wait 0xfffff800af983ab8][SLP] init
   10 fffff800af9d2000 db11a000    0     0     0 0000204 [SLPQ ktrace 0xc03d1eb0][SLP] ktrace
    0 c03cc768 c0409a68    0     0     0 0000200 [SLPQ sched 0xc03cc768][SLP] swapper

As a side note, kernel debugging with gdb 6.1.1 seems to be broken, but I don't know since when:

shando:/usr/src/sys/sparc64/conf# kgdb
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "sparc64-marcel-freebsd".
(no debugging symbols found).../usr/src/gnu/usr.bin/gdb/libgdb/../../../../contrib/gdb/gdb/regcache.c:1264: internal-error: regcache_raw_supply: Assertion `regnum >= 0 && regnum < regcache->descr->nr_raw_registers' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) n

/usr/src/gnu/usr.bin/gdb/libgdb/../../../../contrib/gdb/gdb/regcache.c:1264: internal-error: regcache_raw_supply: Assertion `regnum >= 0 && regnum < regcache->descr->nr_raw_registers' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Create a core file of GDB? (y or n) n

So I used gdb 5.3 from ports to get the traceback :

(kgdb) bt
#0  doadump () at ../../../kern/kern_shutdown.c:245
#1  0x00000000c0141d50 in boot (howto=260) at ../../../kern/kern_shutdown.c:396
#2  0x00000000c01421f8 in panic (fmt=0xc0359630 "from debugger") at ../../../kern/kern_shutdown.c:552
#3  0x00000000c007a3c8 in db_panic (addr=3222674300, have_addr=0, count=-1, modif=0xdb0706b0 "") at ../../../ddb/db_command.c:435
#4  0x00000000c007a32c in db_command (last_cmdp=0xc03c68e8, cmd_table=0x0, aux_cmd_tablep=0xc0384db8, aux_cmd_tablep_end=0xc0384dd0)
    at ../../../ddb/db_command.c:349
#5  0x00000000c007a450 in db_command_loop () at ../../../ddb/db_command.c:455
#6  0x00000000c007cf70 in db_trap (type=-620296000, code=0) at ../../../ddb/db_main.c:219
#7  0x00000000c0161fa0 in kdb_trap (type=107, code=0, tf=0x1) at ../../../kern/subr_kdb.c:418
#8  0x00000000c02d9d40 in trap (tf=0xdb070b40) at ../../../sparc64/sparc64/trap.c:308
#9  0x00000000c0161b7c in kdb_enter (msg=0x0) at ../../../kern/subr_kdb.c:238
#10 0x00000000c0161b74 in kdb_enter (msg=0xc03698a0 "panic") at ../../../kern/subr_kdb.c:238
#11 0x00000000c0142108 in panic (fmt=0xc0381ab0 "trap: %s") at ../../../kern/kern_shutdown.c:536
#12 0x00000000c02d9ecc in trap (tf=0xdb070f40) at ../../../sparc64/sparc64/trap.c:370
#13 0x00000000c018622c in m_copym (m=0xfffff80084689700, off0=0, len=-620293616, wait=1) at ../../../kern/uipc_mbuf.c:376
#14 0x00000000c02ba5c0 in uma_zalloc_arg (zone=0x0, udata=0x1, flags=1460) at ../../../vm/uma_core.c:1826
#15 0x00000000c01ef74c in tcp_output (tp=0xfffff800a3416590) at ../../../netinet/tcp_output.c:803
#16 0x00000000c01ec684 in tcp_input (m=0xfffff800a493ae00, off0=52) at ../../../netinet/tcp_input.c:1952
#17 0x00000000c01e1a94 in ip_input (m=0xfffff800a493ae00) at ../../../netinet/ip_input.c:739
#18 0x00000000c01cce40 in netisr_processqueue (ni=0xc03e1708) at ../../../net/netisr.c:233
#19 0x00000000c01cd124 in swi_net (dummy=0x0) at ../../../net/netisr.c:338
#20 0x00000000c0128edc in ithread_loop (arg=0xfffff8008042b700) at ../../../kern/kern_intr.c:547
#21 0x00000000c01277dc in fork_exit (callout=0xc0128d00 <ithread_loop>, arg=0xfffff8008042b700, frame=0xdb071880) at ../../../kern/kern_fork.c:811

I kept the crash dump.

Hope this helps.

-- 
Herve Boulouis



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20041119140611.C91236>