Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 28 Oct 2006 19:52:40 +0200
From:      Ulrich Spoerlein <uspoerlein@gmail.com>
To:        stable@freebsd.org
Cc:        kris@freebsd.org
Subject:   Re: RELENG_6: I/O deadlock under load
Message-ID:  <20061028175240.GB1519@roadrunner.q.local>
In-Reply-To: <7ad7ddd90610262337q25afcf0ej7610d0e1b4ff202d@mail.gmail.com>
References:  <7ad7ddd90610262337q25afcf0ej7610d0e1b4ff202d@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
Ulrich Spoerlein wrote:
> Our fileserver deadlocked, again. It is running RELENG_6 checked out
> yesterday. I have enabled DDB, WITNESS and INVARIANTS and have it
> hooked up via serial console.

Happend again, now I have DEBUG_LOCKS and DEBUG_VFS_LOCK included. There
are hundreds of cron processes waiting on wmesg 'sysctl' (they seem to
have piled up prior to me entering the debugger).

db> show pcpu
cpuid        = 0
curthread    = 0xc8326780: pid 11 "idle: cpu0"
curpcb       = 0xe6f1fd90
fpcurthread  = none
idlethread   = 0xc8326780: pid 11 "idle: cpu0"
APIC ID      = 0
currentldt   = 0x50
spin locks held:
db> show allpcpu
Current CPU: 0

cpuid        = 0
curthread    = 0xc8326780: pid 11 "idle: cpu0"
curpcb       = 0xe6f1fd90
fpcurthread  = none
idlethread   = 0xc8326780: pid 11 "idle: cpu0"
APIC ID      = 0
currentldt   = 0x50
spin locks held:

cpuid        = 1
curthread    = 0xc8326600: pid 10 "idle: cpu1"
curpcb       = 0xe6f1cd90
fpcurthread  = none
idlethread   = 0xc8326600: pid 10 "idle: cpu1"
APIC ID      = 6
currentldt   = 0x50
spin locks held:

db> show alllocks
Process 60935 (gmirror) thread 0xc88ce780 (100122)
exclusive sx sysctl lock r = 0 (0xc0971dc0) locked @ /usr/src/sys/kern/kern_sysctl.c:1375
Process 50 (g_mirror gm0) thread 0xc86b7600 (100062)
exclusive sx gmirror:lock r = 0 (0xc84b282c) locked @ /usr/src/sys/geom/mirror/g_mirror.c:1809

'gm0' is the mirror where the OS resides on. It is 8GB in size and spans
across da0s1 and da1s1 which are RAID5 volumes attached through two
twa(4) controllers.

db> show lockedvnods
Locked vnodes

0xcb4a4984: tag ufs, type VREG
    usecount 1, writecount 0, refcount 3 mountedhere 0
    flags ()
    v_object 0xcc804e70 ref 0 pages 1
     lock type ufs: SHARED (count 1)#0 0xc0667314 at lockmgr+0x160
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06d478e at vn_read+0x132
#5 0xc0697a89 at dofileread+0x85
#6 0xc0697922 at kern_readv+0x36
#7 0xc069784d at read+0x45
#8 0xc0824037 at syscall+0x25b
#9 0xc08106af at Xint0x80_syscall+0x1f

        ino 8315, on dev ufs/root

0xc87682b8: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 4 mountedhere 0
    flags ()
    v_object 0xcb4b6630 ref 0 pages 1
     lock type ufs: EXCL (count 1) by thread 0xc850b000 (pid 43987)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8f46 at vget+0xc2
#5 0xc06bd9be at cache_lookup+0x34a
#6 0xc06bdef2 at vfs_cache_lookup+0x92
#7 0xc083494f at VOP_LOOKUP_APV+0x87
#8 0xc06c20a2 at lookup+0x46e
#9 0xc06c19b6 at namei+0x39a
#10 0xc06d3e9f at vn_open_cred+0x5b
#11 0xc06d3e42 at vn_open+0x1e
#12 0xc06cd342 at kern_open+0xb6
#13 0xc06cd256 at open+0x1a
#14 0xc0824037 at syscall+0x25b
#15 0xc08106af at Xint0x80_syscall+0x1f

        ino 94210, on dev ufs/var

0xc87746cc: tag ufs, type VREG
    usecount 1, writecount 1, refcount 3 mountedhere 0
    flags ()
    v_object 0xc876a210 ref 0 pages 3
     lock type ufs: EXCL (count 1) by thread 0xc86b7000 (pid 14753)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06d4a54 at vn_write+0x138
#5 0xc0697d5f at dofilewrite+0x77
#6 0xc0697c03 at kern_writev+0x3b
#7 0xc0697bac at writev+0x30
#8 0xc0824037 at syscall+0x25b
#9 0xc08106af at Xint0x80_syscall+0x1f

        ino 94280, on dev ufs/var

0xca357414: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 2 mountedhere 0
    flags ()
     lock type ufs: EXCL (count 1) by thread 0xc8cdf480 (pid 20101)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8f46 at vget+0xc2
#5 0xc06bd9be at cache_lookup+0x34a
#6 0xc06bdef2 at vfs_cache_lookup+0x92
#7 0xc083494f at VOP_LOOKUP_APV+0x87
#8 0xc06c20a2 at lookup+0x46e
#9 0xc06c19b6 at namei+0x39a
#10 0xc06cf3f1 at kern_stat+0x35
#11 0xc06cf39f at stat+0x1b
#12 0xc0824037 at syscall+0x25b
#13 0xc08106af at Xint0x80_syscall+0x1f

        ino 94211, on dev ufs/var

0xc875c15c: tag syncer, type VNON
    usecount 1, writecount 0, refcount 2 mountedhere 0
    flags ()
     lock type syncer: EXCL (count 1) by thread 0xc84ce480 (pid 46)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc06c00e1 at vop_stdlock+0x21
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8703 at sync_vnode+0xe3
#5 0xc06c89a1 at sched_sync+0x1ed
#6 0xc065e864 at fork_exit+0xa0
#7 0xc08106bc at fork_trampoline+0x8


0xc8771d98: tag ufs, type VREG
    usecount 3, writecount 0, refcount 4 mountedhere 0
    flags (VV_TEXT)
    v_object 0xc88ddbdc ref 1 pages 7
     lock type ufs: EXCL (count 1) by thread 0xc84ce480 (pid 46)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8f46 at vget+0xc2
#5 0xc0782ab5 at ffs_sync+0x1c1
#6 0xc06caaa0 at sync_fsync+0x164
#7 0xc0835c1f at VOP_FSYNC_APV+0x9b
#8 0xc06c8726 at sync_vnode+0x106
#9 0xc06c89a1 at sched_sync+0x1ed
#10 0xc065e864 at fork_exit+0xa0
#11 0xc08106bc at fork_trampoline+0x8

        ino 259327, on dev ufs/usr

0xc8770828: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 2 mountedhere 0
    flags ()
    v_object 0xc8b42a50 ref 0 pages 0
     lock type ufs: EXCL (count 1) by thread 0xc8cc4d80 (pid 48623)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8f46 at vget+0xc2
#5 0xc06bd9be at cache_lookup+0x34a
#6 0xc06bdef2 at vfs_cache_lookup+0x92
#7 0xc083494f at VOP_LOOKUP_APV+0x87
#8 0xc06c20a2 at lookup+0x46e
#9 0xc06c19b6 at namei+0x39a
#10 0xc06cf3f1 at kern_stat+0x35
#11 0xc06cf39f at stat+0x1b
#12 0xc0824037 at syscall+0x25b
#13 0xc08106af at Xint0x80_syscall+0x1f

        ino 353305, on dev ufs/usr

0xc8b81c3c: tag ufs, type VREG
    usecount 5, writecount 0, refcount 7 mountedhere 0
    flags (VV_TEXT)
    v_object 0xc8b588c4 ref 3 pages 207
     lock type ufs: SHARED (count 1) with 1 pending#0 0xc0667314 at lockmgr+0x160
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06c8f46 at vget+0xc2
#5 0xc07aa3e7 at vnode_pager_lock+0x127
#6 0xc079828f at vm_fault+0x227
#7 0xc08239c6 at trap_pfault+0xce
#8 0xc082359b at trap+0x1eb
#9 0xc081065a at calltrap+0x5

        ino 353904, on dev ufs/usr

0xc8c5c828: tag ufs, type VREG
    usecount 1, writecount 1, refcount 2997 mountedhere 0
    flags ()
    v_object 0xc8ba1e70 ref 0 pages 446652
     lock type ufs: EXCL (count 1) by thread 0xc850b600 (pid 16977)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06d4a54 at vn_write+0x138
#5 0xc0697d5f at dofilewrite+0x77
#6 0xc0697c03 at kern_writev+0x3b
#7 0xc0697b29 at write+0x45
#8 0xc0824037 at syscall+0x25b
#9 0xc08106af at Xint0x80_syscall+0x1f

        ino 6, on dev mirror/share

0xcc95f2b8: tag ufs, type VREG
    usecount 1, writecount 1, refcount 1851 mountedhere 0
    flags ()
    v_object 0xcc804528 ref 0 pages 331134
     lock type ufs: EXCL (count 1) by thread 0xc86ea480 (pid 50442)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc0783fea at ffs_lock+0x76
#2 0xc083688f at VOP_LOCK_APV+0x87
#3 0xc06d50b8 at vn_lock+0xac
#4 0xc06d4a54 at vn_write+0x138
#5 0xc0697d5f at dofilewrite+0x77
#6 0xc0697c03 at kern_writev+0x3b
#7 0xc0697b29 at write+0x45
#8 0xc0824037 at syscall+0x25b
#9 0xc08106af at Xint0x80_syscall+0x1f

        ino 5, on dev da0s2e

0xcab1ac3c: tag ufs, type VREG
    usecount 1, writecount 0, refcount 1139 mountedhere 0
    flags ()
     lock type ufs: EXCL (count 1) by thread 0xc8b9dc00 (pid 34426)#0 0xc06676a1 at lockmgr+0x4ed
#1 0xc06c0ffe at vfs_hash_insert+0x26
#2 0xc0782e9f at ffs_vget+0x15b
#3 0xc07688ac at ffs_valloc+0x100
#4 0xc078f77c at ufs_makeinode+0x68
#5 0xc078c6f1 at ufs_create+0x25
#6 0xc0834b37 at VOP_CREATE_APV+0x9b
#7 0xc0770f7a at ffs_snapshot+0x322
#8 0xc0780cb0 at ffs_mount+0x8d8
#9 0xc06c42ab at vfs_domount+0x687
#10 0xc06c3a7f at vfs_donmount+0x2ef
#11 0xc06c621d at kernel_mount+0x6d
#12 0xc0780e85 at ffs_cmount+0x5d
#13 0xc06c3c12 at mount+0x156
#14 0xc0824037 at syscall+0x25b
#15 0xc08106af at Xint0x80_syscall+0x1f

        ino 6, on dev da0s2e

db> ps
  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
40384 40425 40425     0  S       sysctl l 0xc0971df0 cron
18974 40425 40425     0  S       sysctl l 0xc0971df0 cron
29806 40425 40425     0  S       sysctl l 0xc0971df0 cron
38225 40425 40425     0  S       sysctl l 0xc0971df0 cron
83456 40425 40425     0  S       sysctl l 0xc0971df0 cron
55635 40425 40425     0  S       sysctl l 0xc0971df0 cron
56207 40425 40425     0  S       sysctl l 0xc0971df0 cron
30952 40425 40425     0  S       sysctl l 0xc0971df0 cron
  485 40425 40425     0  S       sysctl l 0xc0971df0 cron
... hundreds more
32057 40425 40425     0  S       sysctl l 0xc0971df0 cron
79683 23876 23876     0  SL      vmpfw    0xc42ed220 sshd
97911 23876 23876     0  SL      vmpfw    0xc42ed220 sshd
67504 40425 40425     0  S       sysctl l 0xc0971df0 cron
... even more crons
55722 40425 40425     0  S       sysctl l 0xc0971df0 cron
 5605 23876 23876     0  SL      vmpfw    0xc42ed220 sshd
13648 23876 23876     0  SL      swread   0xc42ed220 sshd
38374 40425 40425     0  S       sysctl l 0xc0971df0 cron
... yet some hundred more
33848 40425 40425     0  S       sysctl l 0xc0971df0 cron
60935 71690  3566     0  S       g_waitfo 0xc9f76880 gmirror
71690 37484  3566     0  S       piperd   0xc95bc000 perl5.8.8
37484  3566  3566     0  S       piperd   0xc8709cc0 perl5.8.8
 3566 46050  3566     0  Ss      wait     0xc8e47430 sh
46050 40425 40425     0  S       piperd   0xc880b660 cron
48623 43770 26178     0  S       ufs      0xc8b81c94 sh
43770 48166 26178     0  S       piperd   0xc880b990 sh
20101 79935 79935     2  S       biord    0xdc566b1c sh
 8301 75724 75724     0  S       biord    0xdc5736b0 newsyslog
48166 26178 26178     0  S       piperd   0xc880bcc0 perl5.8.8
75724 37661 75724     0  Ss      wait     0xc8cc2c90 sh
79935 12147 79935     2  Ss      wait     0xc8b98218 sh
26178 41679 26178     0  Ss      wait     0xc86e8648 sh
12147 40425 40425     0  S       piperd   0xc8ef6b28 cron
37661 40425 40425     0  S       piperd   0xc8f0bcc0 cron
41679 40425 40425     0  S       piperd   0xc8f0b330 cron
59177 10624 40486     0  S       piperd   0xc951b000 awk
 4943 10624 40486     0  SL      vnread   0xdc390830 tw_cli
10624 51888 40486     0  S       wait     0xc8cd8000 sh
51888 96902 40486     0  S       piperd   0xc87ab4c8 sh
96902 40486 40486     0  S       piperd   0xc8c68198 perl5.8.8
40486 14946 40486     0  Ss      wait     0xc8cd7860 sh
14946 40425 40425     0  S       piperd   0xc95acb28 cron
34426 59514 56159     0  S+      biord    0xdc4049a4 mksnap_ffs
59514 13116 56159     0  S+      wait     0xc8e46648 sh
13116  7858 56159     0  S+      wait     0xc8e46430 dump
50442  9618  9618     0  S+      wdrain   0xc09bf8e4 dd
16977 52273 52273     0  S+      wdrain   0xc09bf8e4 dd
 7858 56159 56159     0  S+      wait     0xc850a430 sh
51796 61348 51796     0  Ss+     ttyin    0xc8481410 tcsh
 9618 24699  9618     0  S+      wait     0xc88cc430 sh
24699 61348 24699     0  SWs+    pause    0xc8b9867c tcsh
52273 78081 52273     0  S+      wait     0xc8b99860 sh
78081 61348 78081     0  SWs+    pause    0xc8b98894 tcsh
36839 58682 36839     0  S+      sysctl l 0xc0971df0 iostat
56159 21882 56159     0  S+      wait     0xc8b99218 sh
58682 61348 58682     0  SWs+    pause    0xc8a1967c tcsh
21882 61348 21882     0  SWs+    pause    0xc88cc67c tcsh
61348 90803 61348     0  Ss      select   0xc09bf344 screen
90803 63507 90803     0  S+      pause    0xc88cf894 screen
63507 73206 63507     0  SWs+    pause    0xc86e8cc4 tcsh
11117 73206 11117     0  Ss+     ttyin    0xc8528410 tcsh
73206 23876 73206     0  Ss      sysctl l 0xc0971df0 sshd
47418     1 47418     0  SWs+    ttyin    0xc8513410 getty
37674     1 37674     0  SWs+    ttyin    0xc8525010 getty
81454     1 81454     0  SWs+    ttyin    0xc8525410 getty
85718     1 85718     0  SWs+    ttyin    0xc8518810 getty
63281     1 63281     0  SWs+    ttyin    0xc851b410 getty
15399     1 15399     0  SWs+    ttyin    0xc8513810 getty
36534     1 36534     0  SWs+    ttyin    0xc851a010 getty
64717     1 64717     0  SWs+    ttyin    0xc851a810 getty
32019     1 32019     0  SWs+    ttyin    0xc8519010 getty
21692     1 21692     0  Ss      sysctl l 0xc0971df0 bsnmpd
37851     1 37851     0  SWs     select   0xc09bf344 inetd
34757 54856 54856     0  S       sbwait   0xc89c7370 heartbeat
58032 54856 54856     0  S       select   0xc09bf344 heartbeat
16232 54856 54856     0  S       sbwait   0xc877c638 heartbeat
54856     1 54856     0  Ss      select   0xc09bf344 heartbeat
40425     1 40425     0  Ss      nanslp   0xc0971fec cron
 8391     1  8391    25  Ss      sysctl l 0xc0971df0 sendmail
10886     1 10886     0  Ss      sysctl l 0xc0971df0 sendmail
23876     1 23876     0  Ss      select   0xc09bf344 sshd
43987     1 43987     0  Ss      biord    0xdc3e6200 ntpd
71800     1 30662     0  S       nanslp   0xc0971fec smartd
 1186 34002 34002     0  SW      nfslockd 0xc09c30e8 rpc.lockd
34002     1 34002     0  Ss      select   0xc09bf344 rpc.lockd
96123     1 96123     0  Ss      select   0xc09bf344 rpc.statd
81769     1 81769     0  Ss      select   0xc09bf344 amd
77412     1 77412     0  Ss      select   0xc09bf344 rpcbind
21109     1 21109   115  Ss      sysctl l 0xc0971df0 symon
14753     1 14753     0  Ss      biord    0xdc3c8a24 syslogd
46810     1 46810     0  SWs     select   0xc09bf344 devd
   51     0     0     0  SL      m:w1     0xc8676e00 [g_mirror share]
   50     0     0     0  SL      gwrite   0xc87f5dec [g_mirror gm0]
   49     0     0     0  SL      -        0xe8ca1d04 [schedcpu]
   48     0     0     0  SL      sdflush  0xc09c8514 [softdepflush]
   47     0     0     0  SL      vlruwt   0xc84cdc90 [vnlru]
   46     0     0     0  SL      biord    0xdc4f2030 [syncer]
   45     0     0     0  SL      wdrain   0xc09bf8e4 [bufdaemon]
   44     0     0     0  SL      pgzero   0xc09c9484 [pagezero]
   43     0     0     0  SL      psleep   0xc09c8fd4 [vmdaemon]
   42     0     0     0  SL      psleep   0xc09c8f90 [pagedaemon]
   41     0     0     0  WL                          [swi0: sio]
   40     0     0     0  WL                          [irq1: atkbd0]
   39     0     0     0  WL                          [irq17: ichsmb0]
   38     0     0     0  WL                          [irq15: ata1]
   37     0     0     0  WL                          [irq14: ata0]
   36     0     0     0  SL      usbevt   0xc8411a10 [usb4]
   35     0     0     0  WL                          [irq23: ehci0]
   34     0     0     0  SL      usbevt   0xc84e0210 [usb3]
   33     0     0     0  SL      usbevt   0xc84d1210 [usb2]
   32     0     0     0  WL                          [irq18: uhci2]
   31     0     0     0  SL      usbevt   0xc84d6210 [usb1]
   30     0     0     0  WL                          [irq19: uhci1]
   29     0     0     0  SL      usbtsk   0xc096ec04 [usbtask]
   28     0     0     0  SL      usbevt   0xc8485210 [usb0]
   27     0     0     0  WL                          [irq16: uhci0 uhci3]
   26     0     0     0  SL      -        0xc8449b00 [em1 taskq]
   25     0     0     0  SL      -        0xc847f380 [em0 taskq]
   24     0     0     0  SL      idle     0xc844a000 [aic_recovery1]
   23     0     0     0  WL                          [irq25: ahd1]
   22     0     0     0  SL      idle     0xc843d000 [aic_recovery0]
   21     0     0     0  WL                          [irq24: ahd0]
   20     0     0     0  WL                          [irq9: acpi0]
    9     0     0     0  SL      -        0xc8406980 [kqueue taskq]
    8     0     0     0  SL      -        0xc8406a80 [acpi_task_2]
    7     0     0     0  SL      -        0xc8406a80 [acpi_task_1]
    6     0     0     0  SL      -        0xc8406a80 [acpi_task_0]
   19     0     0     0  WL                          [swi5: +]
    5     0     0     0  SL      -        0xc8406c00 [thread taskq]
   18     0     0     0  WL                          [swi6: Giant taskq]
   17     0     0     0  WL                          [swi6: task queue]
   16     0     0     0  WL                          [swi2: cambio]
   15     0     0     0  SL      -        0xc096c920 [yarrow]
    4     0     0     0  SL      -        0xc096f488 [g_down]
    3     0     0     0  SL      -        0xc096f484 [g_up]
    2     0     0     0  SL      gmirror: 0xc84b285c [g_event]
   14     0     0     0  WL                          [swi3: vm]
   13     0     0     0  WL                          [swi4: clock sio]
   12     0     0     0  WL                          [swi1: net]
   11     0     0     0  RL      CPU 0               [idle: cpu0]
   10     0     0     0  RL      CPU 1               [idle: cpu1]
    1     0     1     0  SLs     wait     0xc832a000 [init]
    0     0     0     0  WLs                         [swapper]
78481 71690  3566     0  Z                           hostname
39905 37484  3566     0  Z                           df
75688 48166 26178     0  Z                           df
99435 96902 40486     0  Z                           df

db> trace 51
Tracing pid 51 tid 100038 td 0xc84cea80
sched_switch(c84cea80,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c8676e00,c09716c0,0,c0895bd7,211,...) at sleepq_switch+0xc1
sleepq_wait(c8676e00,0,c088c7ec,6f8,c0970e40,...) at sleepq_wait+0x46
msleep(c8676e00,c8676e80,24c,c088ce4d,0) at msleep+0x279
g_mirror_worker(c8676e00,e8ca4d38,c8676e00,c0639754,0,...) at g_mirror_worker+0x4d8
fork_exit(c0639754,c8676e00,e8ca4d38) at fork_exit+0xa0
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xe8ca4d6c, ebp = 0 ---
db> trace 50
Tracing pid 50 tid 100062 td 0xc86b7600
sched_switch(c86b7600,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c87f5dec,c09716c0,0,c0895bd7,236,...) at sleepq_switch+0xc1
sleepq_timedwait(c87f5dec) at sleepq_timedwait+0x4a
msleep(c87f5dec,c09bf980,4c,c088b6e1,64) at msleep+0x255
biowait(c87f5dec,c088b6e1,c87f5dec,c86a6100,eaf13c88,...) at biowait+0x52
g_write_data(c86a6100,ffd5a800,1,c956a400,200,...) at g_write_data+0x8d
g_mirror_write_metadata(c86a1900,eaf13c08,c84b2800,c86a1900,eaf13c08,c84b282c,1,c088c7ec,2de) at g_mirror_write_metadata+0x3eb
g_mirror_update_metadata(c86a1900) at g_mirror_update_metadata+0x55
g_mirror_unidle(c84b2800) at g_mirror_unidle+0xd5
g_mirror_register_request(c8838210) at g_mirror_register_request+0xd7
g_mirror_worker(c84b2800,eaf13d38,c84b2800,c0639754,0,...) at g_mirror_worker+0x62e
fork_exit(c0639754,c84b2800,eaf13d38) at fork_exit+0xa0
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xeaf13d6c, ebp = 0 ---
db> trace 2
Tracing pid 2 tid 100001 td 0xc8326d80
sched_switch(c8326d80,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c84b285c,c09716c0,0,c0895bd7,211,...) at sleepq_switch+0xc1
sleepq_wait(c84b285c,c08933e3,9f,c84b282c,c84b2800,...) at sleepq_wait+0x46
cv_wait(c84b285c,c0970588,c86a1a00,c84b2800,0,...) at cv_wait+0x17b
_sx_xlock(c84b282c,c088c7ec,c2e,c86a1a00,c86a1a00,...) at _sx_xlock+0x68
g_mirror_dumpconf(cb8fb020,c085ac70,c86a1a00,0,0,...) at g_mirror_dumpconf+0x376
g_conf_geom(cb8fb020,c86a1a00,0,0) at g_conf_geom+0x7c
g_conf_class(cb8fb020,c0902280,0,0,0) at g_conf_class+0x49
g_conf_specific(cb8fb020,0,0,0,0,...) at g_conf_specific+0x58
g_confxml(cb8fb020,0,66666667,e6f2bcf4,c062fc29,...) at g_confxml+0x3d
one_event(e6f2bd0c,c0631411,258,190,c06313a4,...) at one_event+0x172
g_run_events(258,190,c06313a4,c8325218,e6f2bd24,...) at g_run_events+0x9
g_event_procbody(0,e6f2bd38,0,c06313a4,0,...) at g_event_procbody+0x6d
fork_exit(c06313a4,0,e6f2bd38) at fork_exit+0xa0
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xe6f2bd6c, ebp = 0 ---
db> trace 60935
Tracing pid 60935 tid 100122 td 0xc88ce780
sched_switch(c88ce780,0,1) at sched_switch+0x177
mi_switch(1,0) at mi_switch+0x270
sleepq_switch(c9f76880,c09716c0,0,c0895bd7,236,...) at sleepq_switch+0xc1
sleepq_timedwait(c9f76880) at sleepq_timedwait+0x4a
msleep(c9f76880,0,4c,c088b366,3e8) at msleep+0x255
g_waitfor_event(c062f738,cb8fb020,2,0,0,0,0,1) at g_waitfor_event+0x73
sysctl_kern_geom_confxml(c0901b60,0,0,eaf9bbf4,c0901b60,...) at sysctl_kern_geom_confxml+0x26
sysctl_root(0,eaf9bc74,3,eaf9bbf4) at sysctl_root+0x11b
userland_sysctl(c88ce780,eaf9bc74,3,8050000,bfbfb4d8,0,0,0,eaf9bc70,0,c0971700,0,c0893789,521) at userland_sysctl+0xf4
__sysctl(c88ce780,eaf9bd04) at __sysctl+0x77
syscall(3b,3b,3b,3,bfbfb4d8,...) at syscall+0x25b
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--- syscall (202, FreeBSD ELF32, __sysctl), eip = 0x2815509f, esp = 0xbfbfb3bc, ebp = 0xbfbfb3f8 ---


Sadly, there is no kernel thread for the two twa controllers, why?

Should I try with option PREEMTION to see if the deadlock vanishes? And
is there some way to get at the time of the deadlock (by looking at the
oldest cron job, this should be a good estimate).

The gmirror<->sysctl locks seems suspicious to me. Shall I reboot and
reproduce the deadlock to see if it occurs at the same spot?

Any help would be greatly appreciated!

Ulrich Spörlein
-- 
A: Yes.
>Q: Are you sure?
> >A: Because it reverses the logical flow of conversation.
> >>Q: Why is top posting frowned upon?



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20061028175240.GB1519>