Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 Apr 2009 10:36:34 +0400
From:      pluknet <pluknet@gmail.com>
To:        freebsd-stable <freebsd-stable@freebsd.org>
Subject:   lock up in 6.2 (procs massively stuck in Giant)
Message-ID:  <a31046fc0904292336w17aca317hefd32dad5bc28007@mail.gmail.com>

next in thread | raw e-mail | index | archive | help
Hi folks.

Today I got a new locking issue.
This is the first time I got it, and it's merely reproduced.

The box has lost both remote connection and local access.
No SIGINFO output on the local console even.
Jumping in ddb> shows the next:

1) first, this is a 8-way web server. No processes on runqueue except one httpd
(i.e. ps shows R in its state):

db> bt 9114
Tracing pid 9114 tid 100854 td 0xcabfe190
sched_switch(3401572752,3354648080,6) at sched_switch+323
mi_switch(6,3354648080,3354648424,3231755728,4013194124,...) at mi_switch+442
maybe_preempt(3354648080) at maybe_preempt+196
sched_add(3354648080,4,3401589408,3401589408,3401573096,...) at sched_add+600
setrunqueue(3231755728,0) at setrunqueue+99
kseq_load_add(3231755728,3401572752,3404215344,4013194200,2097222,...) at kseq_l
oad_add+95
sched_clock(3227874104,3231974076,0,3231974076,0,...) at sched_clock+341
_end(0,0,23872,22116064,2058572708,...) at 4013194220

2) although a simple bt shows the following:

db> bt
Tracing pid 16 tid 100000 td 0xc7cfe000
kdb_enter(c094016e) at kdb_enter+0x2b
siointr1(c7f93000) at siointr1+0xce
siointr(c7f93000) at siointr+0x5e
intr_execute_handlers(c7cf24c8,e687ac94,4,e687acd8,c0899013,...) at intr_execute
_handlers+0xe1
lapic_handle_intr(37) at lapic_handle_intr+0x2e
Xapic_isr1() at Xapic_isr1+0x33
--- interrupt, eip = 0xc0b94165, esp = 0xe687acd8, ebp = 0xe687acd8 ---
acpi_cpu_c1(ea4621f1,22d3014d,c7cfe000,c7cfe000,2,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(e687ad10,c066c3f1,c7cfc000,c066c35c,e687ad24,...) at acpi_cpu_idle
+0x152
cpu_idle(c7cfc000,c066c35c,e687ad24,c066c0dd,0,...) at cpu_idle+0x28
idle_proc(0,e687ad38) at idle_proc+0x95
fork_exit(c066c35c,0,e687ad38) at fork_exit+0x71
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xe687ad6c, ebp = 0 ---


3) most httpd, sshd, and possibly some other processes stuck in Giant:

db> ps
  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
11568 98516 98436     0  L      *Giant    0xc8bf3880 exim-4.66-0
11456  7001  7001 14544  LL     *Giant    0xc8bf3880 httpd
11400 46597 46597 36386  L      *Giant    0xc8bf3880 httpd
11302  6998  6998 27116  L      *Giant    0xc8bf3880 httpd
11251  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
11197  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
11196  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
11146  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
11138 90916 90916 37117  L      *Giant    0xc8bf3880 httpd
11029  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
11026  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
10981  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
10930  7969  7969 33194  S       accept   0xcd056302 httpd
10832  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
10783  7001  7001 14544  L      *Giant    0xc8bf3880 httpd
10782  6998  6998 27116  S       lockf    0xd1e392c0 httpd
10688  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
10681  6711  6711  7638  L      *Giant    0xc8bf3880 httpd
10675  7001  7001 14544  S       sbwait   0xd1312a64 httpd
10576 58722 58722  4868  L      *Giant    0xc8bf3880 httpd
10541 59002 59002 19140  L      *Giant    0xc8bf3880 httpd
10540 58722 58722  4868  L      *Giant    0xc8bf3880 httpd
10500  7001  7001 14544  S       sbwait   0xce1aae90 httpd
10403  8143  8143 38211  L      *Giant    0xc8bf3880 httpd
10357  7001  7001 14544  L      *Giant    0xc8bf3880 httpd
10050  7001  7001 14544  L      *Giant    0xc8bf3880 httpd
 9704 90238 90238 22123  S       select   0xc0a12944 httpd
 9515 86902 86902     0  L      *Giant    0xc8bf3880 proftpd
 9491 41120 41120 37989  L      *Giant    0xc8bf3880 httpd
 9399 41120 41120 37989  L      *Giant    0xc8bf3880 httpd
 9186  7062  7062 20966  S       accept   0xcb9b5892 httpd
 9185  7062  7062 20966  L      *Giant    0xc8bf3880 httpd
 9148  8550  8550 13034  L      *Giant    0xc8bf3880 httpd
 9147  8261  8261 26789  L      *Giant    0xc8bf3880 httpd
 9146  8261  8261 26789  L      *Giant    0xc8bf3880 httpd
 9114  8550  8550 13034  R       CPU 5               httpd
 9111  6842  6842 36260  L      *Giant    0xc8bf3880 httpd
 9081  6842  6842 36260  S       lockf    0xc8534780 httpd
 9018  8042  8042 36016  S       select   0xc0a12944 httpd
 8988  3412  3412  2583  L      *Giant    0xc8bf3880 httpd
 8915  7583  7583 36227  L      *Giant    0xc8bf3880 httpd
 8342 41404 41404 37685  L      *Giant    0xc8bf3880 httpd
 8031  6711  6711  7638  S       sbwait   0xd141454c httpd
 7862  8505  8505 36816  L      *Giant    0xc8bf3880 httpd
 7634  7344  7344  2488  L      *Giant    0xc8bf3880 httpd
 7556  7730  7730 21562  S       lockf    0xc836ca00 httpd
 7418  8543  8543 13369  S       accept   0xcd28303a httpd
 7257  8558  8558 36731  L      *Giant    0xc8bf3880 httpd
 7144  8058  8058 36611  L      *Giant    0xc8bf3880 httpd
 7089  7730  7730 21562  L      *Giant    0xc8bf3880 httpd
 6143  7684  7684 36144  S       accept   0xcc05c03a httpd
 5548  7349  7349 36645  L      *Giant    0xc8bf3880 httpd
 5383  7001  7001 14544  S       sbwait   0xd0a20900 httpd
 4851  6753  6753  1742  L      *Giant    0xc8bf3880 httpd
 4793  6753  6753  1742  L      *Giant    0xc8bf3880 httpd
 4571  7304  7304 28968  L      *Giant    0xc8bf3880 httpd
 4347  3412  3412  2583  L      *Giant    0xc8bf3880 httpd
 4186  8404  8404 36328  S       accept   0xcccaa466 httpd
 3736  7001  7001 14544  S       sbwait   0xd123c20c httpd
 3634 86902 86902     0  L      *Giant    0xc8bf3880 proftpd
 1999 11633 11633 13556  L      *Giant    0xc8bf3880 httpd
  879 90916 90916 37117  L      *Giant    0xc8bf3880 httpd
  713  6845  6845 36251  L      *Giant    0xc8bf3880 httpd
  322  6307  6307  4538  S       accept   0xc86b95ca httpd
98806  8404  8404 36328  S       accept   0xcccaa466 httpd
98516 98451 98436     0  S       wait     0xcc0d8430 exim-4.66-0
98502  7603  7603 36802  L      *Giant    0xc8bf3880 httpd
98485  7603  7603 36802  L      *Giant    0xc8bf3880 httpd
98471  7896  7896 36493  L      *Giant    0xc8bf3880 httpd
98451 98447 98436     0  S       wait     0xd0e12860 sh
98447 98436 98436     0  S       wait     0xcaf3cc90 lockf
98436 98423 98436     0  Ss      wait     0xc858a000 sh
98423  6726  6726     0  S       piperd   0xc9c0eb28 cron
98399  7896  7896 36493  L      *Giant    0xc8bf3880 httpd
98342 62229 62229 36247  L      *Giant    0xc8bf3880 httpd
98340 62229 62229 36247  S       accept   0xc9e839f6 httpd
98334  7304  7304 28968  S       lockf    0xc8200c40 httpd
98297  7061  7061 36233  S       accept   0xcbf7dcbe httpd
98289  7543  7543 36353  L      *Giant    0xc8bf3880 httpd
98277  7031  7031 36202  L      *Giant    0xc8bf3880 httpd
98276  7031  7031 36202  L      *Giant    0xc8bf3880 httpd
98248 97266 98248 36737  S+      select   0xc0a12944 mc
98223  7061  7061 36233  S       accept   0xcbf7dcbe httpd
98219  7977  7977 36183  L      *Giant    0xc8bf3880 httpd
98153  8045  8045  2394  S       accept   0xccf4f03a httpd
98040  8251  8251 36181  L      *Giant    0xc8bf3880 httpd
98012  8251  8251 36181  L      *Giant    0xc8bf3880 httpd
97983  8372  8372 36505  S       accept   0xcd3b8cbe httpd
97981  7618  7618 36616  S       accept   0xcc56f892 httpd
97958 15380 15380 36755  L      *Giant    0xc8bf3880 httpd
97956 15380 15380 36755  S       accept   0xd093c466 httpd
97921  7660  7660 36763  S       accept   0xcc3319f6 httpd
97898  7584  7584  7134  S       accept   0xcc05c5ca httpd
97886  7677  7677 36136  L      *Giant    0xc8bf3880 httpd
97877  7677  7677 36136  S       accept   0xcbc75e22 httpd
97823  7001  7001 14544  S       sbwait   0xd177bf08 httpd
97807  8344  8344 36737  L      *Giant    0xc8bf3880 httpd
97727  7543  7543 36353  L      *Giant    0xc8bf3880 httpd
97699  7651  7651 36240  L      *Giant    0xc8bf3880 httpd
97692  8484  8484 36632  S       accept   0xccca99f6 httpd
97682 80307 80307 36453  LL     *Giant    0xc8bf3880 httpd
97668  7823  7823 36122  S       accept   0xcca7f19e httpd
97647  7488  7488 36130  L      *Giant    0xc8bf3880 httpd
97521  7488  7488 36130  L      *Giant    0xc8bf3880 httpd
97492  8674  8674 27415  L      *Giant    0xc8bf3880 httpd
97489  6621  6621 36062  S       accept   0xcb1b59f6 httpd
97404  7524  7524 25829  S       accept   0xccf8072e httpd
97403  6842  6842 36260  S       sbwait   0xd1140c40 httpd
97396  8674  8674 27415  L      *Giant    0xc8bf3880 httpd
97349 80307 80307 36453  L      *Giant    0xc8bf3880 httpd
97342  7706  7706 14417  S       accept   0xccfdeb5a httpd
97324  7977  7977 36183  L      *Giant    0xc8bf3880 httpd
97319  8667  8667 36523  S       accept   0xcd6a672e httpd
97311  7120  7120 36257  L      *Giant    0xc8bf3880 httpd
97305  7120  7120 36257  L      *Giant    0xc8bf3880 httpd
97293  8595  8595 36266  S       accept   0xccf4d9f6 httpd
97266 97265 97266 36737  Ss+     wait     0xc8e3b648 bash
97265 97237 97237 36737  L      *Giant    0xc8bf3880 sshd
97237  1107 97237     0  Ss      sbwait   0xcd6a520c sshd


4) tracing random processes waiting in Giant:

db> bt 11568
Tracing pid 11568 tid 102623 td 0xce272000
sched_switch(3458670592,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3458670592,0,0,0) at _mtx_lock_sleep+252
msleep(3688690704,3231788928,76,3230830241,0) at msleep+716
bwait(3688690704,76,3230830241) at bwait+95
bufwait(3688690704,1,0,0,4053851052,...) at bufwait+26
breadn(3487414672,0,0,2048,0,...) at breadn+486
bread(3487414672,0,0,2048,0,...) at bread+32
ffs_read(4053851124) at ffs_read+539
VOP_READ_APV(3231536928,4053851124) at VOP_READ_APV+56
vn_read(3418793016,4053851324,3522853248,0,3458670592) at vn_read+561
dofileread(3458670592,6,3418793016,4053851324,4294967295,...) at dofileread+133
kern_readv(3458670592,6,4053851324,135356416,4096,...) at kern_readv+54
read(3458670592,4053851396) at read+69
syscall(3216965691,59,3216965691,0,675511712,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (3, FreeBSD ELF32, read), eip = 675361615, esp = 3217023628, ebp = 3
217023656 ---
db> bt 11456
Tracing pid 11456 tid 102521 td 0xd17c7190
sched_switch(3514593680,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3514593680,0,0,0) at _mtx_lock_sleep+252
vm_fault(3403956520,672894976,2,8) at vm_fault+421
trap_pfault(4046785848,1,672898616,672898616,0,...) at trap_pfault+291
trap(671744059,3216965691,3216965691,0,138252288,...) at trap+491
calltrap() at calltrap+5
--- trap 12, eip = 672436781, esp = 3217025340, ebp = 3217025416 ---
db> bt 11400
Tracing pid 11400 tid 101283 td 0xcbb4d640
sched_switch(3417626176,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3417626176,0,0,0) at _mtx_lock_sleep+252
namei(4021980160) at namei+492
kern_stat(3417626176,137444900,0,4021980276) at kern_stat+53
stat(3417626176,4021980420) at stat+27
syscall(3216965691,672923707,3216965691,671783368,3217026412,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025548, ebp =
db> bt 11302
Tracing pid 11302 tid 102454 td 0xd17c3960
sched_switch(3514579296,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3514579296,0,0,0) at _mtx_lock_sleep+252
flock(3514579296,4046601476) at flock+115
syscall(3230203963,59,59,671795656,3217026436,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (131, FreeBSD ELF32, flock), eip = 672454479, esp = 3217025820, ebp
= 3217025848 ---
db> bt 97265
Tracing pid 97265 tid 102463 td 0xc83b67d0
sched_switch(3359336400,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3359336400,0,0,0) at _mtx_lock_sleep+252
giant_poll(3431429632,64,3359336400,3431429632,3231320352,...) at giant_poll+54
devfs_poll_f(3362780288,64,3520869760,3359336400) at devfs_poll_f+47
selscan(3359336400,4033203092,4033203076,9) at selscan+497
kern_select(3359336400,9,134724512,134724528,0,...) at kern_select+977
select(3359336400,4033203460) at select+68
syscall(3216965691,3216965691,3216965691,0,3217023260,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (93, FreeBSD ELF32, select), eip = 674546383, esp = 3217023148, ebp
= 3217023224 ---
db> bt 97305
Tracing pid 97305 tid 102153 td 0xd1b61190
sched_switch(3518370192,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3518370192,0,0,0) at _mtx_lock_sleep+252
namei(4043836416) at namei+492
kern_stat(3518370192,135532244,0,4043836532) at kern_stat+53
stat(3518370192,4043836676) at stat+27
syscall(135397435,59,3216965691,671783368,3217026408,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025548, ebp =
 3217025608 ---
db> bt 97492
Tracing pid 97492 tid 102248 td 0xc82e44b0
sched_switch(3358475440,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3358475440,0,0,0) at _mtx_lock_sleep+252
namei(4039949312) at namei+492
kern_stat(3358475440,140045796,0,4039949428) at kern_stat+53
stat(3358475440,4039949572) at stat+27
syscall(671809595,3216965691,3216965691,671828424,3217026408,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672830547, esp = 3217025548, ebp =
 3217025608 ---
db> bt 97807
Tracing pid 97807 tid 102519 td 0xce8497d0
sched_switch(3464796112,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3464796112,0,0,0) at _mtx_lock_sleep+252
namei(4049681408) at namei+492
kern_stat(3464796112,136572492,0,4049681524) at kern_stat+53
stat(3464796112,4049681668) at stat+27
syscall(671875131,59,3216965691,671783368,3217026436,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025580, ebp =
 3217025640 ---

5) kernel is built with kdb/ddb and ctrl-alt-esc enabled, so this can
be a non-complete output.

db> show lockedvnods
Locked vnodes

0xcfddb990: tag ufs, type VREG
    usecount 1, writecount 0, refcount 3 mountedhere 0
    flags ()
    v_object 0xcf9f8d68 ref 0 pages 1
     lock type ufs: SHARED (count 1)
        ino 242117869, on dev aacdu0s1g


-- 
wbr,
pluknet



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?a31046fc0904292336w17aca317hefd32dad5bc28007>