Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 26 Apr 2016 22:18:24 +0200
From:      Svatopluk Kraus <onwahe@gmail.com>
To:        bob prohaska <fbsd@www.zefox.net>
Cc:        "freebsd-arm@freebsd.org" <freebsd-arm@freebsd.org>
Subject:   Re: Another reboot hang
Message-ID:  <CAFHCsPU94vrXgHRSNPvwAFNE0Hnd-6zzhsCSUGDGkVyGQ6jNMg@mail.gmail.com>
In-Reply-To: <20160426183030.GN71221@www.zefox.net>
References:  <20160426183030.GN71221@www.zefox.net>

next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Apr 26, 2016 at 8:30 PM, bob prohaska <fbsd@www.zefox.net> wrote:
> Recently I've been observing hangs on reboot for RPI2.
> Here's an example from r298542. It had just finished building 298591.
>
> The previous two or three reboots were successful, prior to those
> hangs were observed several times. All attempts to reboot via ssh
> have been successful.
>
> Here's the backtrace:
>
> root@www:/usr/src # shutdown -r now
> Shutdown NOW!
> shutdown: [pid 28505]
> root@www:/usr/src #
> *** FINAL System shutdown message from root@www.zefox.com ***
>
> System going down IMMEDIATELY
>
>
>
> System shutdown time has arrived
> Apr 26 11:15:56 www shutdown: reboot by root:
> Stopping cron.
> Waiting for PIDS: 620.
> Stopping sshd.
> Waiting for PIDS: 601.
> Stopping powerd.
> Waiting for PIDS: 562.
> Stopping casperd.
> Waiting for PIDS: 507.
> Stopping devd.
> Waiting for PIDS: 335.
> Writing entropy file:.
> .
> Terminated
> Apr 26 11:16:06 www syslogd: exiting on signal 15
> Waiting (max 60 seconds) for system process `vnlru' to stop...done
> Waiting (max 60 seconds) for system process `bufdaemon' to stop...done
> Waiting (max 60 seconds) for system process `syncer' to stop...
> Syncing disks, vnodes remaining...2 3 0 0 0 done
> All buffers synced.
> KDB: enter: Break to debugger
> [ thread pid 1 tid 100001 ]
> Stopped at      $d.8:   ldrb    r15, [r15, r15, ror r15]!
> db> bt
> Tracing pid 1 tid 100001 td 0xc3a5f000
> db_trace_self() at db_trace_self
>          pc = 0xc05659f4  lr = 0xc0147a8c (db_stack_trace+0x108)
>          sp = 0xd75eb650  fp = 0xd75eb668
> db_stack_trace() at db_stack_trace+0x108
>          pc = 0xc0147a8c  lr = 0xc01476d0 (db_command+0x270)
>          sp = 0xd75eb670  fp = 0xd75eb710
>          r4 = 0x00000001  r5 = 0x00000000
>          r6 = 0xc05fdd21 r10 = 0xc07bc738
> db_command() at db_command+0x270
>          pc = 0xc01476d0  lr = 0xc0147450 (db_command_loop+0x74)
>          sp = 0xd75eb718  fp = 0xd75eb728
>          r4 = 0xc05d4a0c  r5 = 0xc05f64b7
>          r6 = 0xc07bc724  r7 = 0xd75eb8f8
>          r8 = 0xc07adfc0  r9 = 0xc06c3318
>         r10 = 0xc07adfc4
> db_command_loop() at db_command_loop+0x74
>          pc = 0xc0147450  lr = 0xc014a14c (db_trap+0x120)
>          sp = 0xd75eb730  fp = 0xd75eb848
>          r4 = 0x00000001  r5 = 0x00000000
>          r6 = 0xc07bc730 r10 = 0xc07adfc4
> db_trap() at db_trap+0x120
>          pc = 0xc014a14c  lr = 0xc02fe450 (kdb_trap+0x180)
>          sp = 0xd75eb850  fp = 0xd75eb878
>          r4 = 0x00000000  r5 = 0x00000001
>          r6 = 0xc07adfe0 r10 = 0xc07adfc4
> kdb_trap() at kdb_trap+0x180
>          pc = 0xc02fe450  lr = 0xc0583570 (undefinedinstruction+0x318)
>          sp = 0xd75eb880  fp = 0xd75eb8f0
>          r4 = 0x00000000  r5 = 0x00000000
>          r6 = 0xc05831a8  r7 = 0xe7ffffff
>          r8 = 0xc3a5f000  r9 = 0xc02fdd7c
>         r10 = 0xd75eb8f8
> undefinedinstruction() at undefinedinstruction+0x318
>          pc = 0xc0583570  lr = 0xc05687fc (exception_exit)
>          sp = 0xd75eb8f8  fp = 0xd75eb998
>          r4 = 0x60000193  r5 = 0xc3b12088
>          r6 = 0x00000093  r7 = 0x00040000
>          r8 = 0x00000000  r9 = 0x00000000
>         r10 = 0xc06d2be8
> exception_exit() at exception_exit
>          pc = 0xc05687fc  lr = 0xc02fdd64 (kdb_alt_break_internal+0x1c4)
>          sp = 0xd75eb988  fp = 0xd75eb998
>          r0 = 0xc07adfd4  r1 = 0x00000000
>          r2 = 0x00000001  r3 = 0x60000193
>          r4 = 0xc3b12000  r5 = 0xc3b12088
>          r6 = 0x00000093  r7 = 0x00040000
>          r8 = 0x00000000  r9 = 0x00000000
>         r10 = 0xc06d2be8 r12 = 0x00000000
> $a.9() at $a.9
>          pc = 0xc02fdd80  lr = 0xc02fdb98 (kdb_alt_break+0x10)
>          sp = 0xd75eb9a0  fp = 0xd75eb9a0
>          r4 = 0xc3b12000  r5 = 0xc3b12088
>          r6 = 0x00000093  r7 = 0x00040000
> kdb_alt_break() at kdb_alt_break+0x10
>          pc = 0xc02fdb98  lr = 0xc01a089c (uart_intr_rxready+0x8c)
>          sp = 0xd75eb9a8  fp = 0xd75eb9b8
> uart_intr_rxready() at uart_intr_rxready+0x8c
>          pc = 0xc01a089c  lr = 0xc01a16bc (uart_intr+0x118)
>          sp = 0xd75eb9c0  fp = 0xd75eb9f8
>          r4 = 0xc3b37900  r5 = 0xc3b12000
>          r6 = 0xc3b12194 r10 = 0xc06d2be8
> uart_intr() at uart_intr+0x118
>          pc = 0xc01a16bc  lr = 0xc0289e6c (intr_event_handle+0x80)
>          sp = 0xd75eba00  fp = 0xd75eba20
>          r4 = 0xc3b37900  r5 = 0xd75ebaf8
>          r6 = 0xc3b2f058  r7 = 0xc3a5f000
>          r8 = 0x00000000  r9 = 0xc05effbb
>         r10 = 0xc3b26d80
> intr_event_handle() at intr_event_handle+0x80
>          pc = 0xc0289e6c  lr = 0xc056a350 (intr_isrc_dispatch+0x68)
>          sp = 0xd75eba28  fp = 0xd75eba30
>          r4 = 0xc3b2f058  r5 = 0xd75ebaf8
>          r6 = 0xc3b2f058  r7 = 0x00000041
>          r8 = 0xc062d2ab  r9 = 0xc3b2e018
>         r10 = 0xbc1fffff
> intr_isrc_dispatch() at intr_isrc_dispatch+0x68
>          pc = 0xc056a350  lr = 0xc0590c10 (bcm2835_intc_intr+0x8c)
>          sp = 0xd75eba38  fp = 0xd75eba58
>          r4 = 0xc3b2e000  r5 = 0x00000000
> bcm2835_intc_intr() at bcm2835_intc_intr+0x8c
>          pc = 0xc0590c10  lr = 0xc0289e6c (intr_event_handle+0x80)
>          sp = 0xd75eba60  fp = 0xd75eba80
>          r4 = 0xc3936100  r5 = 0xd75ebaf8
>          r6 = 0x00000000  r7 = 0xc3a5f000
>          r8 = 0x00000000  r9 = 0xc05effbb
>         r10 = 0xc3b24340
> intr_event_handle() at intr_event_handle+0x80
>          pc = 0xc0289e6c  lr = 0xc056a350 (intr_isrc_dispatch+0x68)
>          sp = 0xd75eba88  fp = 0xd75eba90
>          r4 = 0xc3a3c604  r5 = 0xd75ebaf8
>          r6 = 0x00000000  r7 = 0x00000000
>          r8 = 0x00000100  r9 = 0xc3fdb2b0
>         r10 = 0xd75ebaf8
> intr_isrc_dispatch() at intr_isrc_dispatch+0x68
>          pc = 0xc056a350  lr = 0xc0588cec (bcm_lintc_intr+0x230)
>          sp = 0xd75eba98  fp = 0xd75ebad8
>          r4 = 0xc3a3c400  r5 = 0x000000c0
> bcm_lintc_intr() at bcm_lintc_intr+0x230
>          pc = 0xc0588cec  lr = 0xc056a2c4 (intr_irq_handler+0x68)
>          sp = 0xd75ebae0  fp = 0xd75ebaf0
>          r4 = 0xd75ebaf8  r5 = 0xc07b7778
>          r6 = 0xc3a5f000  r7 = 0x00000000
>          r8 = 0x00000000  r9 = 0xc3fdb2b0
>         r10 = 0xd75ebbf0
> intr_irq_handler() at intr_irq_handler+0x68
>          pc = 0xc056a2c4  lr = 0xc05687fc (exception_exit)
>          sp = 0xd75ebaf8  fp = 0xd75ebba8
>          r4 = 0xc0620546  r5 = 0x00000001
>          r6 = 0xc3fdb2c0  r7 = 0x00000608
> exception_exit() at exception_exit
>          pc = 0xc05687fc  lr = 0xc0515a44 (ffs_sync+0x104)
>          sp = 0xd75ebb88  fp = 0xd75ebba8
>          r0 = 0x00000004  r1 = 0xc3a5f000
>          r2 = 0xc3fdb2c0  r3 = 0xc3a5f000
>          r4 = 0xc0620546  r5 = 0x00000001
>          r6 = 0xc3fdb2c0  r7 = 0x00000608
>          r8 = 0x00000000  r9 = 0xc3fdb2b0
>         r10 = 0xd75ebbf0 r12 = 0xc061cf05

The break to debugger happened here. So system was in
__mtx_lock_flags() called from ffs_sync(). It's impossible to say from
the backtrace if system was stuck or you just break it there. However,
interrupts was working, so system was not dead totally. One method how
to learn more is to break to debugger repeatedly to see if system is
stuck to one place or not. You also can try to type the following
commands in ddb to see if there is any deadlock.

show all pcpu
show all procs
show all locks

For the second command, kernel must be built with WITNESS option.



> __mtx_lock_flags() at __mtx_lock_flags+0xb4
>          pc = 0xc02a4f60  lr = 0xc0515a44 (ffs_sync+0x104)
>          sp = 0xd75ebbb0  fp = 0xd75ebc20
>          r4 = 0xc3a5f000  r5 = 0xc3fdb2c0
>          r6 = 0x00000000  r7 = 0xc0620546
>          r8 = 0xc3fdb2b0  r9 = 0xc3a5f000
> ffs_sync() at ffs_sync+0x104
>          pc = 0xc0515a44  lr = 0xc0380aa0 (vfs_write_suspend+0x19c)
>          sp = 0xd75ebc28  fp = 0xd75ebc58
>          r4 = 0xc3fdb2b0  r5 = 0xc3fdb2b0
>          r6 = 0x00000000  r7 = 0x00000002
>          r8 = 0x00000000  r9 = 0xc3fdb2c0
>         r10 = 0xc4351600
> vfs_write_suspend() at vfs_write_suspend+0x19c
>          pc = 0xc0380aa0  lr = 0xc0380cd0 (vfs_write_suspend_umnt+0x40)
>          sp = 0xd75ebc60  fp = 0xd75ebc70
>          r4 = 0xc3fdb2b0  r5 = 0xc3fdb2b0
>          r6 = 0xc3a5f000  r7 = 0x00000002
>          r8 = 0x00000000  r9 = 0xc44fa000
>         r10 = 0xc4351600
> vfs_write_suspend_umnt() at vfs_write_suspend_umnt+0x40
>          pc = 0xc0380cd0  lr = 0xc051533c (ffs_unmount+0x48)
>          sp = 0xd75ebc78  fp = 0xd75ebcb8
>          r4 = 0x00080000  r5 = 0xc3fdb2b0
>          r6 = 0xc3a5f000  r7 = 0x00000002
> ffs_unmount() at ffs_unmount+0x48
>          pc = 0xc051533c  lr = 0xc0368734 (dounmount+0x594)
>          sp = 0xd75ebcc0  fp = 0xd75ebd08
>          r4 = 0x00080000  r5 = 0x10201000
>          r6 = 0x00000000  r7 = 0x00000000
>          r8 = 0x00080000  r9 = 0xc4534360
>         r10 = 0xc3a5f000
> dounmount() at dounmount+0x594
>          pc = 0xc0368734  lr = 0xc0371dc8 (vfs_unmountall+0x7c)
>          sp = 0xd75ebd10  fp = 0xd75ebd30
>          r4 = 0xc07bed0c  r5 = 0x00000000
>          r6 = 0xc43a1ac0  r7 = 0xc3fdb2b0
>          r8 = 0xc06078f6  r9 = 0xc061a20b
>         r10 = 0xc06078df
> vfs_unmountall() at vfs_unmountall+0x7c
>          pc = 0xc0371dc8  lr = 0xc0352a90 (bufshutdown+0x39c)
>          sp = 0xd75ebd38  fp = 0xd75ebd78
>          r4 = 0x00000000  r5 = 0xd76146b8
>          r6 = 0xd76146b8  r7 = 0x00000000
>          r8 = 0xc07b1cc8  r9 = 0xc060353c
>         r10 = 0xc06034fb
> bufshutdown() at bufshutdown+0x39c
>          pc = 0xc0352a90  lr = 0xc02bd8e8 (kern_reboot+0x1e0)
>          sp = 0xd75ebd80  fp = 0xd75ebda8
>          r4 = 0x00000000  r5 = 0xc3afbb80
>          r6 = 0xc3afbb9c  r7 = 0xc05f6199
>          r8 = 0xc070ab5c  r9 = 0x00000000
>         r10 = 0x0000001f
> kern_reboot() at kern_reboot+0x1e0
>          pc = 0xc02bd8e8  lr = 0xc02bd694 (sys_reboot+0x3a8)
>          sp = 0xd75ebdb0  fp = 0xd75ebde0
>          r4 = 0x00000000  r5 = 0xd75ebe00
>          r6 = 0x00000000  r7 = 0xc07be170
>          r8 = 0x00000000  r9 = 0xd75ebdf8
> sys_reboot() at sys_reboot+0x3a8
>          pc = 0xc02bd694  lr = 0xc05822b0 (swi_handler+0x318)
>          sp = 0xd75ebde8  fp = 0xd75ebe48
>          r4 = 0xc3a5f000  r5 = 0xc3a5b000
>          r6 = 0x00000000  r7 = 0xc07be170
>          r8 = 0x00000000  r9 = 0xd75ebdf8
>         r10 = 0x0000001f
> swi_handler() at swi_handler+0x318
>          pc = 0xc05822b0  lr = 0xc056878c (swi_exit)
>          sp = 0xd75ebe50  fp = 0xbfbfe978
>          r4 = 0x00008ab8  r5 = 0x000ed82c
>          r6 = 0x0000004e  r7 = 0x00000037
>          r8 = 0x0000000f  r9 = 0x0000001e
>         r10 = 0x0000001f
> swi_exit() at swi_exit
>          pc = 0xc056878c  lr = 0xc056878c (swi_exit)
>          sp = 0xd75ebe50  fp = 0xbfbfe978
> db>
>
> Thanks for reading and any guidance.
> _______________________________________________
> freebsd-arm@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/freebsd-arm
> To unsubscribe, send any mail to "freebsd-arm-unsubscribe@freebsd.org"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAFHCsPU94vrXgHRSNPvwAFNE0Hnd-6zzhsCSUGDGkVyGQ6jNMg>