Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 8 Jul 2016 11:36:26 -0500
From:      Guy Helmer <guy.helmer@gmail.com>
To:        freebsd-stable@freebsd.org
Subject:   9-STABLE: Panic when destroying gmirror that is synchronizing
Message-ID:  <F096B10F-01E7-4253-A701-17824CE54784@gmail.com>

next in thread | raw e-mail | index | archive | help
Hi,

I’m able to replicate a problem where destroying a gmirror that is
synchronizing causes a panic in 9-STABLE (^/stable/9 rev 302430) on
amd64. I’ve forced a gmirror into an inconsistent state by forcing a
reset, and then issued “gmirror destroy -f mirror1” after the reboot,
while the system is synchronizing the two disks in the mirror. As a
workaround, I can use “geom mirror clear” on the providers before
geom_mirror.ko is loaded.

It’s a rare issue but I wanted to document it in case it can be
fixed. Kernel stack trace and code snippets follow. If any other
information would be useful, please let me know.

Guy


Unread portion of the kernel message buffer:
GEOM_MIRROR: Device mirror1: provider mirror/mirror1 destroyed.
GEOM_MIRROR: Device mirror1: rebuilding provider gptid/48c59e09-3c94-11e6-8928-000c29ce3c97 stopped.


Fatal trap 12: page fault while in kernel mode
cpuid = 0; apic id = 00
fault virtual address	= 0x98
fault code		= supervisor write data, page not present
instruction pointer	= 0x20:0xffffffff806df6ff
stack pointer	        = 0x28:0xffffff800024eb30
frame pointer	        = 0x28:0xffffff800024eb40
code segment		= base 0x0, limit 0xfffff, type 0x1b
			= DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags	= interrupt enabled, resume, IOPL = 0
current process		= 13 (g_up)
trap number		= 12
panic: page fault
cpuid = 0
KDB: stack backtrace:
#0 0xffffffff8072e336 at kdb_backtrace+0x66
#1 0xffffffff806f3d5e at panic+0x1ce
#2 0xffffffff80910ae7 at trap_fatal+0x277
#3 0xffffffff80910e31 at trap_pfault+0x211
#4 0xffffffff809113f9 at trap+0x329
#5 0xffffffff808fa311 at calltrap+0x8
#6 0xffffffff81412593 at g_mirror_sync_done+0x53
#7 0xffffffff8077439e at biodone+0xae
#8 0xffffffff806576ac at g_io_schedule_up+0xac
#9 0xffffffff80657e0c at g_up_procbody+0x5c
#10 0xffffffff806c0b4f at fork_exit+0x11f
#11 0xffffffff808fa83e at fork_trampoline+0xe
Uptime: 1m54s
GEOM_MIRROR: Device mirror0: rebuilding provider gptid/48ac38bf-3c94-11e6-8928-000c29ce3c97 stopped.
Dumping 87 out of 238 MB:..19%..37%..55%..74%..92%

Reading symbols from /boot/kernel/geom_mirror.ko...Reading symbols from /boot/kernel/geom_mirror.ko.symbols...done.
done.
Loaded symbols for /boot/kernel/geom_mirror.ko
#0  doadump (textdump=<value optimized out>) at pcpu.h:235
235	pcpu.h: No such file or directory.
	in pcpu.h
(kgdb) #0  doadump (textdump=<value optimized out>) at pcpu.h:235
#1  0xffffffff806f3836 in kern_reboot (howto=260)
    at ../../../kern/kern_shutdown.c:454
#2  0xffffffff806f3d37 in panic (fmt=0x1 <Address 0x1 out of bounds>)
    at ../../../kern/kern_shutdown.c:642
#3  0xffffffff80910ae7 in trap_fatal (frame=0xc, eva=<value optimized out>)
    at ../../../amd64/amd64/trap.c:876
#4  0xffffffff80910e31 in trap_pfault (frame=0xffffff800024ea80, usermode=0)
    at ../../../amd64/amd64/trap.c:798
#5  0xffffffff809113f9 in trap (frame=0xffffff800024ea80)
    at ../../../amd64/amd64/trap.c:462
#6  0xffffffff808fa311 in calltrap () at ../../../amd64/amd64/exception.S:238
#7  0xffffffff806df6ff in _mtx_lock_flags (m=0x80, opts=0,
    file=0xffffffff8141d0d8 "/usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c", line=990) at atomic.h:164
#8  0xffffffff81412593 in g_mirror_sync_done (bp=0xfffffe0004c963e0)
    at /usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c:990
#9  0xffffffff8077439e in biodone (bp=0xfffffe0004c963e0)
    at ../../../kern/vfs_bio.c:3667
#10 0xffffffff806576ac in g_io_schedule_up (tp=<value optimized out>)
    at ../../../geom/geom_io.c:808
#11 0xffffffff80657e0c in g_up_procbody (arg=<value optimized out>)
    at ../../../geom/geom_kern.c:97
#12 0xffffffff806c0b4f in fork_exit (
    callout=0xffffffff80657db0 <g_up_procbody>, arg=0x0,
    frame=0xffffff800024ec40) at ../../../kern/kern_fork.c:1000
#13 0xffffffff808fa83e in fork_trampoline ()
    at ../../../amd64/amd64/exception.S:613
#14 0x0000000000000000 in ?? ()
(kgdb)


geom/mirror/g_mirror.c:990:
static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx); <---
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

kern/vfs_bio.c:3667:
	done = bp->bio_done;
	if (done == NULL)
		wakeup(bp);
	mtx_unlock(mtxp);
	if (done != NULL)
		done(bp); <---
	if (transient) {
		pmap_qremove(start, OFF_TO_IDX(end - start));
		vm_map_remove(bio_transient_map, start, end);
		atomic_add_int(&inflight_transient_maps, -1);
	}
}

geom/geom_io.c:808:
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
			    "%jd len %ld", bp, bp->bio_to->name,
			    bp->bio_offset, bp->bio_length);
			biodone(bp); <---
			THREAD_SLEEPING_OK();
			continue;
		}
		CTR0(KTR_GEOM, "g_up going to sleep");

geom/geom_kern.c:97:
static void
g_up_procbody(void *arg)
{

	mtx_assert(&Giant, MA_NOTOWNED);
	thread_lock(g_up_td);
	sched_prio(g_up_td, PRIBIO);
	thread_unlock(g_up_td);
	for(;;) {
		g_io_schedule_up(g_up_td); <---
	}
}




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?F096B10F-01E7-4253-A701-17824CE54784>