Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 20 Nov 2017 01:15:29 -0800
From:      Mark Millard <markmi@dsl-only.net>
To:        FreeBSD Current <freebsd-current@freebsd.org>, freebsd-amd64@freebsd.org, freebsd-hackers <freebsd-hackers@freebsd.org>
Subject:   Re: head -r325997: Fatal trap 12: page fault while in kernel mode (during a buildworld, virtualbox guest context) [2nd example]
Message-ID:  <2E7497BD-06C6-4C86-AA83-1150C735315B@dsl-only.net>
In-Reply-To: <3C5C0D1B-4990-426A-B622-6EC4CC6A1F3F@dsl-only.net>
References:  <2A312919-EF66-4FC3-85E4-A796315DB978@dsl-only.net> <3C5C0D1B-4990-426A-B622-6EC4CC6A1F3F@dsl-only.net>

next in thread | previous in thread | raw e-mail | index | archive | help
[Adding some analysis of where the 2 failures were in
source code terms.]

On 2017-Nov-19, at 9:07 PM, Mark Millard <markmi@dsl-only.net> wrote:

> [I got another of these. By the way: amd64 context.
> Again: buildworld was running.]
>
> On 2017-Nov-19, at 5:52 PM, Mark Millard <markmi at dsl-only.net> wrote:
>
>> Attempting a dump failed. I'm afraid all I have for
>> information is the below. The kernel was a
>> non-debug kernel (with debug information).
>>
>> The following is hand typed from a screen shot:
>>
>> Fatal trap 12: page fault while in kernel mode
>> cpuid = 0; apic id = 00
>> fault virtual address = 0xffffff53f000e2b0
>
> New one:                  0x806b49010
>
>> fault code            = supervisor read data, page not present
>
> New one:                  supervisor write data, page not present
>
>> instruction pointer   = 0x20:0xffffffff80f2b11e
>
> New one:                  0x20:0xffffffff80f2b21b
>
>> stack pointer         = 0x0:0xfffffe01aeb28970
>
> New one:                  0x28:0xfffffe01aeb28970
>
>> frame pointer         = 0x0:0xfffffe01aeb289f0
>
> New one:                  0x28:0xfffffe01aeb289f0
>
>> code segment          = base 0x0, limit 0xfffff, type 0x1b
>>                     = DPL 0, pres 1, long 1, def32 0, gran 1
>> processor eflags      = interrupt enabled, resume, IOPL = 0
>> current process       = 20 (pagedaemon)
>> [ thread pid 20 tid 100089 ]
>> Stopped at pmap_ts_referenced+0x72e: movq (%rcx,%rdi,8),%rbx
>
> New one: pmap_ts_referenced+0x82b: movq %rcx,0x10(%rax)
>
>> db> bt
>> Tracing pid 20 tid 100089 td 0xfffff80003eb3560
>
> New one:                    td 0xfffff80003df6000
>
>> pmap_ts_referenced() at pmap_ts_referenced+0x72e/frame 0xfffffe01aeb289f0
> New one:
> pmap_ts_referenced() at pmap_ts_referenced+0x82b/frame 0xfffffe01aeb289f0
>
>> vm_pageout() at vm_pageout+0xdeb/frame 0xfffffe01aeb28ab0
>
> Correction to original:            frame 0xfffffe01aeb28a70
> (new is the same)
>
>> fork_exit() at fork_exit+0x82/frame 0xfffffe01aeb28ab0
>> fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe01aeb28ab0
>> --- trap 0, rip = 0, rsp = 0, rbp = 0 ---
>> db>
>>
>> The prior (cross) buildworld buildkernel had completed fine.
>>=20
>> Until yesterday, I'd been running -r325700 or before and had not
>> seen such an issue ever before. I'd been using the virtualbox
>> version for a while before this as well.


Taking the case of:

Stopped at pmap_ts_referenced+0x72e: movq (%rcx,%rdi,8),%rbx:

ffffffff80f2b0fc <pmap_ts_referenced+0x70c> mov    %rax,%rsi
ffffffff80f2b0ff <pmap_ts_referenced+0x70f> shr    $0x1b,%rsi
ffffffff80f2b103 <pmap_ts_referenced+0x713> and    $0xff8,%esi
ffffffff80f2b109 <pmap_ts_referenced+0x719> mov    (%rcx,%rsi,1),%rcx
ffffffff80f2b10d <pmap_ts_referenced+0x71d> and    %r10,%rcx
ffffffff80f2b110 <pmap_ts_referenced+0x720> or     %r9,%rcx
ffffffff80f2b113 <pmap_ts_referenced+0x723> mov    %eax,%edi
ffffffff80f2b115 <pmap_ts_referenced+0x725> shr    $0x15,%edi
ffffffff80f2b118 <pmap_ts_referenced+0x728> and    $0x1ff,%edi
ffffffff80f2b11e <pmap_ts_referenced+0x72e> mov    (%rcx,%rdi,8),%rbx    <<<<<<=======
ffffffff80f2b122 <pmap_ts_referenced+0x732> and    %r10,%rbx
ffffffff80f2b125 <pmap_ts_referenced+0x735> or     %r9,%rbx
ffffffff80f2b128 <pmap_ts_referenced+0x738> shr    $0x9,%rax
ffffffff80f2b12c <pmap_ts_referenced+0x73c> and    $0xff8,%eax
ffffffff80f2b131 <pmap_ts_referenced+0x741> lea    (%rbx,%rax,1),%rsi
ffffffff80f2b135 <pmap_ts_referenced+0x745> mov    (%rbx,%rax,1),%rbx
ffffffff80f2b139 <pmap_ts_referenced+0x749> mov    %rbx,%rax
ffffffff80f2b13c <pmap_ts_referenced+0x74c> and    %rdx,%rax
ffffffff80f2b13f <pmap_ts_referenced+0x74f> cmp    %rdx,%rax
ffffffff80f2b142 <pmap_ts_referenced+0x752> jne    ffffffff80f2b14f <pmap_ts_referenced+0x75f>

Which, if I understand right, is in the
"small_mappings:" code:

		PG_A = pmap_accessed_bit(pmap);
		PG_M = pmap_modified_bit(pmap);
		PG_RW = pmap_rw_bit(pmap);
		pde = pmap_pde(pmap, pv->pv_va);
		KASSERT((*pde & PG_PS) == 0,
		    ("pmap_ts_referenced: found a 2mpage in page %p's pv list",
		    m));
		pte = pmap_pde_to_pte(pde, pv->pv_va);
		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			vm_page_dirty(m);
		if ((*pte & PG_A) != 0) {


with the failure being during the read of *pde in:

/* Return a pointer to the PT slot that corresponds to a VA */
static __inline pt_entry_t *
pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
	return (&pte[pmap_pte_index(va)]);
}



Taking the case of:

New one: pmap_ts_referenced+0x82b: movq %rcx,0x10(%rax)

ffffffff80f2b1fb <pmap_ts_referenced+0x80b> lock cmpxchg %rcx,(%rdx)
ffffffff80f2b200 <pmap_ts_referenced+0x810> sete   %cl
ffffffff80f2b203 <pmap_ts_referenced+0x813> test   %cl,%cl
ffffffff80f2b205 <pmap_ts_referenced+0x815> je     ffffffff80f2b27d <pmap_ts_referenced+0x88d>
ffffffff80f2b207 <pmap_ts_referenced+0x817> test   %r12,%r12
ffffffff80f2b20a <pmap_ts_referenced+0x81a> je     ffffffff80f2b255 <pmap_ts_referenced+0x865>
ffffffff80f2b20c <pmap_ts_referenced+0x81c> mov    0x8(%r12),%rax
ffffffff80f2b211 <pmap_ts_referenced+0x821> test   %rax,%rax
ffffffff80f2b214 <pmap_ts_referenced+0x824> je     ffffffff80f2b255 <pmap_ts_referenced+0x865>
ffffffff80f2b216 <pmap_ts_referenced+0x826> mov    0x10(%r12),%rcx
ffffffff80f2b21b <pmap_ts_referenced+0x82b> mov    %rcx,0x10(%rax)     <<<<<<<<<=========
ffffffff80f2b21f <pmap_ts_referenced+0x82f> mov    0x8(%r12),%rax
ffffffff80f2b224 <pmap_ts_referenced+0x834> mov    0x10(%r12),%rcx
ffffffff80f2b229 <pmap_ts_referenced+0x839> mov    %rax,(%rcx)

Which, if I understand right, appears to be during
the TAILQ_REMOVE of:

		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
. . .

#define TAILQ_REMOVE(head, elm, field) do {                             \
        QMD_SAVELINK(oldnext, (elm)->field.tqe_next);                   \
        QMD_SAVELINK(oldprev, (elm)->field.tqe_prev);                   \
        QMD_TAILQ_CHECK_NEXT(elm, field);                               \
        QMD_TAILQ_CHECK_PREV(elm, field);                               \
        if ((TAILQ_NEXT((elm), field)) != NULL)                         \
                TAILQ_NEXT((elm), field)->field.tqe_prev =              \
                    (elm)->field.tqe_prev;                              \
        else {                                                          \
                (head)->tqh_last = (elm)->field.tqe_prev;               \
                QMD_TRACE_HEAD(head);                                   \
        }                                                               \
        *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field);              \
        TRASHIT(*oldnext);                                              \
        TRASHIT(*oldprev);                                              \
        QMD_TRACE_ELEM(&(elm)->field);                                  \
} while (0)

where the kernel was a non-debug kernel
(with debug symbols).

===
Mark Millard
markmi at dsl-only.net






Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?2E7497BD-06C6-4C86-AA83-1150C735315B>