Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 11 Feb 2017 03:19:25 -0800
From:      Mark Millard <markmi@dsl-only.net>
To:        Tom Vijlbrief <tvijlbrief@gmail.com>, freebsd-arm <freebsd-arm@freebsd.org>
Subject:   Re: Arm64 stack issues (was Re: FreeBSD status for/on ODroid-C2?)
Message-ID:  <2A1F1091-4115-46A1-8DB5-032099A49290@dsl-only.net>
In-Reply-To: <F6BF6129-357F-4F9E-8924-2A4E2112F0DC@gmail.com>
References:  <CAOQrpVfK-Dw_rSo_YVY5MT1wbc6Ah-Pj%2BWv8UGjeiUQ1b3%2B-mg@mail.gmail.com> <20170124191357.0ec0abfd@zapp> <20170128010138.iublazyrhhqycn37@mutt-hardenedbsd> <20170128010223.tjivldnh7pyenbg6@mutt-hardenedbsd> <CAOQrpVfxKvSR5PoahnqEsYspHhjjOGJ8iCBUetKxRV57oX_aUg@mail.gmail.com> <009857E3-35BB-4DE4-B3BB-5EC5DDBB5B06@dsl-only.net> <CAOQrpVdKyP2T0V77sfpuKbNP3ARoD1EcwtH6E9o7p5KF%2B=A56A@mail.gmail.com> <CB36F13F-85E9-41D2-A7F3-DA183BE5985A@dsl-only.net> <890B7D8A-27FF-41AC-8291-1858393EC7B1@gmail.com> <54642E5C-D5D6-45B7-BB74-2407CFB351C2@dsl-only.net> <EB1D79C2-CF5E-4C21-BA1B-EC9F34BB737E@gmail.com> <F6C3286F-46DF-4819-BDD2-10904018E70C@dsl-only.net> <A95CC1DC-36C4-4FC3-A8D4-BDBE6FCB136B@gmail.com> <7B5DF446-6740-43DE-823D-B6ECBECF0C32@dsl-only.net> <1B1EEC5E-9875-417F-9901-A66CB5885634@dsl-only.net> <25B9EBC8-147F-47C2-BC40-C449EF3AC3FE@gmail.com> <71B83856-654D-4F38-894F-1DF41681F0FC@dsl-only.net> <F6BF6129-357F-4F9E-8924-2A4E2112F0DC@gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On 2017-Feb-10, at 9:59 PM, Mark Millard <markmi at dsl-only.net> wrote:

> The stack pointer is messed up when fork returns,
> at least when it happens to be too large to be in
> the stack region.
>=20
> So I conclude that fork sometimes returns a
> corrupted state in the child-process, at least
> for the stack pointer.
>=20
>=20
>=20
> Supporting details:
>=20
> I've added stack checks for the stack pointer being
> too large for the proper stack region after the
> fork in the child-process path
> ( /usr/src/bin/sh/jobs.c modification):
>=20
> extern void stack_check(void);
>=20
> pid_t
> forkshell(struct job *jp, union node *n, int mode)
> {
>        pid_t pid;
>        pid_t pgrp;
>=20
>        TRACE(("forkshell(%%%td, %p, %d) called\n", jp - jobtab, (void =
*)n,
>            mode));
>        INTOFF;
>        if (mode =3D=3D FORK_BG && (jp =3D=3D NULL || jp->nprocs =3D=3D =
0))
>                checkzombies();
>        flushall();
>        pid =3D fork();
>        if (pid =3D=3D -1) {
>                TRACE(("Fork failed, errno=3D%d\n", errno));
>                INTON;
>                error("Cannot fork: %s", strerror(errno));
>        }
>        if (pid =3D=3D 0) {
>                struct job *p;
>                int wasroot;
>                int i;
>=20
>                TRACE(("Child shell %d\n", (int)getpid()));
>                wasroot =3D rootshell;
>                rootshell =3D 0;
>                handler =3D &main_handler;
>                stack_check();         <<<<<<<=3D=3D=3D=3D=3D=3D this =
one catches the "too large" case
>                closescript();
>                stack_check();
>                INTON;
>                stack_check();
>                forcelocal =3D 0;
>                clear_traps();
>                stack_check();
> . . .
>=20
> (Note: TRACE is disabled.)
>=20
> In a separate .c file:
>=20
> void stack_check(void)
> {
>    volatile uintptr_t test =3D 0;
>    extern struct jmploc main_handler;
>    if (*(uintptr_t*)&main_handler.loc[0]._jb[6] < =
(uintptr_t)(void*)&test) abort();
> }
>=20
> (I happened to use /usr/src/bin/sh/miscbltin.c with
> a couple of includes added.)
>=20
> Sometimes the bad sp value is not too large for the
> stack region so this does not catch all the failures.
>=20
>=20
>=20
> (lldb) bt
> * thread #1: tid =3D 100144, 0x0000000040554e54 libc.so.7`_thr_kill + =
8, name =3D 'sh', stop reason =3D signal SIGABRT
>  * frame #0: 0x0000000040554e54 libc.so.7`_thr_kill + 8
>    frame #1: 0x0000000040554e18 libc.so.7`__raise(s=3D6) + 64 at =
raise.c:52
>    frame #2: 0x0000000040554d8c libc.so.7`abort + 84 at abort.c:65
>    frame #3: 0x0000000000411984 sh`stack_check + 88 at miscbltin.c:73
>    frame #4: 0x000000000040f33c sh`forkshell(jp=3D<unavailable>, =
n=3D<unavailable>, mode=3D<unavailable>) + 520 at jobs.c:865
>    frame #5: 0x0000000000405954 sh`evaltree [inlined] evalpipe + 164 =
at eval.c:596
>    frame #6: 0x00000000004058b0 sh`evaltree(n=3D<unavailable>, =
flags=3D<unavailable>) + 1044 at eval.c:286
>    frame #7: 0x0000000000406e28 sh`evalbackcmd(n=3D0x0000000040a50af8, =
result=3D0x0000ffffffffd2f0) + 340 at eval.c:702
>    frame #8: 0x0000000000409324 sh`argstr [inlined] =
expbackq(cmd=3D0x0000000040a50af8, flag=3D<unavailable>, =
dst=3D<unavailable>) + 40 at expand.c:461
>    frame #9: 0x00000000004092fc sh`argstr(p=3D"", flag=3D<unavailable>, =
dst=3D<unavailable>) + 392 at expand.c:315
>    frame #10: 0x0000000000408fa8 sh`expandarg(arg=3D<unavailable>, =
arglist=3D0x0000ffffffffd688, flag=3D<unavailable>) + 112 at =
expand.c:234
>    frame #11: 0x0000000000405f48 sh`evalcommand(cmd=3D<unavailable>, =
flags=3D<unavailable>, backcmd=3D<unavailable>) + 224 at eval.c:863
>    frame #12: 0x0000000000405570 sh`evaltree(n=3D0x0000000040a50bd0, =
flags=3D<unavailable>) + 212 at eval.c:290
>    frame #13: 0x0000000000405550 sh`evaltree(n=3D0x0000000040a50ee0, =
flags=3D<unavailable>) + 180 at eval.c:213
>    frame #14: 0x0000000000411050 sh`cmdloop(top=3D<unavailable>) + 252 =
at main.c:231
>    frame #15: 0x0000000000410ec4 sh`main(argc=3D<unavailable>, =
argv=3D<unavailable>) + 660 at main.c:178
>    frame #16: 0x0000000000402f30 sh`__start + 360
>    frame #17: 0x0000000040434658 ld-elf.so.1`.rtld_start + 24 at =
rtld_start.S:41
>=20
> (lldb) register read
> General Purpose Registers:
>        x0 =3D 0x0000000000000000
>        x1 =3D 0x0000000000000000
>        x2 =3D 0x0000000000000000
>        x3 =3D 0x0000000040573638  libc.so.7`_sigprocmask
>        x4 =3D 0x0000000040a50b2c
>        x5 =3D 0x0000000040a4f4f9
>        x6 =3D 0x0000000000646573
>        x7 =3D 0x0000000000646573
>        x8 =3D 0x00000000000001b1
>        x9 =3D 0x0000000000000000
>       x10 =3D 0x0000000000000000
>       x11 =3D 0x0000000000000018
>       x12 =3D 0x0000000000000004
>       x13 =3D 0x0000000000000427
>       x14 =3D 0x0000000000000001
>       x15 =3D 0x0000000000000000
>       x16 =3D 0x0000000040554e4c  libc.so.7`_thr_kill
>       x17 =3D 0x0000ffffffffe8e0
>       x18 =3D 0x0000000000000000
>       x19 =3D 0x0000000000000006
>       x20 =3D 0x0000000040a36180
>       x21 =3D 0x0000000000000000
>       x22 =3D 0x0000000000000000
>       x23 =3D 0x0000000000434000  sh..bss + 6336
>       x24 =3D 0x0000000000434000  sh..bss + 6336
>       x25 =3D 0x0000000040a36180
>       x26 =3D 0x0000000000000003
>       x27 =3D 0x0000000000434000  sh..bss + 6336
>       x28 =3D 0x0000000040a50b18
>        fp =3D 0x0000ffffffffe910
>        lr =3D 0x0000000040554e18  libc.so.7`__raise + 64 at raise.c:52
>        sp =3D 0x0000ffffffffe8f0
>        pc =3D 0x0000000040554e54  libc.so.7`_thr_kill + 8
>      cpsr =3D 0x80000000


The assembler code path between the fork call and the
stack_check call is shown as part of the below:

    0x40f1c4 <+144>:  bl     0x414040                  ; flushall at =
output.c:236
    0x40f1c8 <+148>:  bl     0x402920                  ; symbol stub =
for: fork
    0x40f1cc <+152>:  mov    w19, w0
    0x40f1d0 <+156>:  cbz    w19, 0x40f31c             ; <+488> at =
jobs.c:862
. . .
    0x40f31c <+488>:  adrp   x8, 37
    0x40f320 <+492>:  adrp   x9, 37
    0x40f324 <+496>:  ldr    w22, [x8, #0xc08]
    0x40f328 <+500>:  str    wzr, [x8, #0xc08]
    0x40f32c <+504>:  adrp   x8, 37
    0x40f330 <+508>:  add    x8, x8, #0xa00            ; =3D0xa00=20
    0x40f334 <+512>:  str    x8, [x9, #0x978]
    0x40f338 <+516>:  bl     0x41192c                  ; stack_check at =
miscbltin.c:70
    0x40f33c <+520>:  bl     0x40d684                  ; closescript at =
input.c:512
    0x40f340 <+524>:  bl     0x41192c                  ; stack_check at =
miscbltin.c:70

(not much and not stack pointer manipulation).

Based on:

(lldb) print/x main_handler
(jmploc) $42 =3D {
  loc =3D {
    [0] =3D {
      _jb =3D {
        [0] =3D 0x0000ffffffffd900fb5d25837d7ff700
        [1] =3D 0x00000000000000030000ffffffffd9b0
        [2] =3D 0x00000000004320380000000000434a00
        [3] =3D 0x00000000000000000000000000000000
        [4] =3D 0x00000000000000000000000000000000
        [5] =3D 0x00000000000000000000000000000000
        [6] =3D 0x0000000000410c740000ffffffffd950
. . .

so that main's back-pointer to the prior frame is:

0x0000ffffffffd950

the high-address part of the (active) stack region looks
in part like:

0xffffffffd990: 0x0000000000000000 0x0000000040434658
ld-elf.so.1`.rtld_start:
    0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
    0x40434658 <+24>: mov    x8, x0

0xffffffffd950: 0x0000ffffffffd990 0x0000000000402f30
sh`__start:
    0x402f2c <+356>: bl     0x410c30                  ; main at =
main.c:97
    0x402f30 <+360>: bl     0x402ae0                  ; symbol stub for: =
exit


0xffffffffd8f0: 0x0000ffffffffd950 0x0000000000410ec4
sh`main:
    0x410ec0 <+656>: bl     0x410f54                  ; cmdloop at =
main.c:199
    0x410ec4 <+660>: adrp   x8, 36

. . .

0xffffffffce90: 0x0000ffffffffcf40 0x0000000000405954
sh`evaltree:
    0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:840
    0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286

(The 0x0000ffffffffcf40 was recorded by forkshell
at 0xffffffffce90. The next frame-pointer/lr-value
pair is shown later.)

But looking around I found another area of the strings
that had been replaced by stack contents in
higher-addressed memory than just the obviously
forkshell related frames that I've referenced in
earlier notes. Showing both:

. . . (strings) . . .
0xffffffffe1e0: 0x4c454b414d00792d 0x5400323d4c455645
0xffffffffe1f0: 0x5f4c4556454c504f 0x52554749464e4f43
0xffffffffe200: 0x454d554752415f45 0x7273752f3d53544e
0xffffffffe210: 0x726f702f6a626f2f 0x752f6b726f777374
0xffffffffe220: 0x7374726f702f7273 0x612f6c657665642f
0xffffffffe230: 0x6e2d343668637261 0x2d666c652d656e6f
0xffffffffe240: 0x6b726f772f636367 0x332e362d6363672f
0xffffffffe250: 0x69666e6f632f302e 0x742d2d2065727567
0xffffffffe260: 0x61613d7465677261 0x6f6e2d3436686372
0xffffffffe270: 0x2d20666c652d656e 0x656c62617369642d
(frames below)
0xffffffffe280: 0x00000000404aed84 0x0000000000000001
0xffffffffe290: 0x0000ffffffffe590 0x0000000000000000
0xffffffffe2a0: 0x0000000000000000 0x0000000000000723
0xffffffffe2b0: 0x000000004044f800 0x0000ffffffffe370
0xffffffffe2c0: 0x0000ffffffffe360 0x0000000040438f70
0xffffffffe2d0: 0x000000004044f800 0x00000000000001c0
0xffffffffe2e0: 0x00000000404aed80 0x0000000000000000
0xffffffffe2f0: 0x0000ffff00000000 0x0000000040438f70
0xffffffffe300: 0x0000ffffffffe600 0x0000000000000000
0xffffffffe310: 0x0000000000000000 0x0000000000000000
0xffffffffe320: 0x0000000000000001 0x0000ffffffffe3e0
0xffffffffe330: 0x0000ffffffffe590 0x0000000000000000
0xffffffffe340: 0x0000000000000000 0x000000004044d180
0xffffffffe350: 0x0000ffffffffe4c0 0x0000ffffffffe3f0
0xffffffffe360: 0x0000ffffffffe3e0 0x000000004043c66c
0xffffffffe370: 0x00000000404bab29 0x0000000009d690dc
0xffffffffe380: 0x0000fffff2ee705a 0x000000004044e3c0
0xffffffffe390: 0x0000ffff00000001 0x000000004044f800
0xffffffffe3a0: 0x00000000404aed80 0x0000ffffffffe590
0xffffffffe3b0: 0x00000000404bab29 0x00000000404bab23
0xffffffffe3c0: 0x000000004044f800 0x0000ffffffffe7a8
0xffffffffe3d0: 0x0000ffffffffe4c0 0x0000ffffffffe4f0
0xffffffffe3e0: 0x0000ffffffffe450 0x000000004043c4b0
0xffffffffe3f0: 0x00000000404bab29 0x0000000009d690dc
0xffffffffe400: 0x00000000f2ee705a 0x000000004044e3c0
0xffffffffe410: 0x0000ffff00000001 0x000000004044f800
0xffffffffe420: 0x00000000404aed80 0x0000ffffffffe590
0xffffffffe430: 0x000000004044f800 0x0000ffffffffe7a8
0xffffffffe440: 0x000000004044f800 0x0000ffffffffe4f0
0xffffffffe450: 0x0000ffffffffe4e0 0x0000000040437440
0xffffffffe460: 0x0000000000000000 0x0000000000000000
0xffffffffe470: 0x0000000040a50b88 0x0000000000646573
0xffffffffe480: 0x0000000000000010 0x0000000040a50b68
0xffffffffe490: 0x8020080200000000 0x8020080280000000
0xffffffffe4a0: 0x8020080280200800 0x8020080200000000
0xffffffffe4b0: 0x0000000000000000 0x0000000000000000
0xffffffffe4c0: 0x0000000000000000 0x0000000000000000
0xffffffffe4d0: 0x8020080280200802 0x8020080280200802
0xffffffffe4e0: 0x0000ffffffffe570 0x00000000404bab29
0xffffffffe4f0: 0x000000004044d0e5 0x0000000040554e4c
0xffffffffe500: 0x000000004044d0e5 0x0000000000000000
0xffffffffe510: 0x0000008080808080 0xfefefefeff2f2d30
0xffffffffe520: 0x00000000404aed80 0x0000ffffffffe590
0xffffffffe530: 0x0000000000000003 0x0000000040a36180
0xffffffffe540: 0x0000000000434000 0x00000000001675e8
0xffffffffe550: 0x000000004049f000 0x0000ffffffffe590
0xffffffffe560: 0x00000000404ce9d0 0x0000000040554e4c
0xffffffffe570: 0x0000ffffffffe800 0x0000000040436edc
0xffffffffe580: 0x000000004049f000 0x0000ffffffffe5c0
0xffffffffe590: 0x0000000000000001 0x0000000040554dd8
0xffffffffe5a0: 0xfb5d25837d7ff700 0x0000ffffffffe580
0xffffffffe5b0: 0x000000004044f800 0x0000000000002a78
0xffffffffe5c0: 0x0000ffffffffe590 0x0000000000000000
0xffffffffe5d0: 0x0000000000434000 0x0000000000434000
0xffffffffe5e0: 0x0000000040a36180 0x0000000000000003
0xffffffffe5f0: 0x0000000000434000 0x000000004045b8c8
0xffffffffe600: 0x0000ffffffffe800 0x0000000040436de0
. . . (zeros) . . .
0xffffffffe6c0: 0x4b4d00646c697562 0x5f4c454e52454b5f
0xffffffffe6d0: 0x3d534c4f424d5953 0x52504e414d006f6e
0xffffffffe6e0: 0x73752f3d58494645 0x006c61636f6c2f72
. . . (strings) . .  .
0xffffffffe730: 0x6f702f7273752f6b 0x657665642f737472
0xffffffffe740: 0x3668637261612f6c 0x652d656e6f6e2d34
0xffffffffe750: 0x772f6363672d666c 0x6975622e2f6b726f
0xffffffffe760: 0x006c746e692f646c 0x5550435f504d535f
0xffffffffe770: 0x425f4d5000343d53 0x3d474e49444c4955
0xffffffffe780: 0x69646c6975626d70 0x42006e69616d676e
0xffffffffe790: 0x4154534e495f4453 0x50495243535f4c4c
0xffffffffe7a0: 0x6c6174736e693d54 0x000000004044f800
(frames below again)
0xffffffffe7b0: 0x0000000040a50b18 0x0000000000434000
0xffffffffe7c0: 0x0000000000000003 0x0000000040a36180
0xffffffffe7d0: 0x0000000000434000 0x0000000000434000
0xffffffffe7e0: 0x0000000000000000 0x0000000000000000
0xffffffffe7f0: 0x0000000040a36180 0x0000000000000006
0xffffffffe800: 0x0000ffffffffe910 0x00000000404346b4
0xffffffffe810: 0x0000000000000000 0x0000000000000000
0xffffffffe820: 0x8020080280200802 0x8020080280200802
0xffffffffe830: 0x8020080280200800 0x8020080200000000
0xffffffffe840: 0x0000000000000000 0x0000000000000000
0xffffffffe850: 0x0000000000000010 0x0000000040a50b68
0xffffffffe860: 0x8020080200000000 0x8020080280000000
0xffffffffe870: 0x2f2f2f2f2f2f2f2f 0x2f2f2f2f2f2f2f2f
0xffffffffe880: 0x0000000040a50b88 0x0000000000646573
0xffffffffe890: 0x00000000000001b0 0x0000000000000000
0xffffffffe8a0: 0x0000000000646573 0x0000000000646573
0xffffffffe8b0: 0x0000000040a50b2c 0x0000000040a4f4f9
0xffffffffe8c0: 0x0000000000000000 0x0000000040573638
0xffffffffe8d0: 0x0000000000018730 0x0000000000000006
0xffffffffe8e0: 0x00000000406065e8 0x0000000040554e18
0xffffffffe8f0: 0x0000000000018730 0xeb61b0294df66bbd
0xffffffffe900: 0x0000ffffffffe92c 0x0000000000000000
0xffffffffe910: 0x0000ffffffffe950 0x0000000040554d8c
0xffffffffe920: 0x0000000040a50b2c 0xffffffdf40a4f4f9
0xffffffffe930: 0xffffffffffffffff 0x00000000ffffffff
0xffffffffe940: 0x0000000000000000 0x0000000000000000
0xffffffffe950: 0x0000ffffffffe970 0x0000000000411984
0xffffffffe960: 0x0000000000000000 0xeb61b0294df66bbd
0xffffffffe970: 0x0000ffffffffce90 0x000000000040f33c
(back to strings below)
0xffffffffe980: 0x4c45444145520078 0x41545f524f465f46
0xffffffffe990: 0x73752f3d54454752 0x2f6c61636f6c2f72
0xffffffffe9a0: 0x2d34366863726161 0x666c652d656e6f6e
0xffffffffe9b0: 0x6165722f6e69622f 0x534f4800666c6564
0xffffffffe9c0: 0x003d5342494c5f54 0x455341434c415544
0xffffffffe9d0: 0x49505f4f4e00313d 0x5250007365793d45
0xffffffffe9e0: 0x73752f3d58494645 0x006c61636f6c2f72


The frames from this higher-address area are:

(strings at higher addresses near by)

0xffffffffe970: 0x0000ffffffffce90 0x000000000040f33c
sh`forkshell:
    0x40f338 <+516>: bl     0x41192c                  ; stack_check at =
miscbltin.c:70
    0x40f33c <+520>: bl     0x40d684                  ; closescript at =
input.c:512
    0x40f340 <+524>: bl     0x41192c                  ; stack_check at =
miscbltin.c:70

(The 0x0000ffffffffce90 was recorded by stack_check
but at 0xffffffffe970: sp is out of the active stack
region here [bad value].)

0xffffffffe950: 0x0000ffffffffe970 0x0000000000411984
sh`stack_check:
    0x411980 <+84>: bl     0x402770                  ; symbol stub for: =
abort
    0x411984 <+88>: bl     0x402bc0                  ; symbol stub for: =
__stack_chk_fail

0xffffffffe910: 0x0000ffffffffe950 0x0000000040554d8c
libc.so.7`abort:
    0x40554d88 <+80>: bl     0x332c0                   ; symbol stub =
for: acl_init
    0x40554d8c <+84>: mov    x0, x19

0xffffffffe800: 0x0000ffffffffe910 0x00000000404346b4
ld-elf.so.1`_rtld_bind_start:
    0x404346b0 <+64>: bl     0x4d90                    ; _rtld_bind at =
rtld.c:710
    0x404346b4 <+68>: ldp    xzr, x30, [sp, #0xd0]

(There were strings between above and below.)

0xffffffffe600: 0x0000ffffffffe800 0x0000000040436de0
(lldb) dis -s -4*4+0x0000000040436de0
ld-elf.so.1`_rtld_bind:
    0x40436ddc <+76>: bl     0x106d8                   ; sigsetjmp
    0x40436de0 <+80>: cbz    w0, 0x4df0                ; <+96> at =
rtld.c:721

(Nothing links back to 0xffffffffe600, as expected for sigsetjmp
without a matching siglongjmp.)

0xffffffffe570: 0x0000ffffffffe800 0x0000000040436edc
ld-elf.so.1`_rtld_bind:
    0x40436ed8 <+328>: bl     0xaf68                    ; lock_release =
at rtld_lock.c:228
    0x40436edc <+332>: mov    x0, x19

0xffffffffe4e0: 0x0000ffffffffe570 0x00000000404bab29
(0x00000000404bab29 is an odd value; address range
around it seems to not be code based on lldb dis
output. Also the core file does not contain 0xaf68
from the above.)

0xffffffffe450: 0x0000ffffffffe4e0 0x0000000040437440
ld-elf.so.1`symlook_default:
    0x4043743c <+172>: bl     0xa434                    ; symlook_global =
at rtld.c:3906
    0x40437440 <+176>: ldr    x20, [x20, #0x258]

0xffffffffe3e0: 0x0000ffffffffe450 0x000000004043c4b0
ld-elf.so.1`symlook_global:
    0x4043c4ac <+120>: bl     0xa5bc                    ; symlook_list =
at rtld.c:4005
    0x4043c4b0 <+124>: cbnz   w0, 0xa4e4                ; <+176> at =
rtld.c:3926

0xffffffffe360: 0x0000ffffffffe3e0 0x000000004043c66c
ld-elf.so.1`symlook_list:
    0x4043c668 <+172>: bl     0x6e44                    ; symlook_obj at =
rtld.c:4083
    0x4043c66c <+176>: cbnz   w0, 0xa698                ; <+220> at =
rtld.c:4014

0xffffffffe2c0: 0x0000ffffffffe360 0x0000000040438f70
ld-elf.so.1`symlook_obj:
    0x40438f6c <+296>: bl     0xa6cc                    ; matched_symbol =
at rtld.c:4132
    0x40438f70 <+300>: and    w8, w0, #0xff

(More strings at nearby smaller addresses.)

Example strings (increasing addresses) in the
area with out-of-place stack frames:

. . . (more strings) . . .
(lldb) print (char*)0xffffffffe1e3
(char *) $20 =3D 0x0000ffffffffe1e3 "MAKELEVEL=3D2"
(lldb) print (char*)0xffffffffe1ef
(char *) $22 =3D 0x0000ffffffffe1ef =
"TOPLEVEL_CONFIGURE_ARGUMENTS=3D/usr/obj/portswork/usr/ports/devel/aarch64=
-none-elf-gcc/work/gcc-6.3.0/configure --target=3Daarch64-none-elf =
--disable\x84\xedJ@"
. . . (stack frames then some zeros) . . .
(lldb) print (char*)0xffffffffe6c0
(char *) $13 =3D 0x0000ffffffffe6c0 "build"
(lldb) print (char*)0xffffffffe6c6
(char *) $19 =3D 0x0000ffffffffe6c6 "MK_KERNEL_SYMBOLS=3Dno"
. . . (more strings) . . .
(lldb) print (char*)0xffffffffe774
(char *) $39 =3D 0x0000ffffffffe774 "PM_BUILDING=3Dpmbuildingmain"
(lldb) print (char*)0xffffffffe78f
(char *) $40 =3D 0x0000ffffffffe78f "BSD_INSTALL_SCRIPT=3Dinstal"
. . . (stack frames) . . .
(lldb) print (char*)0xffffffffe982
(char *) $57 =3D 0x0000ffffffffe982 =
"READELF_FOR_TARGET=3D/usr/local/aarch64-none-elf/bin/readelf"
(lldb) print (char*)0xffffffffe9bd
(char *) $58 =3D 0x0000ffffffffe9bd "HOST_LIBS=3D"
. . . (more strings) . . .


The big sp jump in:

0xffffffffe970: 0x0000ffffffffce90 0x000000000040f33c

leaves some history showing. . .

0xffffffffce40: 0x0000ffffffffce90 0x000000000040f1c8
sh`forkshell:
    0x40f1c4 <+144>: bl     0x414040                  ; flushall at =
output.c:236
    0x40f1c8 <+148>: bl     0x402920                  ; symbol stub for: =
fork
    0x40f1cc <+152>: mov    w19, w0

shows that flushall's recording is before the
odd sp value shows up.


=3D=3D=3D
Mark Millard
markmi at dsl-only.net


> On 2017-Feb-8, at 8:53 PM, Mark Millard <markmi at dsl-only.net> =
wrote:
>=20
>> Another sh core, this one with non-zero "junk" around
>> the sp at the core-dump gives new information. The "junk"
>> is because the SP actually ends up in higher addressed
>> memory than the base frame (when .rtld_start does
>> "bl _rtld"). [Some sh core dumps have different sp
>> relationships than this but this can and does
>> happen.]
>>=20
>> With the below additional evidence I conclude that
>> either the stack pointer was messed up when fork
>> returned for the child path or shortly thereafter
>> (while sh's forkshell routine was still active).
>>=20
>>=20
>>=20
>>=20
>>=20
>> Supporting details:
>>=20
>> General Purpose Registers:
>> . . .
>>       sp =3D 0x0000ffffffffe600
>>=20
>> The sp =3D 0x0000ffffffffe600 is rather high in memory,
>> in fact outside the stack for what ld-elf.so.1`.rtld_start
>> calls. . .
>>=20
>> 0xffffffffd5b0: 0x0000ffffffffd5f0 0x0000000000402f30
>> 0xffffffffd5c0: 0x0000000000000000 0x0000000000000000
>> 0xffffffffd5d0: 0x0000000000000000 0x0000000000000000
>> 0xffffffffd5e0: 0x0000ffffffffd600 0x0000ffffffffd600
>> 0xffffffffd5f0: 0x0000000000000000 0x0000000040434658
>>=20
>> (Note: 0x0000ffffffffe600-0x1000=3D=3D0xffffffffd5f0+0x10
>> but other core files have widely varying distances.)
>>=20
>> For that last line:
>>=20
>> 0xffffffffd5f0: 0x0000000000000000 0x0000000040434658
>> (lldb) dis -s -4*4+0x0000000040434658
>> ld-elf.so.1`.rtld_start:
>>   0x40434648 <+8>:  sub    sp, sp, #0x10             ; =3D0x10=20
>>   0x4043464c <+12>: mov    x1, sp
>>   0x40434650 <+16>: add    x2, x1, #0x8              ; =3D0x8=20
>>   0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
>>   0x40434658 <+24>: mov    x8, x0
>>=20
>> (0x2e4c is not in the core file.)
>>=20
>> and for the other frame-pointer/lr-value pair:
>>=20
>> 0xffffffffd5b0: 0x0000ffffffffd5f0 0x0000000000402f30
>> (lldb) dis -s -4*4+0x0000000000402f30
>> sh`__start:
>>   0x402f20 <+344>: mov    w0, w21
>>   0x402f24 <+348>: mov    x1, x20
>>   0x402f28 <+352>: mov    x2, x19
>>   0x402f2c <+356>: bl     0x410c14                  ; main at =
main.c:97
>>   0x402f30 <+360>: bl     0x402ae0                  ; symbol stub =
for: exit
>>=20
>> Note: Anything higher addressed in memory than that=20
>> 0xffffffffd5ff I'll say is "higher than the stack
>> region" or some such phrase.
>>=20
>> Yet despite being higher than the stack region
>> there are some stack frames also near by (also
>> higher than the stack region). . .
>>=20
>> An area around the sp =3D 0x0000ffffffffe600 that lldb
>> reported for this core (with some notes used later):
>>=20
>> . . .
>> 0xffffffffe400: 0x6572662d6e776f6e 0x302e323164736265
>> 0xffffffffe410: 0x56454c454b414d00 0x42494c00323d4c45
>> 0xffffffffe420: 0x403d434e49464c45 0x6e69666c6562696c
>> 0xffffffffe430: 0x4f465f444c004063 0x5445475241545f52
>> 0xffffffffe440: 0x6f6c2f7273752f3d 0x637261612f6c6163
>> 0xffffffffe450: 0x656e6f6e2d343668 0x6e69622f666c652d
>> 0xffffffffe460: 0x3d62647000646c2f 0x2f62642f7261762f
>> 0xffffffffe470: 0x74726f7000676b70 0x2f3d72696462645f
>> 0xffffffffe480: 0x702f62642f726176 0x5f4d50007374726f
>> 0xffffffffe490: 0x505f544e45524150 0x657665643d54524f
>> 0xffffffffe4a0: 0x3668637261612f6c 0x652d656e6f6e2d34
>> 0xffffffffe4b0: 0x43006363672d666c 0x5243530063633d43
>> 0xffffffffe4c0: 0x6f6f722f3d545049 0x5f7374726f702f74
>> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470 ("junk" (text) =
temporarily stops here)
>> 0xffffffffe4e0: 0x00000000004345c8 0x0000000000434000 (beginning of =
what looks like stack frames)
>> 0xffffffffe4f0: 0x0000000000434000 0x0000000040a903e0
>> 0xffffffffe500: 0x0000ffffffffe540 0x000000004054cd94
>> 0xffffffffe510: 0x696d6b72616d2f6c 0x0000000000000000
>> 0xffffffffe520: 0x0000000000000000 0x0000000000000000
>> 0xffffffffe530: 0x0000000000000000 0xe8021690dc1f70b8
>> 0xffffffffe540: 0x00000000004345c8 0x0000000000434000
>> 0xffffffffe550: 0x0000000000434000 0x000000000000000f
>> 0xffffffffe560: 0x0000ffffffffe5a0 0x000000000041aef0
>> 0xffffffffe570: 0x0000000000434c38 0x732f7273752f3a6e
>> 0xffffffffe580: 0x0000000000000001 0x0000000000000005
>> 0xffffffffe590: 0x0000000040a33180 0x0000000000000000
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490 ("junk" (text) =
starts again after this line)
>> 0xffffffffe5b0: 0x54494445003d4854 0x41500069763d524f x26, x25 values =
(see below)
>> 0xffffffffe5c0: 0x2f3d534547414b43 0x74726f702f727375 x24, x23 values =
(see below)
>> 0xffffffffe5d0: 0x67616b6361702f73 0x414c46444c007365 x22, x21 values =
(see below)
>> 0xffffffffe5e0: 0x58584300203d5347 0x662d3d5347414c46 x20, x19 values =
(see below)
>> 0xffffffffe5f0: 0x2d74656b63617262 0x31353d6874706564  fp,  lr values =
(see below); pc=3D=3Dlr eventually.
>> 0xffffffffe600: 0x346d673d344d0032 0x73752f3d564e4500
>> 0xffffffffe610: 0x6d2f656d6f682f72 0x732e2f696d6b7261
>> 0xffffffffe620: 0x2f3d647000637268 0x74726f702f727375
>> 0xffffffffe630: 0x4441524750550073 0x703d4c4f4f545f45
>> 0xffffffffe640: 0x657473616d74726f 0x5254524f46470072
>> 0xffffffffe650: 0x4552534f003d4e41 0x4f00302e32313d4c
>> 0xffffffffe660: 0x752f3d445750444c 0x702f6a626f2f7273
>> . . .
>>=20
>> So at this point we have that the stack pointer was
>> messed up somewhat prior to the core-dump.
>>=20
>> x19-x26 in the below are from the locations indicated
>> to the side above:
>>=20
>> General Purpose Registers:
>>       x0 =3D 0x0000000000000000
>>       x1 =3D 0x0000000000000000
>>       x2 =3D 0x0000000000000000
>>       x3 =3D 0x00000000405735c8  libc.so.7`__sys_sigaction
>>       x4 =3D 0x0000000000000090
>>       x5 =3D 0x2080002000200000
>>       x6 =3D 0x0000000000434c28  sh..bss + 9448
>>       x7 =3D 0x00000000000c590d
>>       x8 =3D 0x0000000000000000
>>       x9 =3D 0x0000000000000000
>>      x10 =3D 0x0000000000434000  sh..bss + 6336
>>      x11 =3D 0x0000000000000000
>>      x12 =3D 0x0000000000434c38  sh..bss + 9464
>>      x13 =3D 0x0000000000000001
>>      x14 =3D 0x0000000000000063
>>      x15 =3D 0x0000000000000010
>>      x16 =3D 0x0000000000432280 =20
>>      x17 =3D 0x0000000040573554  libc.so.7`sigaction at =
sigaction.c:49
>>      x18 =3D 0x0000000000000000
>>      x19 =3D 0x662d3d5347414c46
>>      x20 =3D 0x58584300203d5347
>>      x21 =3D 0x414c46444c007365
>>      x22 =3D 0x67616b6361702f73
>>      x23 =3D 0x74726f702f727375
>>      x24 =3D 0x2f3d534547414b43
>>      x25 =3D 0x41500069763d524f
>>      x26 =3D 0x54494445003d4854
>>      x27 =3D 0x0000ffffffffc658
>>      x28 =3D 0x0000000000000000
>>       fp =3D 0x2d74656b63617262
>>       lr =3D 0x31353d6874706564
>>       sp =3D 0x0000ffffffffe600
>>       pc =3D 0x31353d6874706564
>>     cpsr =3D 0x20000000
>>=20
>> Note the:
>>=20
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>>=20
>> is somewhat before 0x0000ffffffffe600. It looks to be
>> a frame-pointer/lr-value pair. (And the 0x0000ffffffffc5c0
>> does point to a frame-pointer/lr-value pair that is
>> part of a coherent chain of them inside the stack region.)
>>=20
>> It seems likely that despite the long distance
>> framepointer reference in the fp/lr value pair:
>>=20
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>>=20
>> that sp =3D 0x0000ffffffffe600 was the result of an
>> increment by a small fixed amount from an sp near
>> 0xffffffffe5a0, such as by code like the following
>> when forkshell tries to return:
>>=20
>> sh`forkshell:
>>   0x40f520 <+1004>: ldp    x29, x30, [sp, #0x40]
>>   0x40f524 <+1008>: ldp    x20, x19, [sp, #0x30]
>>   0x40f528 <+1012>: ldp    x22, x21, [sp, #0x20]
>>   0x40f52c <+1016>: ldp    x24, x23, [sp, #0x10]
>>   0x40f530 <+1020>: ldp    x26, x25, [sp], #0x50
>>   0x40f534 <+1024>: ret   =20
>>=20
>> So the prior value is sp=3D0xffffffffe5b0 for the above code.
>> Also note that for SP=3D0xffffffffe5b0 initially that
>> code would fill in x19-x26 as they were actually
>> filled in: solid evidence of the sp that exit code
>> started with.
>>=20
>> Note that forkshell had started with:
>>=20
>> sh`forkshell:
>>   0x40f134 <+0>:  stp    x26, x25, [sp, #-0x50]!
>>   0x40f138 <+4>:  stp    x24, x23, [sp, #0x10]
>>   0x40f13c <+8>:  stp    x22, x21, [sp, #0x20]
>>   0x40f140 <+12>: stp    x20, x19, [sp, #0x30]
>>   0x40f144 <+16>: stp    x29, x30, [sp, #0x40]
>>   0x40f148 <+20>: add    x29, sp, #0x40            ; =3D0x40=20
>>=20
>> And that indicates that sp had a big change after:
>>=20
>>   0x40f148 <+20>: add    x29, sp, #0x40            ; =3D0x40=20
>>=20
>> in order for x29=3D0x0000ffffffffc5c0 to have later
>> been written out as it was at 0xffffffffe5a0.
>>=20
>> But by the freejob call (that is in forkshell's child
>> process path) that is indicated by the below the
>> sp had changed to be higher than the stack region:
>>=20
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>> (lldb) dis -s -4*4+0x000000000040f490
>> sh`forkshell:
>>   0x40f480 <+844>: ldrb   w8, [x20, #0x21]
>>   0x40f484 <+848>: cbz    w8, 0x40f490              ; <+860> at =
jobs.c:907
>>   0x40f488 <+852>: mov    x0, x20
>>   0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>>   0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30=20
>> . . .
>>=20
>> where freejob started with:
>>=20
>> (lldb) dis -s freejob
>> sh`freejob:
>>   0x40e65c <+0>:  str    x23, [sp, #-0x40]!
>>   0x40e660 <+4>:  stp    x22, x21, [sp, #0x10]
>>   0x40e664 <+8>:  stp    x20, x19, [sp, #0x20]
>>   0x40e668 <+12>: stp    x29, x30, [sp, #0x30]
>>   0x40e66c <+16>: add    x29, sp, #0x30            ; =3D0x30=20
>> . . .
>>=20
>> and the contained:
>>=20
>>   0x40e668 <+12>: stp    x29, x30, [sp, #0x30]
>>=20
>> wrote the frame-pointer/lr-value pair:
>>=20
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>>=20
>> after freejob was called. freejob returns with:
>>=20
>> (lldb) dis -s freejob -c 128
>> sh`freejob:
>> . . .
>>   0x40e748 <+236>: ldp    x29, x30, [sp, #0x30]
>>   0x40e74c <+240>: ldp    x20, x19, [sp, #0x20]
>>   0x40e750 <+244>: ldp    x22, x21, [sp, #0x10]
>>   0x40e754 <+248>: ldr    x23, [sp], #0x40
>>   0x40e758 <+252>: ret   =20
>> . . .
>>=20
>> The lr-value from:
>>=20
>> 0xffffffffc5c0: 0x0000ffffffffc8f0 0x0000000000406648
>>=20
>> refers to:
>>=20
>> sh`evalcommand:
>>   0x406644 <+2012>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>>   0x406648 <+2016>: cbz    w0, 0x40666c              ; <+2052> at =
eval.c:1175
>>=20
>> as expected. This is in the stack region.
>>=20
>> There is evidence of the following frame-pointer/lr-value
>> pair still in the stack-region from just before the fork but
>> while forkshell was active and before the big change in
>> sp value:
>>=20
>> 0xffffffffc570: 0x0000ffffffffc5c0 0x000000000040f1c8
>> sh`forkshell:
>>   0x40f1c4 <+144>: bl     0x413fc8                  ; flushall at =
output.c:236
>>   0x40f1c8 <+148>: bl     0x402920                  ; symbol stub =
for: fork
>>=20
>> The parent process did not crash and so there is no evidence
>> that its sp value was ever wrong. So going into fork
>> things seem to have been okay.
>>=20
>> So far it does not appear to me that there is information
>> left for inside or after the fork but before the freejob call
>> on the child process path. So the fork itself might have
>> returned with the wrong sp value or the problem might have
>> occurred a little later.
>>=20
>> As far as the bad sp values in this example core file. . .
>>=20
>> The content of what I've historically called "junk"
>> areas that are actually outside the stack region are
>> interesting:
>>=20
>> . . .
>> 0xffffffffe400: 0x6572662d6e776f6e 0x302e323164736265
>> 0xffffffffe410: 0x56454c454b414d00 0x42494c00323d4c45
>> 0xffffffffe420: 0x403d434e49464c45 0x6e69666c6562696c
>> 0xffffffffe430: 0x4f465f444c004063 0x5445475241545f52
>> 0xffffffffe440: 0x6f6c2f7273752f3d 0x637261612f6c6163
>> 0xffffffffe450: 0x656e6f6e2d343668 0x6e69622f666c652d
>> 0xffffffffe460: 0x3d62647000646c2f 0x2f62642f7261762f
>> 0xffffffffe470: 0x74726f7000676b70 0x2f3d72696462645f
>> 0xffffffffe480: 0x702f62642f726176 0x5f4d50007374726f
>> 0xffffffffe490: 0x505f544e45524150 0x657665643d54524f
>> 0xffffffffe4a0: 0x3668637261612f6c 0x652d656e6f6e2d34
>> 0xffffffffe4b0: 0x43006363672d666c 0x5243530063633d43
>> 0xffffffffe4c0: 0x6f6f722f3d545049 0x5f7374726f702f74
>> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470 ("junk" (text) =
temporarily stops here)
>>=20
>> In other words ('\0' terminated strings):
>>=20
>> . . .
>> (lldb) print (char*)0xffffffffe400
>> (char *) $67 =3D 0x0000ffffffffe400 "nown-freebsd12.0"
>> (lldb) print (char*)0xffffffffe411
>> (char *) $66 =3D 0x0000ffffffffe411 "MAKELEVEL=3D2"
>> (lldb) print (char*)0xffffffffe433
>> (char *) $68 =3D 0x0000ffffffffe433 =
"LD_FOR_TARGET=3D/usr/local/aarch64-none-elf/bin/ld"
>> (lldb) print (char*)0xffffffffe464
>> (char *) $69 =3D 0x0000ffffffffe464 "pdb=3D/var/db/pkg"
>> (lldb) print (char*)0xffffffffe474
>> (char *) $71 =3D 0x0000ffffffffe474 "port_dbdir=3D/var/db/ports"
>> (lldb) print (char*)0xffffffffe48d
>> (char *) $60 =3D 0x0000ffffffffe48d =
"PM_PARENT_PORT=3Ddevel/aarch64-none-elf-gcc"
>> (lldb) print (char*)0xffffffffe4b7
>> (char *) $29 =3D 0x0000ffffffffe4b7 "CC=3Dcc"
>> (lldb) print (char*)0xffffffffe4bd
>> (char *) $30 =3D 0x0000ffffffffe4bd "SCRIPT=3D/root/ports_x"
>>=20
>> As for:
>>=20
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490 ("junk" (text) =
starts again after this line)
>> 0xffffffffe5b0: 0x54494445003d4854 0x41500069763d524f x26, x25 values =
(see below)
>> 0xffffffffe5c0: 0x2f3d534547414b43 0x74726f702f727375 x24, x23 values =
(see below)
>> 0xffffffffe5d0: 0x67616b6361702f73 0x414c46444c007365 x22, x21 values =
(see below)
>> 0xffffffffe5e0: 0x58584300203d5347 0x662d3d5347414c46 x20, x19 values =
(see below)
>> 0xffffffffe5f0: 0x2d74656b63617262 0x31353d6874706564  fp,  lr values =
(see below); pc=3D=3Dlr as well.
>> 0xffffffffe600: 0x346d673d344d0032 0x73752f3d564e4500
>> 0xffffffffe610: 0x6d2f656d6f682f72 0x732e2f696d6b7261
>> 0xffffffffe620: 0x2f3d647000637268 0x74726f702f727375
>> 0xffffffffe630: 0x4441524750550073 0x703d4c4f4f545f45
>> 0xffffffffe640: 0x657473616d74726f 0x5254524f46470072
>> 0xffffffffe650: 0x4552534f003d4e41 0x4f00302e32313d4c
>> 0xffffffffe660: 0x752f3d445750444c 0x702f6a626f2f7273
>> . . .
>>=20
>> In other words:
>>=20
>> (lldb) print (char*)0xffffffffe5b0
>> (char *) $72 =3D 0x0000ffffffffe5b0 "TH=3D"
>> (The above is likely missing its beginning, having been replaced
>> by the frame-pointer/lr-value pair.)
>> (lldb) print (char*)0xffffffffe5be
>> (char *) $73 =3D 0x0000ffffffffe5be "PACKAGES=3D/usr/ports/packages"
>> (lldb) print (char*)0xffffffffe5db
>> (char *) $74 =3D 0x0000ffffffffe5db "LDFLAGS=3D "
>> (lldb) print (char*)0xffffffffe5e5
>> (char *) $75 =3D 0x0000ffffffffe5e5 "CXXFLAGS=3D-fbracket-depth=3D512"
>> (lldb) print (char*)0xffffffffe602
>> (char *) $76 =3D 0x0000ffffffffe602 "M4=3Dgm4"
>> (lldb) print (char*)0xffffffffe624
>> (char *) $77 =3D 0x0000ffffffffe624 "pd=3D/usr/ports"
>> (lldb) print (char*)0xffffffffe632
>> (char *) $79 =3D 0x0000ffffffffe632 "UPGRADE_TOOL=3Dportmaster"
>> (lldb) print (char*)0xffffffffe64a
>> (char *) $81 =3D 0x0000ffffffffe64a "GFORTRAN=3D"
>> (lldb) print (char*)0xffffffffe654
>> (char *) $82 =3D 0x0000ffffffffe654 "OSREL=3D12.0"
>> (lldb) print (char*)0xffffffffe65f
>> (char *) $83 =3D 0x0000ffffffffe65f =
"OLDPWD=3D/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/.bu=
ild"
>> . . .
>>=20
>> So the middle range with the stack frames:
>>=20
>> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470 ("junk" (text) =
temporarily stops here)
>> 0xffffffffe4e0: 0x00000000004345c8 0x0000000000434000
>> 0xffffffffe4f0: 0x0000000000434000 0x0000000040a903e0
>> 0xffffffffe500: 0x0000ffffffffe540 0x000000004054cd94
>> 0xffffffffe510: 0x696d6b72616d2f6c 0x0000000000000000
>> 0xffffffffe520: 0x0000000000000000 0x0000000000000000
>> 0xffffffffe530: 0x0000000000000000 0xe8021690dc1f70b8
>> 0xffffffffe540: 0x00000000004345c8 0x0000000000434000
>> 0xffffffffe550: 0x0000000000434000 0x000000000000000f
>> 0xffffffffe560: 0x0000ffffffffe5a0 0x000000000041aef0
>> 0xffffffffe570: 0x0000000000434c38 0x732f7273752f3a6e
>> 0xffffffffe580: 0x0000000000000001 0x0000000000000005
>> 0xffffffffe590: 0x0000000040a33180 0x0000000000000000
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490 ("junk" (text) =
starts again after this line)
>>=20
>> has stomped on strings that are outside the stack
>> region. (The stack frames are the actual junk.)
>>=20
>>=20
>> =3D=3D=3D
>> Mark Millard
>> markmi at dsl-only.net
>=20
> On 2017-Feb-7, at 2:44 AM, Mark Millard <markmi at dsl-only.net> =
wrote:
>=20
>> Another core. The register read reported:
>>=20
>>      fp =3D 0x0000000000000000
>>      lr =3D 0x0000000000000000
>>      sp =3D 0x0000fffffffee630
>>      pc =3D 0x0000000000000000
>>=20
>> And looking around (most nested to outer most):
>>=20
>> 0xfffffffee530: 0x0000fffffffee570 0x000000004054cd94
>> libc.so.7`__free:
>>  0x4054cd90 <+144>: bl     0xad6fc                   ; ifree at =
jemalloc_jemalloc.c:1876
>>  0x4054cd94 <+148>: adrp   x9, 185
>> 0xfffffffee570: 0x0000fffffffee590 0x0000000000411300
>> sh`ckfree:
>>  0x4112fc <+28>: bl     0x4027e0                  ; symbol stub for: =
free
>>  0x411300 <+32>: ldr    x8, [x19, #0x970]
>> 0xfffffffee590: 0x0000fffffffee5d0 0x000000000040e6e8
>> sh`freejob:
>>  0x40e6e4 <+136>: bl     0x4112e0                  ; ckfree at =
memalloc.c:86
>>  0x40e6e8 <+140>: adrp   x8, 38
>> 0xfffffffee5d0: 0x0000ffffffffcaa0 0x000000000040f490
>> sh`forkshell:
>>  0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>>  0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30
>> (Note that sp=3D=3D0x0000fffffffee630 is fairly close to =
0xfffffffee5d0.)
>>=20
>> (sizable frame jump from 0xfffffffee5d0 to 0x0000ffffffffcaa0, size =
0xE4D0=3D=3D58576 bytes)
>> (0xfffffffee5e0 up to 0xffffffffa890 (not inclusive) are all =
0x0000000000000000)
>> (The prior trace example did not have such a large area.)
>>=20
>> 0xffffffffca50: 0x0000ffffffffcaa0 0x000000000040f1c8
>> sh`forkshell:
>>  0x40f1c4 <+144>: bl     0x413fc8                  ; flushall at =
output.c:236
>>  0x40f1c8 <+148>: bl     0x402920                  ; symbol stub for: =
fork
>>  0x40f1cc <+152>: mov    w19, w0
>> (flushall avoids returning to 0x40f1c8 directly, instead making
>> the last routine it calls return there instead of to flushall.)
>>=20
>> 0xffffffffcaa0: 0x0000ffffffffcb50 0x0000000000405954
>> sh`evaltree:
>>  0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>>  0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
>> 0xffffffffcb50: 0x0000ffffffffcde0 0x0000000000406e28
>> sh`evalbackcmd:
>>  0x406e24 <+336>: bl     0x40549c                  ; evaltree at =
eval.c:193
>>  0x406e28 <+340>: ldur   w0, [x29, #-0x5c]
>> 0xffffffffcde0: 0x0000ffffffffcf90 0x0000000000409324
>> sh`argstr:
>>  0x409320 <+428>: bl     0x406cd4                  ; evalbackcmd at =
eval.c:646
>>  0x409324 <+432>: mov    x0, x26
>> 0xffffffffcf90: 0x0000ffffffffcff0 0x0000000000408fa8
>> sh`expandarg:
>>  0x408fa4 <+108>: bl     0x409174                  ; argstr at =
expand.c:267
>>  0x408fa8 <+112>: cbz    x19, 0x409020             ; <+232> at =
expand.c:236
>> 0xffffffffcff0: 0x0000ffffffffd320 0x0000000000405f48
>> sh`evalcommand:
>>  0x405f44 <+220>: bl     0x408f38                  ; expandarg at =
expand.c:225
>>  0x405f48 <+224>: ldr    x24, [x24, #0x8]
>>=20
>> 0xffffffffd0f0: 0x0000ffffffffd320 0x00000000004068e4
>> sh`evalcommand:
>>  0x4068e0 <+2680>: bl     0x402be0                  ; symbol stub =
for: _setjmp
>>  0x4068e4 <+2684>: cbz    w0, 0x406a04              ; <+2972> at =
eval.c:1101
>>=20
>> 0xffffffffd320: 0x0000ffffffffd3d0 0x0000000000405570
>> sh`evaltree:
>>  0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
>>  0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
>>  0x405574 <+216>: ldr    x8, [x24, #0x8]
>> 0xffffffffd3d0: 0x0000ffffffffd480 0x0000000000405550
>> sh`evaltree:
>>  0x40554c <+176>: bl     0x40549c                  ; <+0> at =
eval.c:193
>>  0x405550 <+180>: ldr    w8, [x22, #0x994]
>> 0xffffffffd480: 0x0000ffffffffd7b0 0x00000000004067d0
>> sh`evalcommand:
>>  0x4067cc <+2404>: bl     0x40549c                  ; evaltree at =
eval.c:193
>>  0x4067d0 <+2408>: ldr    x8, [x24, #0x970]
>> 0xffffffffd7b0: 0x0000ffffffffd860 0x0000000000405570
>> sh`evaltree:
>>  0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
>>  0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
>>  0x405574 <+216>: ldr    x8, [x24, #0x8]
>> 0xffffffffd860: 0x0000ffffffffd910 0x0000000000405550
>> sh`evaltree:
>>  0x40554c <+176>: bl     0x40549c                  ; <+0> at =
eval.c:193
>>  0x405550 <+180>: ldr    w8, [x22, #0x994]
>> 0xffffffffd910: 0x0000ffffffffdc40 0x00000000004067d0
>> sh`evalcommand:
>>  0x4067cc <+2404>: bl     0x40549c                  ; evaltree at =
eval.c:193
>>  0x4067d0 <+2408>: ldr    x8, [x24, #0x970]
>>=20
>> 0xffffffffda10: 0x0000ffffffffdc40 0x000000000040673c
>> sh`evalcommand:
>>  0x406738 <+2256>: bl     0x402be0                  ; symbol stub =
for: _setjmp
>>  0x40673c <+2260>: cbnz   w0, 0x406c60              ; <+3576> at =
eval.c:1042
>>=20
>> 0xffffffffdc40: 0x0000ffffffffdcf0 0x0000000000405570
>> sh`evaltree:
>>  0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
>>  0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
>>  0x405574 <+216>: ldr    x8, [x24, #0x8]
>> 0xffffffffdcf0: 0x0000ffffffffdd70 0x0000000000411034
>> sh`cmdloop:
>>  0x411030 <+248>: bl     0x40549c                  ; evaltree at =
eval.c:193
>>  0x411034 <+252>: mov    w27, wzr
>> 0xffffffffdd70: 0x0000ffffffffddd0 0x0000000000410ea8
>> sh`main:
>>  0x410ea4 <+656>: bl     0x410f38                  ; cmdloop at =
main.c:199
>>  0x410ea8 <+660>: adrp   x8, 36
>> 0xffffffffddd0: 0x0000ffffffffde10 0x0000000000402f30
>> sh`__start:
>>  0x402f2c <+356>: bl     0x410c14                  ; main at =
main.c:97
>>  0x402f30 <+360>: bl     0x402ae0                  ; symbol stub for: =
exit
>>=20
>> (_rtld is not in the core file)
>> 0xffffffffde10: 0x0000000000000000 0x0000000040434658
>> ld-elf.so.1`.rtld_start:
>>  0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
>>  0x40434658 <+24>: mov    x8, x0
>>=20
>> So again the problem is associated with the forkshell/fork/freejob
>> related materials.
>>=20
>> (I mistakenly left out the evalcommand/_setjmp material
>> when I made the trace in the below. The same for flushall.
>> I've inserted some of that below, at least for
>> the flushall context.)
>=20
> On 2017-Feb-6, at 8:05 PM, Mark Millard <markmi@dsl-only.net> wrote:
>=20
>> [I got a lucky sh core dump with more stack context/content
>> available to look at for an example sh crash. This helps
>> narrow things down.]
>>=20
>> On 2017-Feb-5, at 1:12 AM, Mark Millard <markmi at dsl-only.net> =
wrote:
>>=20
>>> [Top post of a new result.]
>>>=20
>>> Using lldb to look at the memory for the stack around
>>> sh failure points has some apparently fixed structure.
>>> Example:
>>>=20
>>> . . . junk values . . .
>>> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470
>>> 0xffffffffe4e0: 0x00000000004345c8 0x0000000000434000
>>> 0xffffffffe4f0: 0x0000000000434000 0x0000000040a903e0
>>> 0xffffffffe500: 0x0000ffffffffe540 0x000000004054cd94
>>> 0xffffffffe510: 0x696d6b72616d2f6c 0x0000000000000000
>>> 0xffffffffe520: 0x0000000000000000 0x0000000000000000
>>> 0xffffffffe530: 0x0000000000000000 0xe8021690dc1f70b8
>>> 0xffffffffe540: 0x00000000004345c8 0x0000000000434000
>>> 0xffffffffe550: 0x0000000000434000 0x000000000000000f
>>> 0xffffffffe560: 0x0000ffffffffe5a0 0x000000000041aef0
>>> 0xffffffffe570: 0x0000000000434c38 0x732f7273752f3a6e
>>> 0xffffffffe580: 0x0000000000000001 0x0000000000000005
>>> 0xffffffffe590: 0x0000000040a33180 0x0000000000000000
>>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>>> . . .  junk values . . .
>>=20
>> I got lucky and got a core dump that did not have the junk
>> areas and could trace the stack's frame pointer chain
>> between main and libc.so.7`__free (through freejob along
>> the way). See later.
>>=20
>>> where "register read" showed:
>>>=20
>>>    sp =3D 0x0000ffffffffe600
>>>=20
>>> (The distance and direction to the last non-junk line
>>> from the reported sp in each example is the same.)
>>> Looking around that 0x000000000040f490:
>>>=20
>>> 0x40f48c: 0x97fffc74   bl     0x40e65c                  ; freejob at =
jobs.c:463
>>> 0x40f490: 0x9100c294   add    x20, x20, #0x30           ; =3D0x30=20
>>>=20
>>> It is the same address and code in each case.
>>=20
>> I should have originally noted that 0x40f48c is in
>> forkshell, along the child process code-path:
>>=20
>> pid_t
>> forkshell(struct job *jp, union node *n, int mode)
>> {
>> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>>     INTOFF;
>>     if (mode =3D=3D FORK_BG && (jp =3D=3D NULL || jp->nprocs =3D=3D =
0))
>>             checkzombies();
>>     flushall();
>>     pid =3D fork();
>>     if (pid =3D=3D -1) {
>>             TRACE(("Fork failed, errno=3D%d\n", errno));
>>             INTON;
>>             error("Cannot fork: %s", strerror(errno));
>>     }
>>     if (pid =3D=3D 0) {
>>             struct job *p;
>>             int wasroot;
>>             int i;
>>=20
>>             TRACE(("Child shell %d\n", (int)getpid()));
>>             wasroot =3D rootshell;
>>             rootshell =3D 0;
>>             handler =3D &main_handler;
>>             closescript();
>>             INTON;
>>             forcelocal =3D 0;
>>             clear_traps();
>> #if JOBS
>> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>> #else
>> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>> #endif
>>             INTOFF;
>>             for (i =3D njobs, p =3D jobtab ; --i >=3D 0 ; p++)
>>                     if (p->used)
>>                             freejob(p);
>>             INTON;
>>             if (wasroot && iflag) {
>>                     setsignal(SIGINT);
>>                     setsignal(SIGQUIT);
>>                     setsignal(SIGTERM);
>>             }
>>             return pid;
>>     }
>> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>>=20
>>> Sometimes the junk values are all zeros over sizable
>>> distances. Sometimes the sizable areas seem to have
>>> random data.
>>>=20
>>> /usr/src/bin/sh/jobs.c 's freejob is:
>>>=20
>>> static void
>>> freejob(struct job *jp)
>>> {
>>>    struct procstat *ps;
>>>    int i;
>>>=20
>>>    INTOFF;
>>>    if (bgjob =3D=3D jp)
>>>            bgjob =3D NULL;
>>>    for (i =3D jp->nprocs, ps =3D jp->ps ; --i >=3D 0 ; ps++) {
>>>            if (ps->cmd !=3D nullstr)
>>>                    ckfree(ps->cmd);
>>>    }
>>>    if (jp->ps !=3D &jp->ps0)
>>>            ckfree(jp->ps);
>>>    jp->used =3D 0;
>>> #if JOBS
>>>    deljob(jp);
>>> #endif
>>>    INTON;
>>> }
>>>=20
>>> /usr/src/bin/sh/error.h defines INTOFF and INTON:
>>>=20
>>> #define EXINT 0         /* SIGINT received */
>>> #define EXERROR 1       /* a generic error */
>>> #define EXEXEC 2        /* command execution failed */
>>> #define EXEXIT 3        /* call exitshell(exitstatus) */
>>>=20
>>> . . .
>>>=20
>>> extern struct jmploc *handler;
>>> extern volatile sig_atomic_t exception;
>>>=20
>>> . . .
>>>=20
>>> extern volatile sig_atomic_t suppressint;
>>> extern volatile sig_atomic_t intpending;
>>>=20
>>> #define INTOFF suppressint++
>>> #define INTON { if (--suppressint =3D=3D 0 && intpending) onint(); }
>>> #define is_int_on() suppressint
>>> #define SETINTON(s) suppressint =3D (s)
>>> #define FORCEINTON {suppressint =3D 0; if (intpending) onint();}
>>> #define SET_PENDING_INT intpending =3D 1
>>> #define CLEAR_PENDING_INT intpending =3D 0
>>> #define int_pending() intpending
>>>=20
>>> void exraise(int) __dead2;
>>> void onint(void) __dead2;
>>>=20
>>> /usr/src/bin/sh/error.c has:
>>>=20
>>> void
>>> exraise(int e)
>>> {
>>>    INTOFF;
>>>    if (handler =3D=3D NULL)
>>>            abort();
>>>    exception =3D e;
>>>    longjmp(handler->loc, 1);
>>> }
>>> . . .
>>> void
>>> onint(void)
>>> {
>>>    sigset_t sigs;
>>>=20
>>>    intpending =3D 0;
>>>    sigemptyset(&sigs);
>>>    sigprocmask(SIG_SETMASK, &sigs, NULL);
>>>=20
>>>    /*
>>>     * This doesn't seem to be needed, since main() emits a newline.
>>>     */
>>> #if 0
>>>    if (tcgetpgrp(0) =3D=3D getpid())
>>>            write(STDERR_FILENO, "\n", 1);
>>> #endif
>>>    if (rootshell && iflag)
>>>            exraise(EXINT);
>>>    else {
>>>            signal(SIGINT, SIG_DFL);
>>>            kill(getpid(), SIGINT);
>>>            _exit(128 + SIGINT);
>>>    }
>>> }
>>>=20
>>> # grep setjmp /usr/src/bin/sh/*
>>> /usr/src/bin/sh/TOUR:so I implement it using setjmp and longjmp.  =
The global variable
>>> /usr/src/bin/sh/error.h:#include <setjmp.h>
>>> /usr/src/bin/sh/error.h: * BSD setjmp saves the signal mask, which =
violates ANSI C and takes time,
>>> /usr/src/bin/sh/error.h: * so we use _setjmp instead.
>>> /usr/src/bin/sh/error.h:#define setjmp(jmploc)	_setjmp(jmploc)
>>> /usr/src/bin/sh/eval.c:	if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/eval.c:	if (setjmp(jmploc.loc))
>>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/histedit.c:		if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/jobs.c:		if (setjmp(jmploc.loc))
>>> /usr/src/bin/sh/main.c: * commands.  The setjmp call sets up the =
location to jump to when an
>>> /usr/src/bin/sh/main.c:	if (setjmp(main_handler.loc)) {
>>> /usr/src/bin/sh/parser.c:	if (setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/parser.c:	if (!setjmp(jmploc.loc)) {
>>> /usr/src/bin/sh/trap.c:	if (!setjmp(loc1.loc)) {
>>> /usr/src/bin/sh/trap.c:	if (!setjmp(loc2.loc)) {
>>> /usr/src/bin/sh/var.c:	if (setjmp(jmploc.loc))
>>=20
>> Here is the call chain that I was able to trace
>> in the newer core dump:
>> (most nested first to least nested last;
>> showing frame pointer and lr value pairs
>> and calls/return-places)
>>=20
>> (ifree is not in the core file)
>> 0xffffffffcc60: 0x0000ffffffffcca0 0x000000004054cd94
>> libc.so.7`__free:
>> 0x4054cd90 <+144>: bl     0xad6fc                   ; ifree at =
jemalloc_jemalloc.c:1876
>> 0x4054cd94 <+148>: adrp   x9, 185
>> 0xffffffffcca0: 0x0000ffffffffccc0 0x0000000000411300
>> sh`ckfree:
>> 0x4112fc <+28>: bl     0x4027e0                  ; symbol stub for: =
free
>> 0x411300 <+32>: ldr    x8, [x19, #0x970]
>> 0xffffffffccc0: 0x0000ffffffffcd00 0x000000000040e6e8
>> sh`freejob:
>> 0x40e6e4 <+136>: bl     0x4112e0                  ; ckfree at =
memalloc.c:86
>> 0x40e6e8 <+140>: adrp   x8, 38
>> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
>> sh`forkshell:
>> 0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>> 0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30=20
>=20
> 0xffffffffcdd0: 0x0000ffffffffce20 0x000000000040f1c8
> sh`forkshell:
>  0x40f1c4 <+144>: bl     0x413fc8                  ; flushall at =
output.c:236
>  0x40f1c8 <+148>: bl     0x402920                  ; symbol stub for: =
fork
>  0x40f1cc <+152>: mov    w19, w0
> (flushall avoids returning to 0x40f1c8 directly, instead making
> the last routine it calls return there instead of to flushall.)
>=20
>> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
>> sh`evaltree:
>> 0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>> 0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
>> 0xffffffffced0: 0x0000ffffffffd160 0x0000000000406e28
>> sh`evalbackcmd:
>> 0x406e24 <+336>: bl     0x40549c                  ; evaltree at =
eval.c:193
>> 0x406e28 <+340>: ldur   w0, [x29, #-0x5c]
>> 0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
>=20
>> 0xffffffffd160: 0x0000ffffffffd310 0x0000000000409324
>> sh`argstr:
>> 0x409320 <+428>: bl     0x406cd4                  ; evalbackcmd at =
eval.c:646
>> 0x409324 <+432>: mov    x0, x26
>> 0xffffffffd310: 0x0000ffffffffd370 0x0000000000408fa8
>> sh`expandarg:
>> 0x408fa4 <+108>: bl     0x409174                  ; argstr at =
expand.c:267
>> 0x408fa8 <+112>: cbz    x19, 0x409020             ; <+232> at =
expand.c:236
>> 0xffffffffd370: 0x0000ffffffffd5f0 0x0000000000407530
>> sh`exphere:
>> 0x40752c <+212>: bl     0x408f38                  ; expandarg at =
expand.c:225
>> 0x407530 <+216>: ldr    x8, [x20]
>> 0xffffffffd5f0: 0x0000ffffffffd630 0x00000000004073f0
>> sh`expredir:
>> 0x4073ec <+112>: bl     0x407458                  ; exphere at =
eval.c:494
>> 0x4073f0 <+116>: b      0x407428                  ; <+172> at =
eval.c:535
>> 0xffffffffd630: 0x0000ffffffffd960 0x0000000000406154
>> sh`evalcommand:
>> 0x406150 <+744>: bl     0x40737c                  ; expredir at =
eval.c:532
>> 0x406154 <+748>: ldur   w27, [x29, #-0x68]
>> 0xffffffffd960: 0x0000ffffffffda10 0x0000000000405570
>> sh`evaltree:
>> 0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
>> 0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
>> 0x405574 <+216>: ldr    x8, [x24, #0x8]
>> 0xffffffffda10: 0x0000ffffffffdac0 0x00000000004056b4
>> sh`evaltree:
>> 0x4056b0 <+532>: bl     0x40549c                  ; <+0> at =
eval.c:193
>> 0x4056b4 <+536>: ldr    w8, [x19, #0x990]
>> 0xffffffffdac0: 0x0000ffffffffdb70 0x0000000000405550
>> sh`evaltree:
>> 0x40554c <+176>: bl     0x40549c                  ; <+0> at =
eval.c:193
>> 0x405550 <+180>: ldr    w8, [x22, #0x994]
>> 0xffffffffdb70: 0x0000ffffffffdbf0 0x0000000000411034
>> sh`cmdloop:
>> 0x411030 <+248>: bl     0x40549c                  ; evaltree at =
eval.c:193
>> 0x411034 <+252>: mov    w27, wzr
>> 0xffffffffdbf0: 0x0000ffffffffdc50 0x0000000000410ea8
>> sh`main:
>> 0x410ea4 <+656>: bl     0x410f38                  ; cmdloop at =
main.c:199
>> 0x410ea8 <+660>: adrp   x8, 36
>> 0xffffffffdc50: 0x0000ffffffffdc90 0x0000000000402f30
>> sh`__start:
>> 0x402f2c <+356>: bl     0x410c14                  ; main at main.c:97
>> 0x402f30 <+360>: bl     0x402ae0                  ; symbol stub for: =
exit
>>=20
>> (_rtld is not in the core file)
>> 0xffffffffdc90: 0x0000000000000000 0x0000000040434658
>> ld-elf.so.1`.rtld_start:
>> 0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
>> 0x40434658 <+24>: mov    x8, x0
>>=20
>> Some of the most nested possibly had returned. But the
>> forkshell / freejob general time frame seems to match
>> everything that I've seen.
>>=20
>> [The details of the middle "eval*" related layers vary
>> from what I can tell.]
>>=20
>> "register read" shows fp, lr, and pc majorly
>> messed up.
>>=20
>> General Purpose Registers:
>>     x0 =3D 0x0000000000000000
>>     x1 =3D 0x00000000404346e8  ld-elf.so.1`_rtld_tlsdesc
>>     x2 =3D 0x0000000040a00000
>>     x3 =3D 0x0000000000000002
>>     x4 =3D 0x0000000000000096
>>     x5 =3D 0x0000000040a5fd10
>>     x6 =3D 0x0000000000434c28  sh..bss + 9448
>>     x7 =3D 0x0000000000434c28  sh..bss + 9448
>>     x8 =3D 0x0000000000000001
>>     x9 =3D 0x0000000000000000
>>    x10 =3D 0x0000000000000000
>>    x11 =3D 0x0000000040a350c0
>>    x12 =3D 0x0000000040a0e770
>>    x13 =3D 0x0000000000000072
>>    x14 =3D 0x000000000000006f
>>    x15 =3D 0x0000000000000010
>>    x16 =3D 0x0000000000432340 =20
>>    x17 =3D 0x000000004054cd00  libc.so.7`__free at =
jemalloc_jemalloc.c:2007
>>    x18 =3D 0x0000000000000000
>>    x19 =3D 0x0000000000000000
>>    x20 =3D 0x0000000000000000
>>    x21 =3D 0x0000000000000001
>>    x22 =3D 0x0000000040a5ff10
>>    x23 =3D 0x0000ffffffffd190
>>    x24 =3D 0x0000000000434000  sh..bss + 6336
>>    x25 =3D 0x0000000000434000  sh..bss + 6336
>>    x26 =3D 0x0000ffffffffcd00
>>    x27 =3D 0x0000000000434000  sh..bss + 6336
>>    x28 =3D 0x0000000040a6f5e0
>>     fp =3D 0x0000000040a5fed8
>>     lr =3D 0x0000000000000000
>>     sp =3D 0x0000ffffffffcd60
>>     pc =3D 0x0000000000000000
>>   cpsr =3D 0x60000000
>>=20
>> sp is also odd by being in the middle of the stack range
>> for:
>>=20
>> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
>> sh`forkshell:
>> 0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>> 0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30=20
>> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
>> sh`evaltree:
>> 0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>> 0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
>>=20
>> NOTE: The fork happened earlier in sh`forkshell and this
>> is the child process that has the odd value.
>>=20
>> [It leaves me wondering if 0x0000ffffffffcd60 is a stack
>> pointer value associated with a call to something
>> earlier than the sh`forkshell call that is called by
>> sh`forkshell .]
>>=20
>> Also: in the ones with only a small section of the junk
>> areas the equivalent of:
>>=20
>> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
>>=20
>> is the largest addressed non-junk content in the area
>> and the equivalent of:
>>=20
>> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
>>=20
>> would instead show zeros or "random" garbage values.
>>=20
>> In this case, however that range of the stack looks like:
>>=20
>> . . .
>> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
>> 0xffffffffcd10: 0x0000ffffffffcd00 0x0000000000434000
>> 0xffffffffcd20: 0x0000000000434000 0x0000ffffffffd190
>> 0xffffffffcd30: 0x0000000040a5ff10 0x0000000000000001
>> 0xffffffffcd40: 0x0000000000000000 0x0000000000000000
>> 0xffffffffcd50: 0x0000000040a5fed8 0x0000000000000000
>> 0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
>> 0xffffffffcd70: 0x0000000000000000 0x827a80ccb3228215
>> 0xffffffffcd80: 0x0000000040a6f5c0 0x0000000000434000
>> 0xffffffffcd90: 0x0000000000434000 0x0000000000434000
>> 0xffffffffcda0: 0x0000000000434000 0x0000000000434000
>> 0xffffffffcdb0: 0x0000000040a6f638 0x0000000000000000
>> 0xffffffffcdc0: 0x0000000040a350c0 0x0000000000434000
>> 0xffffffffcdd0: 0x0000ffffffffce20 0x000000000040f1c8
>> 0xffffffffcde0: 0x0000000000000003 0x0000000040a350c0
>> 0xffffffffcdf0: 0x0000000040a6f5c0 0x0000000000434000
>> 0xffffffffce00: 0x0000000000434000 0x0000000040a6f638
>> 0xffffffffce10: 0x0000000000000000 0x0000000000434000
>> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
>> . . .
>>=20
>> Interestingly 0xffffffffcd60 reported for the sp looks
>> like it has a frame-pointer/lr-value pair that does not
>> fit with the overall call chain that ties together but
>> is some fragment of a prior(?) call chain:
>>=20
>> 0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
>> sh`evalcommand:
>> 0x4068e0 <+2680>: bl     0x402be0                  ; symbol stub for: =
_setjmp
>> 0x4068e4 <+2684>: cbz    w0, 0x406a04              ; <+2972> at =
eval.c:1101
>>=20
>> It looks like it is a record from calling _setjmp in
>> sh`evalcommand .
>>=20
>> (sh uses _setjmp/_longjmp via macros that turn
>> into them for setjmp/longjmp references in
>> sh's source code.)
>>=20
>> Interestingly (likely junk relative to the above):
>>=20
>> 0xffffffffcf90: 0x0000000000000000 0x0000000000432000
>>=20
>> where:
>>=20
>> (lldb) dis -s 0x0000000000432000
>> sh`__frame_dummy_init_array_entry:
>> 0x432000 <+0>: .long  0x00402fac                ; unknown opcode
>> 0x432004 <+4>: .long  0x00000000                ; unknown opcode
>> (lldb) dis -s __frame_dummy_init_array_entry -c32
>> sh`frame_dummy:
>> 0x402fac <+0>:  adrp   x8, 48
>> 0x402fb0 <+4>:  adrp   x1, 48
>> 0x402fb4 <+8>:  ldr    x8, [x8, #0x30]
>> 0x402fb8 <+12>: ldr    x1, [x1, #0x228]
>> 0x402fbc <+16>: cmp    x8, #0x0                  ; =3D0x0=20
>> 0x402fc0 <+20>: ccmp   x1, #0x0, #0x4, ne
>> 0x402fc4 <+24>: b.ne   0x402fcc                  ; <+32>
>> 0x402fc8 <+28>: ret   =20
>> 0x402fcc <+32>: adrp   x0, 48
>> 0x402fd0 <+36>: add    x0, x0, #0x30             ; =3D0x30=20
>> 0x402fd4 <+40>: br     x1
>>=20
>> sh`lookupalias:
>> . . .
>>=20
>>=20
>> Other notes:
>>=20
>> Some examples have funcnest=3D=3D0; others have (e.g.) funcnest=3D=3D2.
>> This one had funcnest=3D=3D0.
>>=20
>> commandname varies. In this case it was:
>>=20
>> (lldb) print commandname
>> (char *) $74 =3D 0x0000ffffffffe210 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/li=
biberty/configure"
>>=20
>> Other examples include:
>>=20
>> (lldb) print commandname
>> (char *) $0 =3D 0x0000ffffffffdc40 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/fi=
xincludes/configure"
>>=20
>> (lldb) print commandname
>> (char *) $0 =3D 0x0000ffffffffe498 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/li=
biberty/../config.sub"
>>=20
>> (lldb) print commandname
>> (char *) $0 =3D 0x0000ffffffffe398 "../libtool"
>>=20
>>=20
>> So far the forkshell/fork/freejob and associated materials always =
seeming
>> to be involved is all that I've found that is common (at least that =
is
>> suggested by what I see so far) within the sh context.
>>=20
>>> Other notes:
>>>=20
>>> As a personal investigation I've temporarily changed to using
>>> something not fully generic but based on gic-400 specifics:
>>>=20
>>> # svnlite diff /usr/src/sys/arm/arm/gic.c
>>> Index: /usr/src/sys/arm/arm/gic.c
>>> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>>> --- /usr/src/sys/arm/arm/gic.c  (revision 312982)
>>> +++ /usr/src/sys/arm/arm/gic.c  (working copy)
>>> @@ -672,9 +672,13 @@
>>>=20
>>>    if (irq >=3D sc->nirqs) {
>>> #ifdef GIC_DEBUG_SPURIOUS
>>> +#define EXPECTED_SPURIOUS_IRQ 1023
>>> +           if (irq !=3D EXPECTED_SPURIOUS_IRQ) {
>>>            device_printf(sc->gic_dev,
>>> -                   "Spurious interrupt detected: last irq: %d on =
CPU%d\n",
>>> +                   "Spurious interrupt %d detected of %d: last irq: =
%d on CPU%d\n",
>>> +                   irq, sc->nirqs,
>>>                sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
>>> +            }
>>> #endif
>>>            return (FILTER_HANDLED);
>>>    }
>>> @@ -720,6 +724,16 @@
>>>    if (irq < sc->nirqs)
>>>            goto dispatch_irq;
>>>=20
>>> +       if (irq !=3D EXPECTED_SPURIOUS_IRQ) {
>>> +#undef EXPECTED_SPURIOUS_IRQ
>>> +#ifdef GIC_DEBUG_SPURIOUS
>>> +               device_printf(sc->gic_dev,
>>> +                   "Spurious end interrupt %d detected of %d: last =
irq: %d on CPU%d\n",
>>> +                   irq, sc->nirqs,
>>> +                   sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
>>> +#endif
>>> +       }
>>> +
>>>    return (FILTER_HANDLED);
>>> }
>>>=20
>>>=20
>>> The result was that no notices of Spurious interrupts were =
generated:
>>> All of the odd interrupts were the special 1023 value.
>>>=20
>>> [As far as I could tell from the code the configuration is such that
>>> 1022 should not be generated --and were not. 1020 and 1021 are
>>> reserved and should not be generated.]




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?2A1F1091-4115-46A1-8DB5-032099A49290>