Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 7 Feb 2017 02:44:32 -0800
From:      Mark Millard <markmigm@gmail.com>
To:        Tom Vijlbrief <tvijlbrief@gmail.com>, freebsd-arm <freebsd-arm@freebsd.org>
Subject:   Re: Arm64 stack issues (was Re: FreeBSD status for/on ODroid-C2?)
Message-ID:  <25B9EBC8-147F-47C2-BC40-C449EF3AC3FE@gmail.com>
In-Reply-To: <1B1EEC5E-9875-417F-9901-A66CB5885634@dsl-only.net>
References:  <CAOQrpVfK-Dw_rSo_YVY5MT1wbc6Ah-Pj%2BWv8UGjeiUQ1b3%2B-mg@mail.gmail.com> <20170124191357.0ec0abfd@zapp> <20170128010138.iublazyrhhqycn37@mutt-hardenedbsd> <20170128010223.tjivldnh7pyenbg6@mutt-hardenedbsd> <CAOQrpVfxKvSR5PoahnqEsYspHhjjOGJ8iCBUetKxRV57oX_aUg@mail.gmail.com> <009857E3-35BB-4DE4-B3BB-5EC5DDBB5B06@dsl-only.net> <CAOQrpVdKyP2T0V77sfpuKbNP3ARoD1EcwtH6E9o7p5KF%2B=A56A@mail.gmail.com> <CB36F13F-85E9-41D2-A7F3-DA183BE5985A@dsl-only.net> <890B7D8A-27FF-41AC-8291-1858393EC7B1@gmail.com> <54642E5C-D5D6-45B7-BB74-2407CFB351C2@dsl-only.net> <EB1D79C2-CF5E-4C21-BA1B-EC9F34BB737E@gmail.com> <F6C3286F-46DF-4819-BDD2-10904018E70C@dsl-only.net> <A95CC1DC-36C4-4FC3-A8D4-BDBE6FCB136B@gmail.com> <7B5DF446-6740-43DE-823D-B6ECBECF0C32@dsl-only.net> <1B1EEC5E-9875-417F-9901-A66CB5885634@dsl-only.net>

next in thread | previous in thread | raw e-mail | index | archive | help
Another core. The register read reported:

        fp =3D 0x0000000000000000
        lr =3D 0x0000000000000000
        sp =3D 0x0000fffffffee630
        pc =3D 0x0000000000000000

And looking around (most nested to outer most):

0xfffffffee530: 0x0000fffffffee570 0x000000004054cd94
libc.so.7`__free:
    0x4054cd90 <+144>: bl     0xad6fc                   ; ifree at =
jemalloc_jemalloc.c:1876
    0x4054cd94 <+148>: adrp   x9, 185
0xfffffffee570: 0x0000fffffffee590 0x0000000000411300
sh`ckfree:
    0x4112fc <+28>: bl     0x4027e0                  ; symbol stub for: =
free
    0x411300 <+32>: ldr    x8, [x19, #0x970]
0xfffffffee590: 0x0000fffffffee5d0 0x000000000040e6e8
sh`freejob:
    0x40e6e4 <+136>: bl     0x4112e0                  ; ckfree at =
memalloc.c:86
    0x40e6e8 <+140>: adrp   x8, 38
0xfffffffee5d0: 0x0000ffffffffcaa0 0x000000000040f490
sh`forkshell:
    0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
    0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30
(Note that sp=3D=3D0x0000fffffffee630 is fairly close to =
0xfffffffee5d0.)

(sizable frame jump from 0xfffffffee5d0 to 0x0000ffffffffcaa0, size =
0xE4D0=3D=3D58576 bytes)
(0xfffffffee5e0 up to 0xffffffffa890 (not inclusive) are all =
0x0000000000000000)
(The prior trace example did not have such a large area.)

0xffffffffca50: 0x0000ffffffffcaa0 0x000000000040f1c8
sh`forkshell:
    0x40f1c4 <+144>: bl     0x413fc8                  ; flushall at =
output.c:236
    0x40f1c8 <+148>: bl     0x402920                  ; symbol stub for: =
fork
    0x40f1cc <+152>: mov    w19, w0
(flushall a voids returning to 0x40f1c8 directly, instead making
the last routine it calls return there instead of to flushall.)

0xffffffffcaa0: 0x0000ffffffffcb50 0x0000000000405954
sh`evaltree:
    0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
    0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
0xffffffffcb50: 0x0000ffffffffcde0 0x0000000000406e28
sh`evalbackcmd:
    0x406e24 <+336>: bl     0x40549c                  ; evaltree at =
eval.c:193
    0x406e28 <+340>: ldur   w0, [x29, #-0x5c]
0xffffffffcde0: 0x0000ffffffffcf90 0x0000000000409324
sh`argstr:
    0x409320 <+428>: bl     0x406cd4                  ; evalbackcmd at =
eval.c:646
    0x409324 <+432>: mov    x0, x26
0xffffffffcf90: 0x0000ffffffffcff0 0x0000000000408fa8
sh`expandarg:
    0x408fa4 <+108>: bl     0x409174                  ; argstr at =
expand.c:267
    0x408fa8 <+112>: cbz    x19, 0x409020             ; <+232> at =
expand.c:236
0xffffffffcff0: 0x0000ffffffffd320 0x0000000000405f48
sh`evalcommand:
    0x405f44 <+220>: bl     0x408f38                  ; expandarg at =
expand.c:225
    0x405f48 <+224>: ldr    x24, [x24, #0x8]

0xffffffffd0f0: 0x0000ffffffffd320 0x00000000004068e4
sh`evalcommand:
    0x4068e0 <+2680>: bl     0x402be0                  ; symbol stub =
for: _setjmp
    0x4068e4 <+2684>: cbz    w0, 0x406a04              ; <+2972> at =
eval.c:1101

0xffffffffd320: 0x0000ffffffffd3d0 0x0000000000405570
sh`evaltree:
    0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
    0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
    0x405574 <+216>: ldr    x8, [x24, #0x8]
0xffffffffd3d0: 0x0000ffffffffd480 0x0000000000405550
sh`cmdloop:
    0x411030 <+248>: bl     0x40549c                  ; evaltree at =
eval.c:193
    0x411034 <+252>: mov    w27, wzr
0xffffffffd480: 0x0000ffffffffd7b0 0x00000000004067d0
sh`evalcommand:
    0x4067cc <+2404>: bl     0x40549c                  ; evaltree at =
eval.c:193
    0x4067d0 <+2408>: ldr    x8, [x24, #0x970]
0xffffffffd7b0: 0x0000ffffffffd860 0x0000000000405570
sh`evaltree:
    0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
    0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
    0x405574 <+216>: ldr    x8, [x24, #0x8]
0xffffffffd860: 0x0000ffffffffd910 0x0000000000405550
sh`evaltree:
    0x40554c <+176>: bl     0x40549c                  ; <+0> at =
eval.c:193
    0x405550 <+180>: ldr    w8, [x22, #0x994]
0xffffffffd910: 0x0000ffffffffdc40 0x00000000004067d0
sh`evalcommand:
    0x4067cc <+2404>: bl     0x40549c                  ; evaltree at =
eval.c:193
    0x4067d0 <+2408>: ldr    x8, [x24, #0x970]

0xffffffffda10: 0x0000ffffffffdc40 0x000000000040673c
sh`evalcommand:
    0x406738 <+2256>: bl     0x402be0                  ; symbol stub =
for: _setjmp
    0x40673c <+2260>: cbnz   w0, 0x406c60              ; <+3576> at =
eval.c:1042

0xffffffffdc40: 0x0000ffffffffdcf0 0x0000000000405570
sh`evaltree:
    0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
    0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
    0x405574 <+216>: ldr    x8, [x24, #0x8]
0xffffffffdcf0: 0x0000ffffffffdd70 0x0000000000411034
sh`cmdloop:
    0x411030 <+248>: bl     0x40549c                  ; evaltree at =
eval.c:193
    0x411034 <+252>: mov    w27, wzr
0xffffffffdd70: 0x0000ffffffffddd0 0x0000000000410ea8
sh`main:
    0x410ea4 <+656>: bl     0x410f38                  ; cmdloop at =
main.c:199
    0x410ea8 <+660>: adrp   x8, 36
0xffffffffddd0: 0x0000ffffffffde10 0x0000000000402f30
sh`__start:
    0x402f2c <+356>: bl     0x410c14                  ; main at =
main.c:97
    0x402f30 <+360>: bl     0x402ae0                  ; symbol stub for: =
exit

(_rtld is not in the core file)
0xffffffffde10: 0x0000000000000000 0x0000000040434658
ld-elf.so.1`.rtld_start:
    0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
    0x40434658 <+24>: mov    x8, x0

So again the problem is associated with the forkshell/fork/freejob
related materials.

(I mistakenly left out the evalcommand/_setjmp material
 when I made the trace in the below. The same for flushall.
 I've inserted some of that below, at least for
 the flushall context.)

On 2017-Feb-6, at 8:05 PM, Mark Millard <markmi@dsl-only.net> wrote:

> [I got a lucky sh core dump with more stack context/content
> available to look at for an example sh crash. This helps
> narrow things down.]
>=20
> On 2017-Feb-5, at 1:12 AM, Mark Millard <markmi at dsl-only.net> =
wrote:
>=20
>> [Top post of a new result.]
>>=20
>> Using lldb to look at the memory for the stack around
>> sh failure points has some apparently fixed structure.
>> Example:
>>=20
>> . . . junk values . . .
>> 0xffffffffe4d0: 0x0000000000000078 0x637261612f737470
>> 0xffffffffe4e0: 0x00000000004345c8 0x0000000000434000
>> 0xffffffffe4f0: 0x0000000000434000 0x0000000040a903e0
>> 0xffffffffe500: 0x0000ffffffffe540 0x000000004054cd94
>> 0xffffffffe510: 0x696d6b72616d2f6c 0x0000000000000000
>> 0xffffffffe520: 0x0000000000000000 0x0000000000000000
>> 0xffffffffe530: 0x0000000000000000 0xe8021690dc1f70b8
>> 0xffffffffe540: 0x00000000004345c8 0x0000000000434000
>> 0xffffffffe550: 0x0000000000434000 0x000000000000000f
>> 0xffffffffe560: 0x0000ffffffffe5a0 0x000000000041aef0
>> 0xffffffffe570: 0x0000000000434c38 0x732f7273752f3a6e
>> 0xffffffffe580: 0x0000000000000001 0x0000000000000005
>> 0xffffffffe590: 0x0000000040a33180 0x0000000000000000
>> 0xffffffffe5a0: 0x0000ffffffffc5c0 0x000000000040f490
>> . . .  junk values . . .
>=20
> I got lucky and got a core dump that did not have the junk
> areas and could trace the stack's frame pointer chain
> between main and libc.so.7`__free (through freejob along
> the way). See later.
>=20
>> where "register read" showed:
>>=20
>>       sp =3D 0x0000ffffffffe600
>>=20
>> (The distance and direction to the last non-junk line
>> from the reported sp in each example is the same.)
>> Looking around that 0x000000000040f490:
>>=20
>>   0x40f48c: 0x97fffc74   bl     0x40e65c                  ; freejob =
at jobs.c:463
>>   0x40f490: 0x9100c294   add    x20, x20, #0x30           ; =3D0x30=20=

>>=20
>> It is the same address and code in each case.
>=20
> I should have originally noted that 0x40f48c is in
> forkshell, along the child process code-path:
>=20
> pid_t
> forkshell(struct job *jp, union node *n, int mode)
> {
> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>        INTOFF;
>        if (mode =3D=3D FORK_BG && (jp =3D=3D NULL || jp->nprocs =3D=3D =
0))
>                checkzombies();
>        flushall();
>        pid =3D fork();
>        if (pid =3D=3D -1) {
>                TRACE(("Fork failed, errno=3D%d\n", errno));
>                INTON;
>                error("Cannot fork: %s", strerror(errno));
>        }
>        if (pid =3D=3D 0) {
>                struct job *p;
>                int wasroot;
>                int i;
>=20
>                TRACE(("Child shell %d\n", (int)getpid()));
>                wasroot =3D rootshell;
>                rootshell =3D 0;
>                handler =3D &main_handler;
>                closescript();
>                INTON;
>                forcelocal =3D 0;
>                clear_traps();
> #if JOBS
> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
> #else
> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
> #endif
>                INTOFF;
>                for (i =3D njobs, p =3D jobtab ; --i >=3D 0 ; p++)
>                        if (p->used)
>                                freejob(p);
>                INTON;
>                if (wasroot && iflag) {
>                        setsignal(SIGINT);
>                        setsignal(SIGQUIT);
>                        setsignal(SIGTERM);
>                }
>                return pid;
>        }
> . . . (see /usr/src/bin/sh/jobs.c for this) . . .
>=20
>> Sometimes the junk values are all zeros over sizable
>> distances. Sometimes the sizable areas seem to have
>> random data.
>>=20
>> /usr/src/bin/sh/jobs.c 's freejobs is:
>>=20
>> static void
>> freejob(struct job *jp)
>> {
>>       struct procstat *ps;
>>       int i;
>>=20
>>       INTOFF;
>>       if (bgjob =3D=3D jp)
>>               bgjob =3D NULL;
>>       for (i =3D jp->nprocs, ps =3D jp->ps ; --i >=3D 0 ; ps++) {
>>               if (ps->cmd !=3D nullstr)
>>                       ckfree(ps->cmd);
>>       }
>>       if (jp->ps !=3D &jp->ps0)
>>               ckfree(jp->ps);
>>       jp->used =3D 0;
>> #if JOBS
>>       deljob(jp);
>> #endif
>>       INTON;
>> }
>>=20
>> /usr/src/bin/sh/error.h defines INTOFF and INTON:
>>=20
>> #define EXINT 0         /* SIGINT received */
>> #define EXERROR 1       /* a generic error */
>> #define EXEXEC 2        /* command execution failed */
>> #define EXEXIT 3        /* call exitshell(exitstatus) */
>>=20
>> . . .
>>=20
>> extern struct jmploc *handler;
>> extern volatile sig_atomic_t exception;
>>=20
>> . . .
>>=20
>> extern volatile sig_atomic_t suppressint;
>> extern volatile sig_atomic_t intpending;
>>=20
>> #define INTOFF suppressint++
>> #define INTON { if (--suppressint =3D=3D 0 && intpending) onint(); }
>> #define is_int_on() suppressint
>> #define SETINTON(s) suppressint =3D (s)
>> #define FORCEINTON {suppressint =3D 0; if (intpending) onint();}
>> #define SET_PENDING_INT intpending =3D 1
>> #define CLEAR_PENDING_INT intpending =3D 0
>> #define int_pending() intpending
>>=20
>> void exraise(int) __dead2;
>> void onint(void) __dead2;
>>=20
>> /usr/src/bin/sh/error.c hAS:
>>=20
>> void
>> exraise(int e)
>> {
>>       INTOFF;
>>       if (handler =3D=3D NULL)
>>               abort();
>>       exception =3D e;
>>       longjmp(handler->loc, 1);
>> }
>> . . .
>> void
>> onint(void)
>> {
>>       sigset_t sigs;
>>=20
>>       intpending =3D 0;
>>       sigemptyset(&sigs);
>>       sigprocmask(SIG_SETMASK, &sigs, NULL);
>>=20
>>       /*
>>        * This doesn't seem to be needed, since main() emits a =
newline.
>>        */
>> #if 0
>>       if (tcgetpgrp(0) =3D=3D getpid())
>>               write(STDERR_FILENO, "\n", 1);
>> #endif
>>       if (rootshell && iflag)
>>               exraise(EXINT);
>>       else {
>>               signal(SIGINT, SIG_DFL);
>>               kill(getpid(), SIGINT);
>>               _exit(128 + SIGINT);
>>       }
>> }
>>=20
>> # grep setjmp /usr/src/bin/sh/*
>> /usr/src/bin/sh/TOUR:so I implement it using setjmp and longjmp.  The =
global variable
>> /usr/src/bin/sh/error.h:#include <setjmp.h>
>> /usr/src/bin/sh/error.h: * BSD setjmp saves the signal mask, which =
violates ANSI C and takes time,
>> /usr/src/bin/sh/error.h: * so we use _setjmp instead.
>> /usr/src/bin/sh/error.h:#define setjmp(jmploc)	_setjmp(jmploc)
>> /usr/src/bin/sh/eval.c:	if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/eval.c:	if (setjmp(jmploc.loc))
>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/eval.c:		if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/histedit.c:		if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/jobs.c:		if (setjmp(jmploc.loc))
>> /usr/src/bin/sh/main.c: * commands.  The setjmp call sets up the =
location to jump to when an
>> /usr/src/bin/sh/main.c:	if (setjmp(main_handler.loc)) {
>> /usr/src/bin/sh/parser.c:	if (setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/parser.c:	if (!setjmp(jmploc.loc)) {
>> /usr/src/bin/sh/trap.c:	if (!setjmp(loc1.loc)) {
>> /usr/src/bin/sh/trap.c:	if (!setjmp(loc2.loc)) {
>> /usr/src/bin/sh/var.c:	if (setjmp(jmploc.loc))
>=20
> Here is the call chain that I was able to trace
> in the newer core dump:
> (most nested first to least nested last;
> showing frame pointer and lr value pairs
> and calls/return-places)
>=20
> (ifree is not in the core file)
> 0xffffffffcc60: 0x0000ffffffffcca0 0x000000004054cd94
> libc.so.7`__free:
>    0x4054cd90 <+144>: bl     0xad6fc                   ; ifree at =
jemalloc_jemalloc.c:1876
>    0x4054cd94 <+148>: adrp   x9, 185
> 0xffffffffcca0: 0x0000ffffffffccc0 0x0000000000411300
> sh`ckfree:
>    0x4112fc <+28>: bl     0x4027e0                  ; symbol stub for: =
free
>    0x411300 <+32>: ldr    x8, [x19, #0x970]
> 0xffffffffccc0: 0x0000ffffffffcd00 0x000000000040e6e8
> sh`freejob:
>    0x40e6e4 <+136>: bl     0x4112e0                  ; ckfree at =
memalloc.c:86
>    0x40e6e8 <+140>: adrp   x8, 38
> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
> sh`forkshell:
>    0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>    0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30=20

0xffffffffcdd0: 0x0000ffffffffce20 0x000000000040f1c8
sh`forkshell:
    0x40f1c4 <+144>: bl     0x413fc8                  ; flushall at =
output.c:236
    0x40f1c8 <+148>: bl     0x402920                  ; symbol stub for: =
fork
    0x40f1cc <+152>: mov    w19, w0
(flushall a voids returning to 0x40f1c8 directly, instead making
the last routine it calls return there instead of to flushall.)

> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
> sh`evaltree:
>    0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>    0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
> 0xffffffffced0: 0x0000ffffffffd160 0x0000000000406e28
> sh`evalbackcmd:
>    0x406e24 <+336>: bl     0x40549c                  ; evaltree at =
eval.c:193
>    0x406e28 <+340>: ldur   w0, [x29, #-0x5c]0xffffffffcd60: =
0x0000ffffffffcf90 0x00000000004068e4

> 0xffffffffd160: 0x0000ffffffffd310 0x0000000000409324
> sh`argstr:
>    0x409320 <+428>: bl     0x406cd4                  ; evalbackcmd at =
eval.c:646
>    0x409324 <+432>: mov    x0, x26
> 0xffffffffd310: 0x0000ffffffffd370 0x0000000000408fa8
> sh`expandarg:
>    0x408fa4 <+108>: bl     0x409174                  ; argstr at =
expand.c:267
>    0x408fa8 <+112>: cbz    x19, 0x409020             ; <+232> at =
expand.c:236
> 0xffffffffd370: 0x0000ffffffffd5f0 0x0000000000407530
> sh`exphere:
>    0x40752c <+212>: bl     0x408f38                  ; expandarg at =
expand.c:225
>    0x407530 <+216>: ldr    x8, [x20]
> 0xffffffffd5f0: 0x0000ffffffffd630 0x00000000004073f0
> sh`expredir:
>    0x4073ec <+112>: bl     0x407458                  ; exphere at =
eval.c:494
>    0x4073f0 <+116>: b      0x407428                  ; <+172> at =
eval.c:535
> 0xffffffffd630: 0x0000ffffffffd960 0x0000000000406154
> sh`evalcommand:
>    0x406150 <+744>: bl     0x40737c                  ; expredir at =
eval.c:532
>    0x406154 <+748>: ldur   w27, [x29, #-0x68]
> 0xffffffffd960: 0x0000ffffffffda10 0x0000000000405570
> sh`evaltree:
>    0x40556c <+208>: bl     0x405e68                  ; evalcommand at =
eval.c:825
>    0x405570 <+212>: b      0x405a9c                  ; <+1536> at =
eval.c:623
>    0x405574 <+216>: ldr    x8, [x24, #0x8]
> 0xffffffffda10: 0x0000ffffffffdac0 0x00000000004056b4
> sh`evaltree:
>    0x4056b0 <+532>: bl     0x40549c                  ; <+0> at =
eval.c:193
>    0x4056b4 <+536>: ldr    w8, [x19, #0x990]
> 0xffffffffdac0: 0x0000ffffffffdb70 0x0000000000405550
> sh`evaltree:
>    0x40554c <+176>: bl     0x40549c                  ; <+0> at =
eval.c:193
>    0x405550 <+180>: ldr    w8, [x22, #0x994]
> 0xffffffffdb70: 0x0000ffffffffdbf0 0x0000000000411034
> sh`cmdloop:
>    0x411030 <+248>: bl     0x40549c                  ; evaltree at =
eval.c:193
>    0x411034 <+252>: mov    w27, wzr
> 0xffffffffdbf0: 0x0000ffffffffdc50 0x0000000000410ea8
> sh`main:
>    0x410ea4 <+656>: bl     0x410f38                  ; cmdloop at =
main.c:199
>    0x410ea8 <+660>: adrp   x8, 36
> 0xffffffffdc50: 0x0000ffffffffdc90 0x0000000000402f30
> sh`__start:
>    0x402f2c <+356>: bl     0x410c14                  ; main at =
main.c:97
>    0x402f30 <+360>: bl     0x402ae0                  ; symbol stub =
for: exit
>=20
> (_rtld is not in the core file)
> 0xffffffffdc90: 0x0000000000000000 0x0000000040434658
> ld-elf.so.1`.rtld_start:
>    0x40434654 <+20>: bl     0x2e4c                    ; _rtld at =
rtld.c:339
>    0x40434658 <+24>: mov    x8, x0
>=20
> Some of the most nested possibly had returned. But the
> forkshell / freejob general time frame seem to match
> everything that I've seen.
>=20
> [The details of the middle "eval*" related layers vary
> from what I can tell.]
>=20
> "register read" shows fp, lr, and pc majorly
> messed up.
>=20
> General Purpose Registers:
>        x0 =3D 0x0000000000000000
>        x1 =3D 0x00000000404346e8  ld-elf.so.1`_rtld_tlsdesc
>        x2 =3D 0x0000000040a00000
>        x3 =3D 0x0000000000000002
>        x4 =3D 0x0000000000000096
>        x5 =3D 0x0000000040a5fd10
>        x6 =3D 0x0000000000434c28  sh..bss + 9448
>        x7 =3D 0x0000000000434c28  sh..bss + 9448
>        x8 =3D 0x0000000000000001
>        x9 =3D 0x0000000000000000
>       x10 =3D 0x0000000000000000
>       x11 =3D 0x0000000040a350c0
>       x12 =3D 0x0000000040a0e770
>       x13 =3D 0x0000000000000072
>       x14 =3D 0x000000000000006f
>       x15 =3D 0x0000000000000010
>       x16 =3D 0x0000000000432340 =20
>       x17 =3D 0x000000004054cd00  libc.so.7`__free at =
jemalloc_jemalloc.c:2007
>       x18 =3D 0x0000000000000000
>       x19 =3D 0x0000000000000000
>       x20 =3D 0x0000000000000000
>       x21 =3D 0x0000000000000001
>       x22 =3D 0x0000000040a5ff10
>       x23 =3D 0x0000ffffffffd190
>       x24 =3D 0x0000000000434000  sh..bss + 6336
>       x25 =3D 0x0000000000434000  sh..bss + 6336
>       x26 =3D 0x0000ffffffffcd00
>       x27 =3D 0x0000000000434000  sh..bss + 6336
>       x28 =3D 0x0000000040a6f5e0
>        fp =3D 0x0000000040a5fed8
>        lr =3D 0x0000000000000000
>        sp =3D 0x0000ffffffffcd60
>        pc =3D 0x0000000000000000
>      cpsr =3D 0x60000000
>=20
> sp is also odd by being in the middle of the stack range
> for:
>=20
> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
> sh`forkshell:
>    0x40f48c <+856>: bl     0x40e65c                  ; freejob at =
jobs.c:463
>    0x40f490 <+860>: add    x20, x20, #0x30           ; =3D0x30=20
> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
> sh`evaltree:
>    0x405950 <+1204>: bl     0x40f134                  ; forkshell at =
jobs.c:838
>    0x405954 <+1208>: cbnz   w0, 0x4059dc              ; <+1344> =
[inlined] evalpipe + 300 at eval.c:286
>=20
> NOTE: The fork happened earlier in sh`forkshell and this
> is the child process that has the odd value.
>=20
> [It leaves me wondering if 0x0000ffffffffcd60 is a stack
> pointer value associated with a call to something
> earlier than the sh`forkshell call that is called by
> sh`forkshell .]
>=20
> Also: in the ones with only a small section of the junk
> areas the equivalent of:
>=20
> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
>=20
> is the largest addressed non-junk content in the area
> and the equivalent of:
>=20
> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
>=20
> would instead show zeros or "random" garbage values.
>=20
> In this case, however that range of the stack looks like:
>=20
> . . .
> 0xffffffffcd00: 0x0000ffffffffce20 0x000000000040f490
> 0xffffffffcd10: 0x0000ffffffffcd00 0x0000000000434000
> 0xffffffffcd20: 0x0000000000434000 0x0000ffffffffd190
> 0xffffffffcd30: 0x0000000040a5ff10 0x0000000000000001
> 0xffffffffcd40: 0x0000000000000000 0x0000000000000000
> 0xffffffffcd50: 0x0000000040a5fed8 0x0000000000000000
> 0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
> 0xffffffffcd70: 0x0000000000000000 0x827a80ccb3228215
> 0xffffffffcd80: 0x0000000040a6f5c0 0x0000000000434000
> 0xffffffffcd90: 0x0000000000434000 0x0000000000434000
> 0xffffffffcda0: 0x0000000000434000 0x0000000000434000
> 0xffffffffcdb0: 0x0000000040a6f638 0x0000000000000000
> 0xffffffffcdc0: 0x0000000040a350c0 0x0000000000434000
> 0xffffffffcdd0: 0x0000ffffffffce20 0x000000000040f1c8
> 0xffffffffcde0: 0x0000000000000003 0x0000000040a350c0
> 0xffffffffcdf0: 0x0000000040a6f5c0 0x0000000000434000
> 0xffffffffce00: 0x0000000000434000 0x0000000040a6f638
> 0xffffffffce10: 0x0000000000000000 0x0000000000434000
> 0xffffffffce20: 0x0000ffffffffced0 0x0000000000405954
> . . .
>=20
> Interestingly 0xffffffffcd60 reported for the sp looks
> like it has a frame-pointer/lr-value pair that does not
> fit with the overall call chain that ties together but
> is some fragment of a prior(?) call chain:
>=20
> 0xffffffffcd60: 0x0000ffffffffcf90 0x00000000004068e4
> sh`evalcommand:
>    0x4068e0 <+2680>: bl     0x402be0                  ; symbol stub =
for: _setjmp
>    0x4068e4 <+2684>: cbz    w0, 0x406a04              ; <+2972> at =
eval.c:1101
>=20
> It looks like it is a record from calling _setjmp in
> sh`evalcommand .
>=20
> (sh uses _setjmp/_longjmp via macros that turn
> into them for setjmp/longjmp references in
> sh's source code.)
>=20
> Interestingly (likely junk relative to the above):
>=20
> 0xffffffffcf90: 0x0000000000000000 0x0000000000432000
>=20
> where:
>=20
> (lldb) dis -s 0x0000000000432000
> sh`__frame_dummy_init_array_entry:
>    0x432000 <+0>: .long  0x00402fac                ; unknown opcode
>    0x432004 <+4>: .long  0x00000000                ; unknown opcode
> (lldb) dis -s __frame_dummy_init_array_entry -c32
> sh`frame_dummy:
>    0x402fac <+0>:  adrp   x8, 48
>    0x402fb0 <+4>:  adrp   x1, 48
>    0x402fb4 <+8>:  ldr    x8, [x8, #0x30]
>    0x402fb8 <+12>: ldr    x1, [x1, #0x228]
>    0x402fbc <+16>: cmp    x8, #0x0                  ; =3D0x0=20
>    0x402fc0 <+20>: ccmp   x1, #0x0, #0x4, ne
>    0x402fc4 <+24>: b.ne   0x402fcc                  ; <+32>
>    0x402fc8 <+28>: ret   =20
>    0x402fcc <+32>: adrp   x0, 48
>    0x402fd0 <+36>: add    x0, x0, #0x30             ; =3D0x30=20
>    0x402fd4 <+40>: br     x1
>=20
> sh`lookupalias:
> . . .
>=20
>=20
> Ohter notes:
>=20
> Some examples of funcnest=3D=3D0 others have (e.g.) funcnest=3D=3D2.
> This one had funcnest=3D=3D0.
>=20
> commandname varies. In this case it was:
>=20
> (lldb) print commandname
> (char *) $74 =3D 0x0000ffffffffe210 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/li=
biberty/configure"
>=20
> Other examples include:
>=20
> (lldb) print commandname
> (char *) $0 =3D 0x0000ffffffffdc40 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/fi=
xincludes/configure"
>=20
> (lldb) print commandname
> (char *) $0 =3D 0x0000ffffffffe498 =
"/usr/obj/portswork/usr/ports/devel/aarch64-none-elf-gcc/work/gcc-6.3.0/li=
biberty/../config.sub"
>=20
> (lldb) print commandname
> (char *) $0 =3D 0x0000ffffffffe398 "../libtool"
>=20
>=20
> So far the forkshell/fork/freejob and associated materials always =
seeming
> to be involved is all that I've found that is common (at least that is
> suggested by what I see so far) within the sh context.
>=20
>> Other notes:
>>=20
>> As a personal investigation I've temporarily changed to using
>> something not fully generic but based on gic-400 specifics:
>>=20
>> # svnlite diff /usr/src/sys/arm/arm/gic.c
>> Index: /usr/src/sys/arm/arm/gic.c
>> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>> --- /usr/src/sys/arm/arm/gic.c  (revision 312982)
>> +++ /usr/src/sys/arm/arm/gic.c  (working copy)
>> @@ -672,9 +672,13 @@
>>=20
>>       if (irq >=3D sc->nirqs) {
>> #ifdef GIC_DEBUG_SPURIOUS
>> +#define EXPECTED_SPURIOUS_IRQ 1023
>> +           if (irq !=3D EXPECTED_SPURIOUS_IRQ) {
>>               device_printf(sc->gic_dev,
>> -                   "Spurious interrupt detected: last irq: %d on =
CPU%d\n",
>> +                   "Spurious interrupt %d detected of %d: last irq: =
%d on CPU%d\n",
>> +                   irq, sc->nirqs,
>>                   sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
>> +            }
>> #endif
>>               return (FILTER_HANDLED);
>>       }
>> @@ -720,6 +724,16 @@
>>       if (irq < sc->nirqs)
>>               goto dispatch_irq;
>>=20
>> +       if (irq !=3D EXPECTED_SPURIOUS_IRQ) {
>> +#undef EXPECTED_SPURIOUS_IRQ
>> +#ifdef GIC_DEBUG_SPURIOUS
>> +               device_printf(sc->gic_dev,
>> +                   "Spurious end interrupt %d detected of %d: last =
irq: %d on CPU%d\n",
>> +                   irq, sc->nirqs,
>> +                   sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
>> +#endif
>> +       }
>> +
>>       return (FILTER_HANDLED);
>> }
>>=20
>>=20
>> The result was no notices of Spurious interrupts have been generated:
>> All of the odd interrupts were the special 1023 value.
>>=20
>> [As far as I could tell from the code the configuration is such that
>> 1022 should not be generated --and were not. 1020 and 1021 are
>> reserved and should not be generated.]



=3D=3D=3D
Mark Millard
markmi at dsl-only.net




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?25B9EBC8-147F-47C2-BC40-C449EF3AC3FE>