Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 10 May 2019 14:02:35 -0700
From:      Mark Millard <marklmi@yahoo.com>
To:        Justin Hibbits <chmeeedalf@gmail.com>
Cc:        FreeBSD PowerPC ML <freebsd-ppc@freebsd.org>, Dennis Clarke <dclarke@blastwave.org>, Nathan Whitehorn <nwhitehorn@freebsd.org>
Subject:   Re: 970/PowerMac G5 cpudep_ap_bootstrap slb-related hangup *solved* . . .
Message-ID:  <241A2C8D-2E9D-4E2E-8A78-3E4A17F0C46A@yahoo.com>
In-Reply-To: <EFC7B7DC-3C97-47A4-9A8F-6A43A95F42E8@yahoo.com>
References:  <2E7A0894-E5B0-4776-95F2-76B7EE0EE93C@yahoo.com> <C3272E01-41D8-4E4C-8F4B-CF9DC6E37B3D@yahoo.com> <CAHSQbTD3ZvBp_%2BgWcoSZ=nOY%2BG0xLdp4qBCJ2Ty0DXi4x608VA@mail.gmail.com> <EFC7B7DC-3C97-47A4-9A8F-6A43A95F42E8@yahoo.com>

next in thread | previous in thread | raw e-mail | index | archive | help
[For head -r347463  I'll still have to have =
lib/libc/powerpc64/string/strcmp.S
patched to avoid cmpb instructions. No other patches.]

On 2019-May-10, at 13:11, Mark Millard <marklmi at yahoo.com> wrote:

> On 2019-May-10, at 12:38, Justin Hibbits <chmeeedalf at gmail.com> =
wrote:
>=20
>> Hi Mark,
>>=20
>> On Fri, May 10, 2019 at 6:23 AM Mark Millard <marklmi@yahoo.com> =
wrote:
>>>=20
>>> [Having removed all my prior investigatory material, I include
>>> a svnlite diff that I've booted based on, a comparatively
>>> minimal diff from the head -r347003 that I started from.]
>>>=20
>>> On 2019-May-10, at 02:15, Mark Millard <marklmi at yahoo.com> wrote:
>>>=20
>>>> [This continues a prior message, but I choose a new subject
>>>> text for the testing that showed the kind of material working.]
>>>>=20
>>>> I have the slbtrap/handle_kernel_slb_spill working instead
>>>> of hanging up when it has an slb-miss (and well as when there
>>>> is no miss).
>>>>=20
>>>> In /usr/src/sys/powerpc/aim/mp_cpudep.c I moved the
>>>> 970 code for HID0 and HID1 from cpudep_ap_setup, code
>>>> that looks like,
>>>>=20
>>>>              /* Set HIOR to 0 */
>>>>              __asm __volatile("mtspr 311,%0" :: "r"(0));
>>>>              powerpc_sync();
>>>>=20
>>>>              /*
>>>>               * The 970 has strange rules about how to update HID =
registers.
>>>>               * See Table 2-3, 970MP manual
>>>>               *
>>>>               * Note: HID4 and HID5 restored already in
>>>>               * cpudep_ap_early_bootstrap()
>>>>               */
>>>>=20
>>>>              __asm __volatile("mtasr %0; sync" :: "r"(0));
>>>>      #ifdef __powerpc64__
>>>>              __asm __volatile(" \
>>>>                      sync; isync;                                   =
 \
>>>>                      mtspr   %1, %0;                                =
 \
>>>>                      mfspr   %0, %1; mfspr   %0, %1; mfspr   %0, =
%1; \
>>>>                      mfspr   %0, %1; mfspr   %0, %1; mfspr   %0, =
%1; \
>>>>                      sync; isync"
>>>>                  :: "r"(bsp_state[0]), "K"(SPR_HID0));
>>>>              __asm __volatile("sync; isync;  \
>>>>                  mtspr %1, %0; mtspr %1, %0; sync; isync"
>>>>                  :: "r"(bsp_state[1]), "K"(SPR_HID1));
>>>>      #else
>>>>              __asm __volatile(" \
>>>>                      ld      %0,0(%2);                              =
 \
>>>>                      sync; isync;                                   =
 \
>>>>                      mtspr   %1, %0;                                =
 \
>>>>                      mfspr   %0, %1; mfspr   %0, %1; mfspr   %0, =
%1; \
>>>>                      mfspr   %0, %1; mfspr   %0, %1; mfspr   %0, =
%1; \
>>>>                      sync; isync"
>>>>                  : "=3Dr"(reg) : "K"(SPR_HID0), "b"(bsp_state));
>>>>              __asm __volatile("ld %0, 8(%2); sync; isync;    \
>>>>                  mtspr %1, %0; mtspr %1, %0; sync; isync"
>>>>                  : "=3Dr"(reg) : "K"(SPR_HID1), "b"(bsp_state));
>>>>      #endif
>>>>=20
>>>>              powerpc_sync();
>>>>=20
>>>> Here to? moved it to cpudep_ap_early_bootstrap, just before the
>>>> code for HID4 and HID5, and I commented out 2 #if/endif lines:
>>>>=20
>>>> void
>>>> cpudep_ap_early_bootstrap(void)
>>>> {
>>>> //#ifndef __powerpc64__
>>>>      register_t reg;
>>>> //#endif
>>>>=20
>>>>      switch (mfpvr() >> 16) {
>>>>      case IBM970:
>>>>      case IBM970FX:
>>>>      case IBM970MP:
>>>>> .>.> INSERT CODE HERE <.<.<.
>>>>=20
>>>>              /* Restore HID4 and HID5, which are necessary for the =
MMU */
>>>>=20
>>>> #ifdef __powerpc64__
>>>>              mtspr(SPR_HID4, bsp_state[2]); powerpc_sync(); =
isync();
>>>>              mtspr(SPR_HID5, bsp_state[3]); powerpc_sync(); =
isync();
>>>> #else
>>>>              __asm __volatile("ld %0, 16(%2); sync; isync;   \
>>>>                  mtspr %1, %0; sync; isync;"
>>>>                  : "=3Dr"(reg) : "K"(SPR_HID4), "b"(bsp_state));
>>>>              __asm __volatile("ld %0, 24(%2); sync; isync;   \
>>>>                  mtspr %1, %0; sync; isync;"
>>>>                  : "=3Dr"(reg) : "K"(SPR_HID5), "b"(bsp_state));
>>>> #endif
>>>>              powerpc_sync();
>>>>              break;
>>>> . . .
>>>>=20
>>>> This does the initialization before cpudep_ap_bootstrap,
>>>> instead of after.
>>>>=20
>>>> With things then sufficiently initialized for PSL_IR|PSL_DR
>>>> code to doing things like pcpup->pc_curthread->td_pcb->
>>>> that sometimes have slb misses, it boots fine,
>>>> loading into the slb as needed. No more checkstop status
>>>> (or whatever it was).
>>>>=20
>>>> I do not know if non-970 contexts should have similar
>>>> changes in the ordering of initializations or not.
>>>> But, clearly, the 970 family members do need such.
>>>>=20
>>>> I'm not claiming that other material from other notes
>>>> that I sent out should be ignored, only that the above
>>>> changes the observed failing behavior, and so is a big
>>>> gain all by itself. And it is simple to do without
>>>> other investigations that might be involved in the
>>>> more overall context.
>>>=20
>>> Of course, whitespace details, may not be well preserved
>>> below. (The commenting out of the two #if/#endif lines
>>> was unnecessary and is not done in the below.)
>>>=20
>> <diff in PR 233863>
>>=20
>> Good sleuthing.
>>=20
>> I think the whole diff could be reduced to just moving the HIOR.  Can
>> you give r347463 a shot? It's the reduced diff of just moving HIOR.
>> If that's not sufficient, then I can move the HID0/HID1
>> initializations, but they didn't look relevant for early boot
>> stability when I reviewed.
>=20
> I can try later today.
>=20
> I'll note that the bsp does not use the relative ordering
> the ap's use for HID0 and HID1 vs. code analogous to
> cpudep_ap_bootstrap as far as I could tell: it does HID0
> earlier and makes no HID1 assigments at all (depending
> on openfirmware or the loader to have given appropriate
> assignments).
>=20
> (OpenFirmware does not seem to do much for configuring the
> ap's, just the bsp. Depending on defaults is more of an
> issue for the ap's.)
>=20
> Also, some HID0 and HID1 points to consider:
>=20
> HID0 controls the TBR behavaior, and mftb() is in use in the
> slb replacement code:
>=20
> bit 18 is: tb_ctrl Enable time-base counting when the processor is =
stopped.
>=20
> bit 19 is: ext_tb_en External time-base enable. With:
> 	=E2=80=A2 0  Use TBEN input as enable. TB is clocked at 1/8 of =
the full processor frequency.
> 	=E2=80=A2 1  Use TBEN input to clock time base (external clock).
>=20
> (I've seen other material claiming 1/16th instead of 1/8th.)
>=20
> There is also:
>=20
> bit 32 is: en_mck Enable external machine check interrupts (preferred =
state equals =E2=80=981=E2=80=99).
>=20
> HID1 has (note the "must be 1 for proper functioning" example):
>=20
> bit 5 is: en_ic Enable instruction cache (must be =E2=80=981=E2=80=99 =
for proper functioning).
>=20
> bit 10 is: en_if_cach Enable instruction fetch cacheability control. =
With:
> 	=E2=80=A2 0  All instruction fetch accesses are treated as cache =
inhibited regardless of
> the state of the I bit in the page table.
> 	=E2=80=A2 1  Instruction fetch cacheability is controlled by the =
state of the I bit in the
> page table (preferred state).
>=20
> (I'll not list other cache/link-stack//tablewalks related material. =
There
> are some with "preferred state equals '1'".)
>=20
>=20
> I do not see why either of HID0 or  HID1 has a reason to be later than
> where I put them (relative to other activities). Why do you want them
> to be later?


My test will still have changes to allow world to operate
on the 970MP (by avoiding cmpb instructions):

# svnlite diff /mnt/usr/src/
Index: /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S	(revision =
347463)
+++ /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S	(working copy)
@@ -88,9 +88,16 @@
 .Lstrcmp_compare_by_word:
 	ld	%r5,0(%r3)	/* Load double words. */
 	ld	%r6,0(%r4)
-	xor	%r8,%r8,%r8	/* %r8 <- Zero. */
+	lis     %r8,32639	/* 0x7f7f */
+	ori     %r8,%r8,32639	/* 0x7f7f7f7f */
+	rldimi  %r8,%r8,32,0	/* 0x7f7f7f7f'7f7f7f7f */
 	xor	%r0,%r5,%r6	/* Check if double words are different. =
*/
-	cmpb	%r7,%r5,%r8	/* Check if double words contain zero. =
*/
+				/* Check for zero vs. not bytes: */
+	and	%r9,%r5,%r8	/* 0x00->0x00, 0x80->0x00, =
other->ms-bit-in-byte=3D=3D0 */
+	add	%r9,%r9,%r8	/*     ->0x7f,     ->0x7f,      =
->ms-bit-in-byte=3D=3D1 */
+	nor	%r7,%r9,%r5	/*     ->0x80,     ->0x00,      =
->ms-bit-in-byte=3D=3D0 */
+	andc	%r7,%r7,%r8	/*     ->0x80,     ->0x00,      ->0x00 =
*/
+				/* sort of like cmpb %r7,%r5,%r8 for %r8 =
being zero */
=20
 	/*
 	 * If double words are different or contain zero,
@@ -104,7 +111,12 @@
 	ldu	%r5,8(%r3)	/* Load double words. */
 	ldu	%r6,8(%r4)
 	xor	%r0,%r5,%r6	/* Check if double words are different. =
*/
-	cmpb	%r7,%r5,%r8	/* Check if double words contain zero. =
*/
+				/* Check for zero vs. not bytes: */
+	and	%r9,%r5,%r8	/* 0x00->0x00, 0x80->0x00, =
other->ms-bit-in-byte=3D=3D0 */
+	add	%r9,%r9,%r8	/*     ->0x7f,     ->0x7f,      =
->ms-bit-in-byte=3D=3D1 */
+	nor	%r7,%r9,%r5	/*     ->0x80,     ->0x00,      =
->ms-bit-in-byte=3D=3D0 */
+	andc	%r7,%r7,%r8	/*     ->0x80,     ->0x00,      ->0x00 =
*/
+				/* sort of like cmpb %r7,%r5,%r8 for %r8 =
being zero */
=20
 	/*
 	 * If double words are different or contain zero,



=3D=3D=3D
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?241A2C8D-2E9D-4E2E-8A78-3E4A17F0C46A>