Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 10 Jun 2013 00:56:16 +0200
From:      Olivier Houchard <cognet@ci0.org>
To:        Andrew Turner <andrew@fubar.geek.nz>
Cc:        freebsd-arm@freebsd.org
Subject:   Re: RFC: Patches with AXP support and pmap&smp fixes.
Message-ID:  <20130609225615.GA42548@ci0.org>
In-Reply-To: <20130602130713.70b0b9f0@bender.Home>
References:  <517E8610.5050204@semihalf.com> <20130430142701.5bbfec2b@bender.lan> <20130430143311.GA71966@ci0.org> <518775B4.1010308@semihalf.com> <20130506134129.GA60131@ci0.org> <20130602130713.70b0b9f0@bender.Home>

next in thread | previous in thread | raw e-mail | index | archive | help

--7JfCtLOvnd9MIVvH
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Sun, Jun 02, 2013 at 01:07:13PM +0100, Andrew Turner wrote:
> On Mon, 6 May 2013 15:41:29 +0200
> Olivier Houchard <cognet@ci0.org> wrote:
> 
> > On Mon, May 06, 2013 at 11:19:48AM +0200, Grzegorz Bernacki wrote:
> > > 
> > > Hi,
> > > 
> > > Our patch fixes only the initialization of pcpu in pcpu_init(). In a
> > > 4-core setup it is possible for at least two cores to update the queue
> > > of pcpus simultaneously, which causes corruption.
> > > I am not aware of any other problems with SMP. Let us try a setup
> > > with WITNESS enabled and we'll see if we have the same problem.
> > > Olivier, could you share your patch? In case we have the problem, we
> > > would like to have it.
> > > 
> > 
> > Hi,
> > 
> > > Sure, I will dust it off and send it. Maybe not before Sunday though;
> > > I'll be mostly MIA for the week. I'm quite interested in you guys
> > > having a look at it.
> 
> Did you manage to extract the patch? I don't remember seeing it.
> 
> Andrew

Hi Andrew,

Sorry for the long delay. I finally took the time to update it to the latest
-CURRENT, and checked that it boots.

Regards,

Olivier

--7JfCtLOvnd9MIVvH
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="pcpu-patch.diff"

Index: arm/arm/cpufunc_asm_armv7.S
===================================================================
--- arm/arm/cpufunc_asm_armv7.S	(revision 251584)
+++ arm/arm/cpufunc_asm_armv7.S	(working copy)
@@ -57,9 +57,9 @@
 #define PT_OUTER_WBWA	(1 << 3)
 	
 #ifdef SMP
-#define PT_ATTR	(PT_S|PT_INNER_WT|PT_OUTER_WT|PT_NOS)
+#define PT_ATTR	(PT_S|PT_INNER_WBWA|PT_OUTER_WBWA|PT_NOS)
 #else
-#define PT_ATTR	(PT_INNER_WT|PT_OUTER_WT)
+#define PT_ATTR	(PT_INNER_WBWA|PT_OUTER_WBWA)
 #endif
 
 ENTRY(armv7_setttb)
@@ -98,7 +98,7 @@
 	ldr	r1, .Lpage_mask
 	bic	r0, r0, r1
 #ifdef SMP
-	mcr	p15, 0, r0, c8, c3, 1	/* flush D tlb single entry Inner Shareable*/
+	mcr	p15, 0, r0, c8, c3, 3	/* flush D tlb single entry Inner Shareable*/
 	mcr	p15, 0, r0, c7, c1, 6	/* flush BTB Inner Shareable */
 #else
 	mcr	p15, 0, r0, c8, c7, 1	/* flush D tlb single entry */
@@ -113,6 +113,7 @@
 ENTRY(armv7_dcache_wbinv_all)
 	stmdb	sp!, {r4, r5, r6, r7, r8, r9}
 
+	dsb
 	/* Get cache level */
 	ldr	r0, .Lcoherency_level
 	ldr	r3, [r0]
@@ -188,6 +189,7 @@
 	and	r2, r0, r3
 	add	r1, r1, r2
 	bic	r0, r0, r3
+	dsb
 .Larmv7_wb_next:
 	mcr	p15, 0, r0, c7, c10, 1	/* Clean D cache SE with VA */
 	add	r0, r0, ip
@@ -203,6 +205,7 @@
 	and     r2, r0, r3
 	add     r1, r1, r2
 	bic     r0, r0, r3
+	dsb
 .Larmv7_wbinv_next:
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
 	add	r0, r0, ip
@@ -222,6 +225,7 @@
 	and     r2, r0, r3
 	add     r1, r1, r2
 	bic     r0, r0, r3
+	dsb
 .Larmv7_inv_next:
 	mcr	p15, 0, r0, c7, c6, 1	/* Invalidate D cache SE with VA */
 	add	r0, r0, ip
@@ -237,6 +241,7 @@
 	and     r2, r0, r3
 	add     r1, r1, r2
 	bic     r0, r0, r3
+	dsb
 .Larmv7_id_wbinv_next:
 	mcr	p15, 0, r0, c7, c5, 1	/* Invalidate I cache SE with VA */
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
Index: arm/arm/copystr.S
===================================================================
--- arm/arm/copystr.S	(revision 251584)
+++ arm/arm/copystr.S	(working copy)
@@ -51,14 +51,12 @@
 	.align	0
 
 #ifdef _ARM_ARCH_6
-#define GET_PCB(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+KSTACK_LOCALS
 #else
 .Lpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
 
-#define GET_PCB(tmp) \
+#define GET_PCB(tmp, tmp2) \
 	ldr	tmp, .Lpcb
 #endif
 
@@ -114,8 +112,7 @@
 	moveq	r0, #ENAMETOOLONG
 	beq	2f
 
-	GET_PCB(r4)
-	ldr	r4, [r4]
+	GET_PCB(r4, r5)
 
 #ifdef DIAGNOSTIC
 	teq	r4, #0x00000000
@@ -162,8 +159,7 @@
 	moveq	r0, #ENAMETOOLONG
 	beq	2f
 
-	GET_PCB(r4)
-	ldr	r4, [r4]
+	GET_PCB(r4, r5)
 
 #ifdef DIAGNOSTIC
 	teq	r4, #0x00000000
Index: arm/arm/genassym.c
===================================================================
--- arm/arm/genassym.c	(revision 251584)
+++ arm/arm/genassym.c	(working copy)
@@ -25,6 +25,8 @@
  *
  */
 
+#include "opt_kstack_max_pages.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
@@ -69,6 +71,7 @@
 ASSYM(PCB_R12, offsetof(struct pcb, un_32.pcb32_r12));
 ASSYM(PCB_PC, offsetof(struct pcb, un_32.pcb32_pc));
 ASSYM(PCB_SP, offsetof(struct pcb, un_32.pcb32_sp));
+ASSYM(PCB_CURTHREAD, offsetof(struct pcb, un_32.pcb32_curthread));
 
 ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
 ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
@@ -138,3 +141,6 @@
 
 ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(NIRQ, NIRQ);
+
+ASSYM(KSTACK_ADDR_MASK, KSTACK_MAX_PAGES * PAGE_SIZE - 1);
+ASSYM(KSTACK_PCB_ADDR, KSTACK_MAX_PAGES * PAGE_SIZE - sizeof(struct pcb));
Index: arm/arm/mp_machdep.c
===================================================================
--- arm/arm/mp_machdep.c	(revision 251584)
+++ arm/arm/mp_machdep.c	(working copy)
@@ -160,12 +160,13 @@
 }
 
 extern vm_paddr_t pmap_pa;
+
+void init_secondary_finish(void);
+
 void
 init_secondary(int cpu)
 {
 	struct pcpu *pc;
-	uint32_t loop_counter;
-	int start = 0, end = 0;
 
 	cpu_setup(NULL);
 	setttb(pmap_pa);
@@ -195,7 +196,6 @@
 		;
 
 	/* Initialize curthread */
-	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pc->pc_curthread = pc->pc_idlethread;
 	pc->pc_curpcb = pc->pc_idlethread->td_pcb;
 #ifdef ARM_VFP_SUPPORT
@@ -203,7 +203,16 @@
 
 	vfp_init();
 #endif
+	__asm __volatile("mov sp, %0\n"
+	    		"b init_secondary_finish\n" : : "r" (pc->pc_idlethread->td_pcb->un_32.pcb32_sp));
+}
 
+void 
+init_secondary_finish(void)
+{
+	int start = 0, end = 0;
+	uint32_t loop_counter;
+
 	mtx_lock_spin(&ap_boot_mtx);
 
 	atomic_add_rel_32(&smp_cpus, 1);
Index: arm/arm/machdep.c
===================================================================
--- arm/arm/machdep.c	(revision 251584)
+++ arm/arm/machdep.c	(working copy)
@@ -42,6 +42,7 @@
  * Updated	: 18/04/01 updated for new wscons
  */
 
+#include "opt_kstack_max_pages.h"
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_platform.h"
@@ -106,6 +107,7 @@
 #include <dev/ofw/openfirm.h>
 #endif
 
+#define DEBUG
 #ifdef DEBUG
 #define	debugf(fmt, args...) printf(fmt, ##args)
 #else
@@ -398,6 +400,7 @@
 	    USPACE_UNDEF_STACK_TOP;
 	pcb->un_32.pcb32_sp = (u_int)thread0.td_kstack +
 	    USPACE_SVC_STACK_TOP;
+	pcb->un_32.pcb32_curthread = &thread0;
 	vector_page_setprot(VM_PROT_READ);
 	pmap_set_pcb_pagedir(pmap_kernel(), pcb);
 	pmap_postinit();
@@ -874,9 +877,9 @@
 	set_pcpu(pcpup);
 #endif
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
-	PCPU_SET(curthread, &thread0);
+	pcpup->pc_curthread = &thread0;
 #ifdef ARM_VFP_SUPPORT
-	PCPU_SET(cpu, 0);
+	pcpup->pc_cpu = 0;
 #endif
 }
 
@@ -1028,6 +1031,7 @@
 	thread0.td_pcb = (struct pcb *)
 		(thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_flags = 0;
+	thread0.td_pcb->un_32.pcb32_curthread = &thread0;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
 }
@@ -1181,6 +1185,12 @@
 	phys_avail[j + 1] = 0;
 }
 
+void do_putc(char);
+void do_putc(char c)
+{
+		*(volatile char *)0x48020000 = c;
+}
+
 void *
 initarm(struct arm_boot_params *abp)
 {
@@ -1232,8 +1242,10 @@
 
 	/* Grab physical memory regions information from device tree. */
 	if (fdt_get_mem_regions(memory_regions, &memory_regions_sz,
-	    &memsize) != 0)
+	    &memsize) != 0) {
+		do_putc('a');
 		while(1);
+	}
 
 	/* Grab physical memory regions information from device tree. */
 	if (fdt_get_reserved_regions(reserved_regions, &reserved_regions_sz) != 0)
@@ -1364,8 +1376,20 @@
 	valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU);
 	valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU);
 	valloc_pages(undstack, UND_STACK_SIZE * MAXCPU);
-	valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
+	if ((KSTACK_MAX_PAGES * PAGE_SIZE) - 
+	    (freemempos & (KSTACK_MAX_MASK)) >= KSTACK_PAGES)
+		freemempos = (freemempos &~ KSTACK_MAX_MASK) +
+		    (KSTACK_MAX_PAGES * PAGE_SIZE) - 
+		    (KSTACK_PAGES * PAGE_SIZE);
+	
+	else
+		freemempos = (freemempos &~ KSTACK_MAX_MASK) + 
+		(KSTACK_MAX_PAGES * PAGE_SIZE) +
+		2 * (KSTACK_MAX_PAGES * PAGE_SIZE) -
+		(KSTACK_PAGES * PAGE_SIZE);
+		
+	valloc_pages(kernelstack, KSTACK_PAGES);
 
 	/*
 	 * Now we start construction of the L1 page table
@@ -1486,12 +1510,14 @@
 	init_proc0(kernelstack.pv_va);
 
 	arm_intrnames_init();
+	printf("hoho\n");
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
+	printf("hehe\n");
 	arm_dump_avail_init(memsize, sizeof(dump_avail) / sizeof(dump_avail[0]));
+	printf("plop\n");
 	pmap_bootstrap(freemempos, &kernel_l1pt);
+	printf("unplop\n");
 	msgbufp = (void *)msgbufpv.pv_va;
-	msgbufinit(msgbufp, msgbufsize);
-	mutex_init();
 
 	/*
 	 * Prepare map of physical memory regions available to vm subsystem.
@@ -1500,6 +1526,7 @@
 
 	init_param2(physmem);
 	kdb_init();
+	msgbufinit(msgbufp, msgbufsize);
 
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
Index: arm/arm/fusu.S
===================================================================
--- arm/arm/fusu.S	(revision 251584)
+++ arm/arm/fusu.S	(working copy)
@@ -40,14 +40,13 @@
 __FBSDID("$FreeBSD$");
 
 #ifdef _ARM_ARCH_6
-#define GET_PCB(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+KSTACK_LOCALS
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
-#define GET_PCB(tmp) \
-	ldr	tmp, .Lcurpcb
+#define GET_PCB(tmp, tmp2) \
+	ldr	tmp, .Lcurpcb; \
+	ldr	tmp, [tmp]
 #endif
 
 /*
@@ -57,21 +56,28 @@
 
 ENTRY_NP(casuword32)
 ENTRY(casuword)
-	GET_PCB(r3)
-	ldr	r3, [r3]
-
-#ifdef DIAGNOSTIC
-	teq	r3, #0x00000000
-	beq	.Lfusupcbfault
-#endif
 	stmfd	sp!, {r4, r5}
+	GET_PCB(r3, r4)
+
 	adr	r4, .Lcasuwordfault
 	str	r4, [r3, #PCB_ONFAULT]
+#ifdef _ARM_ARCH_6
+1:
+	ldrex	r5, [r0]
+	cmp	r5, r1
+	movne	r0, r5
+	bne	2f
+	strex	r5, r2, [r0]
+	cmp	r5, #0
+	bne	1b
+#else
 	ldrt	r5, [r0]
 	cmp	r5, r1
 	movne	r0, r5
 	streqt	r2, [r0]
+#endif
 	moveq	r0, r1
+2:
 	ldmfd	sp!, {r4, r5}
 	mov	r1, #0x00000000
 	str	r1, [r3, #PCB_ONFAULT]
@@ -97,14 +103,8 @@
 
 ENTRY_NP(fuword32)
 ENTRY(fuword)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r1, .Lfusufault
 	str	r1, [r2, #PCB_ONFAULT]
 
@@ -123,14 +123,8 @@
  */
 
 ENTRY(fusword)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r1, .Lfusufault
 	str	r1, [r2, #PCB_ONFAULT]
 
@@ -159,14 +153,8 @@
 	mvnne	r0, #0x00000000
 	RETne
 
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r1, _C_LABEL(fusubailout)
 	str	r1, [r2, #PCB_ONFAULT]
 
@@ -199,14 +187,8 @@
  */
 
 ENTRY(fubyte)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r1, .Lfusufault
 	str	r1, [r2, #PCB_ONFAULT]
 
@@ -242,36 +224,15 @@
 	mvn	r0, #0x00000000
 	RET
 
-#ifdef DIAGNOSTIC
 /*
- * Handle earlier faults from [fs]u*(), due to no pcb
- */
-
-.Lfusupcbfault:
-	mov	r1, r0
-	adr	r0, fusupcbfaulttext
-	b	_C_LABEL(panic)
-
-fusupcbfaulttext:
-	.asciz	"Yikes - no valid PCB during fusuxxx() addr=%08x\n"
-	.align	0
-#endif
-
-/*
  * suword(caddr_t uaddr, int x);
  * Store an int in the user's address space.
  */
 
 ENTRY_NP(suword32)
 ENTRY(suword)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r3, .Lfusufault
 	str	r3, [r2, #PCB_ONFAULT]
 
@@ -296,14 +257,8 @@
 	mvnne	r0, #0x00000000
 	RETne
 
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r3, _C_LABEL(fusubailout)
 	str	r3, [r2, #PCB_ONFAULT]
 
@@ -327,14 +282,8 @@
  */
 
 ENTRY(susword)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r3, .Lfusufault
 	str	r3, [r2, #PCB_ONFAULT]
 
@@ -358,15 +307,8 @@
  */
 
 ENTRY(subyte)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
-
-#ifdef DIAGNOSTIC
-	teq	r2, #0x00000000
-	beq	.Lfusupcbfault
-#endif
-
 	adr	r3, .Lfusufault
 	str	r3, [r2, #PCB_ONFAULT]
 
Index: arm/arm/gic.c
===================================================================
--- arm/arm/gic.c	(revision 251584)
+++ arm/arm/gic.c	(working copy)
@@ -253,7 +253,7 @@
 	active_irq &= 0x3FF;
 
 	if (active_irq == 0x3FF) {
-		if (last_irq == -1)
+		if (last_irq == -1 && 0)
 			printf("Spurious interrupt detected [0x%08x]\n", active_irq);
 		return -1;
 	}
Index: arm/arm/swtch.S
===================================================================
--- arm/arm/swtch.S	(revision 251584)
+++ arm/arm/swtch.S	(working copy)
@@ -192,7 +192,7 @@
 #endif
 
 	/* We have a new curthread now so make a note it */
-	GET_CURTHREAD_PTR(r6)
+	GET_PCPU_CURTHREAD_PTR(r6)
 	str	r5, [r6]
 
 	/* Set the new tp */
@@ -226,7 +226,7 @@
 
 	/* Process is now on a processor. */
 	/* We have a new curthread now so make a note it */
-	GET_CURTHREAD_PTR(r7)
+	GET_PCPU_CURTHREAD_PTR(r7)
 	str	r1, [r7]
 
 	/* Hook in a new pcb */
@@ -434,7 +434,11 @@
 	str	r6, [r4, #TD_LOCK]
 #if defined(SCHED_ULE) && defined(SMP)
 	ldr	r6, .Lblocked_lock
-	GET_CURTHREAD_PTR(r3)
+	/* 
+	 * It's fine to use GET_PCPU_CURTHREAD_PTR here, because interrupts
+	 * are disabled, so it will be atomic.
+	 */
+	GET_PCPU_CURTHREAD_PTR(r3)
 
 1:
 	ldr	r4, [r3, #TD_LOCK]
Index: arm/arm/bcopyinout.S
===================================================================
--- arm/arm/bcopyinout.S	(revision 251584)
+++ arm/arm/bcopyinout.S	(working copy)
@@ -39,6 +39,7 @@
 #include "assym.s"
 
 #include <machine/asm.h>
+#include <machine/asmacros.h>
 #include <sys/errno.h>
 
 .L_arm_memcpy:
@@ -55,14 +56,12 @@
 	.align	0
 
 #ifdef _ARM_ARCH_6
-#define GET_PCB(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+KSTACK_LOCALS
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
 
-#define GET_PCB(tmp) \
+#define GET_PCB(tmp, tmp2) \
 	ldr	tmp, .Lcurpcb
 #endif
 
@@ -116,8 +115,7 @@
 
 .Lnormal:
 	SAVE_REGS
-	GET_PCB(r4)
-	ldr	r4, [r4]
+	GET_PCB(r4, r5)
 
 
 	ldr	r5, [r4, #PCB_ONFAULT]
@@ -354,8 +352,7 @@
 
 .Lnormale:
 	SAVE_REGS
-	GET_PCB(r4)
-	ldr	r4, [r4]
+	GET_PCB(r4, r5)
 
 	ldr	r5, [r4, #PCB_ONFAULT]
 	adr	r3, .Lcopyfault
@@ -549,8 +546,7 @@
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_1)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
@@ -575,8 +571,7 @@
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_2)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
@@ -601,8 +596,7 @@
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_4)
-	GET_PCB(r2)
-	ldr	r2, [r2]
+	GET_PCB(r2, r3)
 
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
Index: arm/arm/pmap-v6.c
===================================================================
--- arm/arm/pmap-v6.c	(revision 251584)
+++ arm/arm/pmap-v6.c	(working copy)
@@ -1818,7 +1818,7 @@
 	cpu_tlb_flushID();
 	cpu_cpwait();
 	if (vector_page < KERNBASE) {
-		struct pcb *curpcb = PCPU_GET(curpcb);
+		struct pcb *_curpcb = curpcb;
 		pcb = thread0.td_pcb;
 		if (pmap_is_current(pmap)) {
 			/*
@@ -1838,10 +1838,10 @@
 		 * Make sure cpu_switch(), et al, DTRT. This is safe to do
 		 * since this process has no remaining mappings of its own.
 		 */
-		curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
-		curpcb->pcb_l1vec = pcb->pcb_l1vec;
-		curpcb->pcb_dacr = pcb->pcb_dacr;
-		curpcb->pcb_pagedir = pcb->pcb_pagedir;
+		_curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
+		_curpcb->pcb_l1vec = pcb->pcb_l1vec;
+		_curpcb->pcb_dacr = pcb->pcb_dacr;
+		_curpcb->pcb_pagedir = pcb->pcb_pagedir;
 
 	}
 	pmap_free_l1(pmap);
@@ -2229,6 +2229,8 @@
 pmap_kextract(vm_offset_t va)
 {
 
+	if (kernel_vm_end == 0)
+		return (0);
 	return (pmap_extract_locked(kernel_pmap, va));
 }
 
Index: arm/arm/vm_machdep.c
===================================================================
--- arm/arm/vm_machdep.c	(revision 251584)
+++ arm/arm/vm_machdep.c	(working copy)
@@ -131,6 +131,7 @@
 #endif
 	td2->td_pcb = pcb2;
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
+	pcb2->un_32.pcb32_curthread = td2;
 	mdp2 = &p2->p_md;
 	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
 	pcb2->un_32.pcb32_und_sp = td2->td_kstack + USPACE_UNDEF_STACK_TOP;
@@ -344,6 +345,7 @@
 
 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
 	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
+	td->td_pcb->un_32.pcb32_curthread = td;
 	tf = td->td_frame;
 	sf = (struct switchframe *)tf - 1;
 	sf->sf_r4 = (u_int)fork_return;
@@ -406,6 +408,7 @@
 {
 	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
 	    PAGE_SIZE) - 1;
+	td->td_pcb->un_32.pcb32_curthread = td;
 	/*
 	 * Ensure td_frame is aligned to an 8 byte boundary as it will be
 	 * placed into the stack pointer which must be 8 byte aligned in
Index: arm/arm/cpufunc.c
===================================================================
--- arm/arm/cpufunc.c	(revision 251584)
+++ arm/arm/cpufunc.c	(working copy)
@@ -1116,10 +1116,10 @@
 	
 	armv7_tlb_flushID,              /* tlb_flushID          */
 	armv7_tlb_flushID_SE,           /* tlb_flushID_SE       */
-	arm11_tlb_flushI,               /* tlb_flushI           */
-	arm11_tlb_flushI_SE,            /* tlb_flushI_SE        */
-	arm11_tlb_flushD,               /* tlb_flushD           */
-	arm11_tlb_flushD_SE,            /* tlb_flushD_SE        */
+	armv7_tlb_flushID,               /* tlb_flushI           */
+	armv7_tlb_flushID_SE,            /* tlb_flushI_SE        */
+	armv7_tlb_flushID,               /* tlb_flushD           */
+	armv7_tlb_flushID_SE,            /* tlb_flushD_SE        */
 	
 	/* Cache operations */
 	
Index: arm/arm/pmap.c
===================================================================
--- arm/arm/pmap.c	(revision 251584)
+++ arm/arm/pmap.c	(working copy)
@@ -2452,7 +2452,7 @@
 	pmap_tlb_flushID(pmap);
 	cpu_cpwait();
 	if (vector_page < KERNBASE) {
-		struct pcb *curpcb = PCPU_GET(curpcb);
+		struct pcb *_curpcb = curpcb;
 		pcb = thread0.td_pcb;
 		if (pmap_is_current(pmap)) {
 			/*
@@ -2472,10 +2472,10 @@
 		 * Make sure cpu_switch(), et al, DTRT. This is safe to do
 		 * since this process has no remaining mappings of its own.
 		 */
-		curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
-		curpcb->pcb_l1vec = pcb->pcb_l1vec;
-		curpcb->pcb_dacr = pcb->pcb_dacr;
-		curpcb->pcb_pagedir = pcb->pcb_pagedir;
+		_curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
+		_curpcb->pcb_l1vec = pcb->pcb_l1vec;
+		_curpcb->pcb_dacr = pcb->pcb_dacr;
+		_curpcb->pcb_pagedir = pcb->pcb_pagedir;
 
 	}
 	pmap_free_l1(pmap);
Index: arm/arm/locore.S
===================================================================
--- arm/arm/locore.S	(revision 251584)
+++ arm/arm/locore.S	(working copy)
@@ -225,6 +225,7 @@
 
 	/* init arm will return the new stack pointer. */
 	mov	sp, r0
+	bl	_C_LABEL(mutex_init)
 
 	bl	_C_LABEL(mi_startup)		/* call mi_startup()! */
 
@@ -261,6 +262,7 @@
 	MMU_INIT(PHYSADDR, PHYSADDR , 64, L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW))
 	/* map VA 0xc0000000..0xc3ffffff to PA */
 	MMU_INIT(KERNBASE, PHYSADDR, 64, L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW))
+	MMU_INIT(0x48000000, 0x48000000, 1, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW))
 #else
 	MMU_INIT(PHYSADDR, PHYSADDR , 64, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW))
 	/* map VA 0xc0000000..0xc3ffffff to PA */
Index: arm/arm/bcopyinout_xscale.S
===================================================================
--- arm/arm/bcopyinout_xscale.S	(revision 251584)
+++ arm/arm/bcopyinout_xscale.S	(working copy)
@@ -42,13 +42,11 @@
 	.align	0
 
 #ifdef _ARM_ARCH_6
-#define GET_PCB(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+KSTACK_LOCALS
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
-#define GET_PCB(tmp) \
+#define GET_PCB(tmp, tmp2) \
 	ldr	tmp, .Lcurpcb
 #endif
 
@@ -88,8 +86,7 @@
 .Lnormal:
 	stmfd	sp!, {r10-r11, lr}
 
-	GET_PCB(r10)
-	ldr	r10, [r10]
+	GET_PCB(r10, r11)
 
 	mov	r3, #0x00
 	adr	ip, .Lcopyin_fault
@@ -530,8 +527,7 @@
 .Lnormale:									
 	stmfd	sp!, {r10-r11, lr}
 
-	GET_PCB(r10)
-	ldr	r10, [r10]
+	GET_PCB(r10, r11)
 
 	mov	r3, #0x00
 	adr	ip, .Lcopyout_fault
Index: arm/include/param.h
===================================================================
--- arm/include/param.h	(revision 251584)
+++ arm/include/param.h	(working copy)
@@ -124,6 +124,12 @@
 #define KSTACK_GUARD_PAGES	1
 #endif /* !KSTACK_GUARD_PAGES */
 
+#ifndef KSTACK_MAX_PAGES
+#define KSTACK_MAX_PAGES	32
+#endif /* !KSTACK_MAX_PAGES */
+
+#define KSTACK_MAX_MASK		((KSTACK_MAX_PAGES * PAGE_SIZE) - 1)
+
 #define USPACE_SVC_STACK_TOP		KSTACK_PAGES * PAGE_SIZE
 #define USPACE_SVC_STACK_BOTTOM		(USPACE_SVC_STACK_TOP - 0x1000)
 #define USPACE_UNDEF_STACK_TOP		(USPACE_SVC_STACK_BOTTOM - 0x10)
Index: arm/include/pcb.h
===================================================================
--- arm/include/pcb.h	(revision 251584)
+++ arm/include/pcb.h	(working copy)
@@ -63,6 +63,7 @@
 	u_int	pcb32_lr;
 	u_int	pcb32_pc;
 	u_int	pcb32_und_sp;
+	void *	pcb32_curthread;
 };
 #define	pcb_pagedir	un_32.pcb32_pagedir
 #define	pcb_pl1vec	un_32.pcb32_pl1vec
Index: arm/include/asmacros.h
===================================================================
--- arm/include/asmacros.h	(revision 251584)
+++ arm/include/asmacros.h	(working copy)
@@ -216,8 +216,23 @@
 name:
 
 #ifdef _ARM_ARCH_6
-#define	AST_LOCALS
-#define GET_CURTHREAD_PTR(tmp) \
+#define KSTACK_LOCALS \
+.Lkstack_consts:	\
+	.word KSTACK_ADDR_MASK;	\
+	.word KSTACK_PCB_ADDR;	
+#define	AST_LOCALS KSTACK_LOCALS
+
+#define GET_PCB(tmp, tmp2) \
+	ldr tmp2, .Lkstack_consts; \
+	bic tmp, sp, tmp2; \
+	ldr tmp2, .Lkstack_consts + 4; \
+	add tmp, tmp,  tmp2
+
+#define GET_CURTHREAD(tmp, tmp2) \
+	GET_PCB(tmp, tmp2); \
+	ldr tmp, [tmp, #PCB_CURTHREAD]
+
+#define GET_PCPU_CURTHREAD_PTR(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
 	add	tmp, tmp, #(PC_CURTHREAD)
 #else
@@ -225,8 +240,12 @@
 .Lcurthread:								;\
 	.word	_C_LABEL(__pcpu) + PC_CURTHREAD
 
-#define GET_CURTHREAD_PTR(tmp) \
+#define GET_PCPU_CURTHREAD_PTR(tmp) \
 	ldr	tmp, .Lcurthread
+
+#define GET_CURTHREAD(tmp, tmp2) \
+	ldr	tmp, .Lcurthread; \
+	ldr	tmp, [tmp]
 #endif
 
 #define	DO_AST								\
@@ -238,8 +257,7 @@
 	teq	r0, #(PSR_USR32_MODE)					;\
 	bne	2f			/* Nope, get out now */		;\
 	bic	r4, r4, #(I32_bit|F32_bit)				;\
-1:	GET_CURTHREAD_PTR(r5)						;\
-	ldr	r5, [r5]						;\
+1:	GET_CURTHREAD(r5, r6)						;\
 	ldr	r1, [r5, #(TD_FLAGS)]					;\
 	and	r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED)		;\
 	teq	r1, #0x00000000						;\
Index: arm/include/pcpu.h
===================================================================
--- arm/include/pcpu.h	(revision 251584)
+++ arm/include/pcpu.h	(working copy)
@@ -33,8 +33,14 @@
 #ifdef _KERNEL
 
 #include <machine/cpuconf.h>
+#include <machine/cpufunc.h>
 #include <machine/frame.h>
+#include <machine/pcb.h>
+#include <machine/atomic.h>
 
+#include <sys/types.h>
+#include <sys/systm.h>
+
 #define	ALT_STACK_SIZE	128
 
 struct vmspace;
@@ -94,15 +100,97 @@
 
 	__asm __volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (tls));
 }
+
+/*
+ * Evaluates to the byte offset of the per-cpu variable name.
+ */
+#define	__pcpu_offset(name)						\
+	__offsetof(struct pcpu, name)
+
+/*
+ * Evaluates to the type of the per-cpu variable name.
+ */
+#define	__pcpu_type(name)						\
+	__typeof(((struct pcpu *)0)->name)
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define	__PCPU_PTR(name) __extension__ ({				\
+	__pcpu_type(name) *__p;						\
+    	int __s;							\
+									\
+    	__s = disable_interrupts(I32_bit | F32_bit);			\
+	__p = &get_pcpu()->name;					\
+    	restore_interrupts(__s);					\
+	__p;								\
+})
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define	__PCPU_GET(name) __extension__ ({				\
+    	int __s;							\
+	__pcpu_type(name) __res;					\
+    	__s = disable_interrupts(I32_bit | F32_bit);			\
+	__res = get_pcpu()->name;					\
+    	restore_interrupts(__s);					\
+	__res;								\
+})
+
+/*
+ * Adds the value to the per-cpu counter name.  The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define	__PCPU_ADD(name, val) do {					\
+	int __s ;							\
+	__s = disable_interrupts(I32_bit | F32_bit);			\
+	get_pcpu()->name += (val);					\
+	restore_interrupts(__s);					\
+} while (0)
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define	__PCPU_SET(name, val) {						\
+	int __s;							\
+	__s = disable_interrupts(I32_bit | F32_bit);			\
+	get_pcpu()->name = val;						\
+	restore_interrupts(__s);					\
+}
+
+#define	PCPU_GET(member)	__PCPU_GET(pc_ ## member)
+#define	PCPU_ADD(member, val)	__PCPU_ADD(pc_ ## member, val)
+#define	PCPU_INC(member)	PCPU_ADD(member, 1)
+#define	PCPU_PTR(member)	__PCPU_PTR(pc_ ## member)
+#define	PCPU_SET(member, val)	__PCPU_SET(pc_ ## member, val)
+
+static __inline struct pcb *
+__curpcb(void)
+{
+	register_t sp;
+
+	__asm __volatile("mov %0, sp" : "=r" (sp));
+	return ((struct pcb *)((sp &~ KSTACK_MAX_MASK) + 
+		(KSTACK_MAX_PAGES * PAGE_SIZE - sizeof(struct pcb))));
+}
+
+
+#define curpcb (__curpcb())
+#if 0
+#define curthread ((struct thread *)(curpcb->un_32.pcb32_curthread))
+#endif
+
+
 #else
 #define get_pcpu()	pcpup
-#endif
 
 #define	PCPU_GET(member)	(get_pcpu()->pc_ ## member)
 #define	PCPU_ADD(member, value)	(get_pcpu()->pc_ ## member += (value))
 #define	PCPU_INC(member)	PCPU_ADD(member, 1)
 #define	PCPU_PTR(member)	(&get_pcpu()->pc_ ## member)
 #define	PCPU_SET(member,value)	(get_pcpu()->pc_ ## member = (value))
+#endif
 
 void pcpu0_init(void);
 #endif	/* _KERNEL */
Index: arm/include/pmap.h
===================================================================
--- arm/include/pmap.h	(revision 251584)
+++ arm/include/pmap.h	(working copy)
@@ -63,7 +63,7 @@
 #endif
 #define PTE_CACHE	6
 #define PTE_DEVICE	2
-#define PTE_PAGETABLE	4
+#define PTE_PAGETABLE	6
 #else
 #define PTE_NOCACHE	1
 #define PTE_CACHE	2
@@ -463,7 +463,7 @@
 #if (ARM_MMU_SA1 == 1) && (ARM_NMMUS == 1)
 #define	PMAP_NEEDS_PTE_SYNC	1
 #define	PMAP_INCLUDE_PTE_SYNC
-#elif defined(CPU_XSCALE_81342)
+#elif defined(CPU_XSCALE_81342) || defined(ARM_ARCH_7A)
 #define PMAP_NEEDS_PTE_SYNC	1
 #define PMAP_INCLUDE_PTE_SYNC
 #elif (ARM_MMU_SA1 == 0)
@@ -529,11 +529,17 @@
 #define	PMAP_INCLUDE_PTE_SYNC
 #endif
 
+#ifdef ARM_L2_PIPT
+#define _sync_l2(pte, size) 	cpu_l2cache_wb_range(vtophys(pte), size)
+#else
+#define _sync_l2(pte, size) 	cpu_l2_cache_wb_range(pte, size)
+#endif
+
 #define	PTE_SYNC(pte)							\
 do {									\
 	if (PMAP_NEEDS_PTE_SYNC) {					\
 		cpu_dcache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\
-		cpu_l2cache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\
+		_sync_l2((vm_offset_t)(pte), sizeof(pt_entry_t));\
 	} else								\
 		cpu_drain_writebuf();					\
 } while (/*CONSTCOND*/0)
@@ -543,7 +549,7 @@
 	if (PMAP_NEEDS_PTE_SYNC) {					\
 		cpu_dcache_wb_range((vm_offset_t)(pte),			\
 		    (cnt) << 2); /* * sizeof(pt_entry_t) */		\
-		cpu_l2cache_wb_range((vm_offset_t)(pte), 		\
+		_sync_l2((vm_offset_t)(pte),		 		\
 		    (cnt) << 2); /* * sizeof(pt_entry_t) */		\
 	} else								\
 		cpu_drain_writebuf();					\
@@ -703,6 +709,21 @@
 void arm_unmap_nocache(void *, vm_size_t);
 
 extern vm_paddr_t dump_avail[];
+
+/* 
+ * We want to be able to determine the end address of the kstack, to be able
+ * to get the PCB address just using the SP.
+ */
+
+static __inline void
+pmap_align_stack(vm_offset_t *addr, vm_size_t length)
+{
+	if (((*addr + length) & (KSTACK_MAX_PAGES * PAGE_SIZE - 1)) == 0)
+		return;
+	*addr += (KSTACK_MAX_PAGES * PAGE_SIZE) - ((*addr + length) &
+			(KSTACK_MAX_PAGES * PAGE_SIZE - 1));
+}
+
 #endif	/* _KERNEL */
 
 #endif	/* !LOCORE */
Index: arm/ti/omap4/omap4_mp.c
===================================================================
--- arm/ti/omap4/omap4_mp.c	(revision 251584)
+++ arm/ti/omap4/omap4_mp.c	(working copy)
@@ -71,7 +71,7 @@
 		panic("Couldn't map the SCU\n");
 	/* Enable the SCU */
 	*(volatile unsigned int *)scu_addr |= 1;
-	//*(volatile unsigned int *)(scu_addr + 0x30) |= 1;
+	*(volatile unsigned int *)(scu_addr + 0x30) |= 1;
 	cpu_idcache_wbinv_all();
 	cpu_l2cache_wbinv_all();
 	ti_smc0(0x200, 0xfffffdff, MODIFY_AUX_CORE_0);
Index: vm/vm_map.c
===================================================================
--- vm/vm_map.c	(revision 251584)
+++ vm/vm_map.c	(working copy)
@@ -1465,6 +1465,11 @@
 				pmap_align_tlb(addr);
 				break;
 #endif
+#ifdef VMFS_STACK_ALIGNED_SPACE
+			case VMFS_STACK_ALIGNED_SPACE:
+				pmap_align_stack(addr, length);
+				break;
+#endif
 			default:
 				break;
 			}
@@ -1477,6 +1482,9 @@
 #ifdef VMFS_TLB_ALIGNED_SPACE
 	    || find_space == VMFS_TLB_ALIGNED_SPACE
 #endif
+#ifdef VMFS_STACK_ALIGNED_SPACE
+	    || find_space == VMFS_STACK_ALIGNED_SPACE
+#endif
 	    ));
 	vm_map_unlock(map);
 	return (result);
Index: vm/vm_map.h
===================================================================
--- vm/vm_map.h	(revision 251584)
+++ vm/vm_map.h	(working copy)
@@ -346,6 +346,9 @@
 #if defined(__mips__)
 #define	VMFS_TLB_ALIGNED_SPACE	3	/* find a TLB entry aligned range */
 #endif
+#if defined(__arm__)
+#define VMFS_STACK_ALIGNED_SPACE	4	/* Align stack so that the last address is predictable */
+#endif
 
 /*
  * vm_map_wire and vm_map_unwire option flags
Index: vm/vm_glue.c
===================================================================
--- vm/vm_glue.c	(revision 251584)
+++ vm/vm_glue.c	(working copy)
@@ -371,6 +371,9 @@
 	 */
 	ks = kmem_alloc_nofault_space(kernel_map,
 	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, VMFS_TLB_ALIGNED_SPACE);
+#elif defined(__arm__)
+	ks = kmem_alloc_nofault_space(kernel_map,
+	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, VMFS_STACK_ALIGNED_SPACE);
 #else
 	ks = kmem_alloc_nofault(kernel_map,
 	   (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);

--7JfCtLOvnd9MIVvH--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130609225615.GA42548>