Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Mar 2014 12:48:31 -0600
From:      Ian Lepore <ian@FreeBSD.org>
To:        Wojciech Macek <wma@semihalf.com>
Cc:        freebsd-arm@FreeBSD.org
Subject:   Re: arm SMP on Cortex-A15
Message-ID:  <1395254911.80941.9.camel@revolution.hippie.lan>
In-Reply-To: <1395149146.1149.586.camel@revolution.hippie.lan>
References:  <CANsEV8euHTsfviiCMP_aet3qYiK2T-oK%2B-37eay7zAPH2S2vaA@mail.gmail.com> <20131220125638.GA5132@mail.bsdpad.com> <20131222092913.GA89153@mail.bsdpad.com> <CANsEV8fSoygoSUyQqKoEQ7tRxjqDOwrPD8dU7O2V2PXRj35j4A@mail.gmail.com> <20131222123636.GA61193@ci0.org> <CANsEV8fWvUkFHi8DP6Nr807RwPDB1iZrO39fpfa44qOkJPidZA@mail.gmail.com> <1395149146.1149.586.camel@revolution.hippie.lan>

next in thread | previous in thread | raw e-mail | index | archive | help

--=-YwiFRevDQYDNGbo/Gkof
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

On Tue, 2014-03-18 at 07:25 -0600, Ian Lepore wrote:
> On Mon, 2014-03-17 at 09:29 +0100, Wojciech Macek wrote:
> > Hi,
> > 
> > Finally I've found some time to continue SMP hacking. It seems that I
> > isolated the tlb/pmap failures and developed two simple patches that help.
> > There are still some pmap changes and TEX remap left, but I don't want to
> > use them now.
> > https://drive.google.com/folderview?id=0B-7yTLrPxaWtSzZPUGgtM3pnUjg&usp=sharing
> > * 01 - ensure that TTB is set before TLB invalidation and flush BTB to
> > comply with the specs
> > * 02 - add missing TLB invalidations to pmap and fix invalidation order
> > 
> > I chose buildworld -j4 as a stress test, and ran it on Arndale (USB rootfs)
> > and a different 4-core a15 chip (SATA rootfs). On both setups the test passed
> > and was significantly faster than the one with the previous patchset.
> > 
> > I'd like to submit these changes to FreeBSD tree (with some help from our
> > local committers), so any comments and testing are really appreciated.
> > 
> > Best regards,
> > Wojtek
> 
> The first patch looks fine and is working without any problems for me.
> 
> For the second patch, I propose the attached similar patch which
> combines your changes with some I got from Olivier.  The main
> differences are moving the tlb flush outside the loop when propagating a
> change to all L1s, and moving the tlb flush (rather than adding another)
> in pmap_kenter_internal().
> 
> I believe even with the second patch there may still be some missing tlb
> flushes.
> 
> -- Ian

Following up with a third version of the pmap-v6.c patch.  On top of the
previous versions, this:

      * ensures that cpu_cpwait() is consistently used after every tlb
        flush (sometimes it's a single wait after flushes that happen in
        a loop).
      * adds a tlb flush to pmap_free_l2_bucket()
      * adds a tlb flush to pmap_bootstrap()
      * adds a tlb flush to pmap_grow_map()
      * adds a tlb flush to pmap_grow_l2_bucket()
      * adds a tlb flush to pmap_kenter_section()

I'm not sure there's any armv6/7 platform that needs the cpu_cpwait(),
but if it's going to appear in the code at all, it should at least be
consistent. :)

-- Ian


--=-YwiFRevDQYDNGbo/Gkof
Content-Disposition: inline; filename="smp_patch_02b.patch"
Content-Type: text/x-patch; name="smp_patch_02b.patch"; charset="us-ascii"
Content-Transfer-Encoding: 7bit

Index: sys/arm/arm/pmap-v6.c
===================================================================
--- sys/arm/arm/pmap-v6.c	(revision 263112)
+++ sys/arm/arm/pmap-v6.c	(working copy)
@@ -844,6 +844,8 @@ pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket
 	if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) {
 		*pl1pd = 0;
 		PTE_SYNC(pl1pd);
+		cpu_tlb_flushD_SE((vm_offset_t)ptep);
+		cpu_cpwait();
 	}
 
 	/*
@@ -1047,6 +1049,7 @@ small_mappings:
 				cpu_tlb_flushID_SE(pv->pv_va);
 			else if (PTE_BEEN_REFD(opte))
 				cpu_tlb_flushD_SE(pv->pv_va);
+			cpu_cpwait();
 		}
 
 		PMAP_UNLOCK(pmap);
@@ -1644,7 +1647,7 @@ pmap_postinit(void)
 				*ptep = pte;
 				PTE_SYNC(ptep);
 				cpu_tlb_flushD_SE(va);
-
+				cpu_cpwait();
 				va += PAGE_SIZE;
 		}
 		pmap_init_l1(l1, pl1pt);
@@ -1948,6 +1951,8 @@ pmap_bootstrap(vm_offset_t firstaddr, struct pv_ad
 	pmap_init_l1(l1, kernel_l1pt);
 	cpu_dcache_wbinv_all();
 	cpu_l2cache_wbinv_all();
+	cpu_tlb_flushID();
+	cpu_cpwait();
 
 	virtual_avail = round_page(virtual_avail);
 	virtual_end = vm_max_kernel_address;
@@ -2034,7 +2039,8 @@ pmap_grow_map(vm_offset_t va, pt_entry_t cache_mod
 	*ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF;
 	pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0);
 	PTE_SYNC(ptep);
-
+	cpu_tlb_flushD_SE(va);
+	cpu_cpwait();
 	return (0);
 }
 
@@ -2130,6 +2136,8 @@ pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va)
 			    L1_C_PROTO;
 			PTE_SYNC(pl1pd);
 	}
+	cpu_tlb_flushID_SE(va);
+	cpu_cpwait();
 
 	return (l2b);
 }
@@ -2348,6 +2356,8 @@ pmap_kenter_section(vm_offset_t va, vm_offset_t pa
 		l1->l1_kva[L1_IDX(va)] = pd;
 		PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
 	}
+	cpu_tlb_flushD_SE(va);
+	cpu_cpwait();
 }
 
 /*
@@ -2387,14 +2397,6 @@ pmap_kenter_internal(vm_offset_t va, vm_offset_t p
 
 	ptep = &l2b->l2b_kva[l2pte_index(va)];
 	opte = *ptep;
-	if (l2pte_valid(opte)) {
-		cpu_tlb_flushD_SE(va);
-		cpu_cpwait();
-	} else {
-		if (opte == 0)
-			l2b->l2b_occupancy++;
-	}
-
 	if (flags & KENTER_CACHE) {
 		*ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF;
 		pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE,
@@ -2405,10 +2407,17 @@ pmap_kenter_internal(vm_offset_t va, vm_offset_t p
 		    0);
 	}
 
+	PTE_SYNC(ptep);
+	if (l2pte_valid(opte)) {
+		cpu_tlb_flushD_SE(va);
+	} else {
+		if (opte == 0)
+			l2b->l2b_occupancy++;
+	}
+	cpu_cpwait();
+
 	PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
 	    (uint32_t) ptep, opte, *ptep));
-	PTE_SYNC(ptep);
-	cpu_cpwait();
 }
 
 void
@@ -2474,10 +2483,10 @@ pmap_kremove(vm_offset_t va)
 	opte = *ptep;
 	if (l2pte_valid(opte)) {
 		va = va & ~PAGE_MASK;
+		*ptep = 0;
+		PTE_SYNC(ptep);
 		cpu_tlb_flushD_SE(va);
 		cpu_cpwait();
-		*ptep = 0;
-		PTE_SYNC(ptep);
 	}
 }
 
@@ -2710,6 +2719,7 @@ small_mappings:
 			cpu_tlb_flushID();
 		else
 			cpu_tlb_flushD();
+		cpu_cpwait();
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&pvh_global_lock);
@@ -2763,6 +2773,7 @@ pmap_change_attr(vm_offset_t sva, vm_size_t len, i
 		pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE);
 		*ptep = pte;
 		cpu_tlb_flushID_SE(tmpva);
+		cpu_cpwait();
 
 		dprintf("%s: for va:%x ptep:%x pte:%x\n",
 		    __func__, tmpva, (uint32_t)ptep, pte);
@@ -2900,6 +2911,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs
 		else
 		if (is_refd)
 			cpu_tlb_flushD();
+		cpu_cpwait();
 	}
 	rw_wunlock(&pvh_global_lock);
 
@@ -3166,6 +3178,7 @@ validate:
 			cpu_tlb_flushID_SE(va);
 		else if (is_refd)
 			cpu_tlb_flushD_SE(va);
+		cpu_cpwait();
 	}
 
 	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
@@ -3713,6 +3726,7 @@ pmap_remove_section(pmap_t pmap, vm_offset_t sva)
 		cpu_tlb_flushID_SE(sva);
 	else
 		cpu_tlb_flushD_SE(sva);
+	cpu_cpwait();
 }
 
 /*
@@ -3885,6 +3899,7 @@ pmap_promote_section(pmap_t pmap, vm_offset_t va)
 		cpu_tlb_flushID();
 	else
 		cpu_tlb_flushD();
+	cpu_cpwait();
 
 	pmap_section_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x"
@@ -4009,6 +4024,7 @@ pmap_demote_section(pmap_t pmap, vm_offset_t va)
 		cpu_tlb_flushID_SE(va);
 	else if (L1_S_REFERENCED(l1pd))
 		cpu_tlb_flushD_SE(va);
+	cpu_cpwait();
 
 	pmap_section_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x"
@@ -4380,6 +4396,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
 				}
 			}
 
+			*ptep = 0;
+			PTE_SYNC(ptep);
 			if (pmap_is_current(pmap)) {
 				total++;
 				if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) {
@@ -4390,8 +4408,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
 				} else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE)
 					flushall = 1;
 			}
-			*ptep = 0;
-			PTE_SYNC(ptep);
 
 			sva += PAGE_SIZE;
 			ptep++;
@@ -4404,6 +4420,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
 	rw_wunlock(&pvh_global_lock);
 	if (flushall)
 		cpu_tlb_flushID();
+	cpu_cpwait();
 	PMAP_UNLOCK(pmap);
 }
 
@@ -4923,6 +4940,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offse
 			}
 		}
 	}
+	cpu_cpwait();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }

--=-YwiFRevDQYDNGbo/Gkof--




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1395254911.80941.9.camel>