Date:      Mon, 15 Oct 2018 18:56:54 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r339367 - in head/sys: cddl/dev/fbt/riscv riscv/include riscv/riscv
Message-ID:  <201810151856.w9FIusEQ029384@repo.freebsd.org>

Author: jhb
Date: Mon Oct 15 18:56:54 2018
New Revision: 339367
URL: https://svnweb.freebsd.org/changeset/base/339367

Log:
  Various fixes for TLB management on RISC-V.
  
  - Remove the arm64-specific cpu_*cache* and cpu_tlb_flush* functions.
    Instead, add RISC-V specific inline functions in cpufunc.h for the
    fence.i and sfence.vma instructions.
  - Catch up to changes in the arm64 pmap and remove all the cpu_dcache_*
    calls, pmap_is_current, pmap_l3_valid_cacheable, and PTE_NEXT bits from
    pmap.
  - Remove references to the unimplemented riscv_setttb().
  - Remove unused cpu_nullop.
  - Add a link to the SBI doc to sbi.h.
  - Add support for a 4th argument in SBI calls.  It's not documented,
    but it seems implied for the asid argument to
    SBI_REMOTE_SFENCE_VMA_ASID.
  - Pass the arguments from sbi_remote_sfence*() to the SEE.  BBL ignores
    them, so this is just cosmetic.
  - Flush icaches on other CPUs when they resume from kdb in case the
    debugger wrote any breakpoints while the CPUs were paused in the IPI_STOP
    handler.
  - Add SMP vs UP versions of pmap_invalidate_* similar to amd64.  The
    UP versions just use simple fences.  The SMP versions use the
    sbi_remote_sfence*() functions to perform TLB shootdowns (see the
    sketch after this list).  Since we don't have a valid pm_active
    field in the riscv pmap, just IPI all CPUs for all invalidations
    for now.
  - Remove an extraneous TLB flush from the end of pmap_bootstrap().
  - Don't do a TLB flush when writing new mappings in pmap_enter(); only
    flush when modifying an existing mapping.  Note that for COW faults
    a TLB flush is only performed after explicitly clearing the old
    mapping, as is done in other pmaps.
  - Sync the i-cache on all harts before updating the PTE for executable
    mappings in pmap_enter and pmap_enter_quick.  Previously the i-cache was
    only sync'd after updating the PTE in pmap_enter.
  - Use sbi_remote_fence() instead of smp_rendezvous in pmap_sync_icache().
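  
  As a sketch only (this mirrors the SMP pmap_invalidate_page() added
  to pmap.c below and is not additional code in the commit), each SMP
  invalidation follows the pattern:
  
	sched_pin();
	mask = all_cpus;
	CPU_CLR(PCPU_GET(cpuid), &mask);	/* fence remote harts only */
	fence();				/* order prior PTE writes */
	sbi_remote_sfence_vma(mask.__bits, va, 1);
	sfence_vma_page(va);			/* local TLB invalidation */
	sched_unpin();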
  
  Reviewed by:	markj
  Approved by:	re (gjb, kib)
  Sponsored by:	DARPA
  Differential Revision:	https://reviews.freebsd.org/D17414

Modified:
  head/sys/cddl/dev/fbt/riscv/fbt_isa.c
  head/sys/riscv/include/cpufunc.h
  head/sys/riscv/include/kdb.h
  head/sys/riscv/include/sbi.h
  head/sys/riscv/riscv/cpufunc_asm.S
  head/sys/riscv/riscv/db_interface.c
  head/sys/riscv/riscv/mp_machdep.c
  head/sys/riscv/riscv/pmap.c

Modified: head/sys/cddl/dev/fbt/riscv/fbt_isa.c
==============================================================================
--- head/sys/cddl/dev/fbt/riscv/fbt_isa.c	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/cddl/dev/fbt/riscv/fbt_isa.c	Mon Oct 15 18:56:54 2018	(r339367)
@@ -78,11 +78,11 @@ fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t 
 	switch(fbt->fbtp_patchval) {
 	case FBT_C_PATCHVAL:
 		*(uint16_t *)fbt->fbtp_patchpoint = (uint16_t)val;
-		cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 2);
+		fence_i();
 		break;
 	case FBT_PATCHVAL:
 		*fbt->fbtp_patchpoint = val;
-		cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 4);
+		fence_i();
 		break;
 	};
 }

Modified: head/sys/riscv/include/cpufunc.h
==============================================================================
--- head/sys/riscv/include/cpufunc.h	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/include/cpufunc.h	Mon Oct 15 18:56:54 2018	(r339367)
@@ -81,29 +81,32 @@ intr_enable(void)
 	);
 }
 
-#define	cpu_nullop()			riscv_nullop()
-#define	cpufunc_nullop()		riscv_nullop()
-#define	cpu_setttb(a)			riscv_setttb(a)
+/* NB: fence() is defined as a macro in <machine/atomic.h>. */
 
-#define	cpu_tlb_flushID()		riscv_tlb_flushID()
-#define	cpu_tlb_flushID_SE(e)		riscv_tlb_flushID_SE(e)
+static __inline void
+fence_i(void)
+{
 
-#define	cpu_dcache_wbinv_range(a, s)	riscv_dcache_wbinv_range((a), (s))
-#define	cpu_dcache_inv_range(a, s)	riscv_dcache_inv_range((a), (s))
-#define	cpu_dcache_wb_range(a, s)	riscv_dcache_wb_range((a), (s))
+	__asm __volatile("fence.i" ::: "memory");
+}
 
-#define	cpu_idcache_wbinv_range(a, s)	riscv_idcache_wbinv_range((a), (s))
-#define	cpu_icache_sync_range(a, s)	riscv_icache_sync_range((a), (s))
+static __inline void
+sfence_vma(void)
+{
 
+	__asm __volatile("sfence.vma" ::: "memory");
+}
+
+static __inline void
+sfence_vma_page(uintptr_t addr)
+{
+
+	__asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
+}
+
+#define	cpufunc_nullop()		riscv_nullop()
+
 void riscv_nullop(void);
-void riscv_setttb(vm_offset_t);
-void riscv_tlb_flushID(void);
-void riscv_tlb_flushID_SE(vm_offset_t);
-void riscv_icache_sync_range(vm_offset_t, vm_size_t);
-void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_inv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wb_range(vm_offset_t, vm_size_t);
 
 #endif	/* _KERNEL */
 #endif	/* _MACHINE_CPUFUNC_H_ */
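
An illustrative aside (not part of the diff): the two new inlines cover
the two coherence cases in this commit.  In this sketch, patchpoint,
new_insn, l3, new_l3, and va are stand-in names:

	/* After patching an instruction, resync this hart's i-cache. */
	*(uint32_t *)patchpoint = new_insn;
	fence_i();

	/* After changing one mapping, drop this hart's cached translation. */
	pmap_load_store(l3, new_l3);
	sfence_vma_page(va);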

Modified: head/sys/riscv/include/kdb.h
==============================================================================
--- head/sys/riscv/include/kdb.h	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/include/kdb.h	Mon Oct 15 18:56:54 2018	(r339367)
@@ -47,7 +47,11 @@ static __inline void
 kdb_cpu_sync_icache(unsigned char *addr, size_t size)
 {
 
-	cpu_icache_sync_range((vm_offset_t)addr, size);
+	/*
+	 * Other CPUs flush their instruction cache when resuming from
+	 * IPI_STOP.
+	 */
+	fence_i();
 }
 
 static __inline void

Modified: head/sys/riscv/include/sbi.h
==============================================================================
--- head/sys/riscv/include/sbi.h	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/include/sbi.h	Mon Oct 15 18:56:54 2018	(r339367)
@@ -47,18 +47,25 @@
 #define	SBI_REMOTE_SFENCE_VMA_ASID	7
 #define	SBI_SHUTDOWN			8
 
+/*
+ * Documentation available at
+ * https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.md
+ */
+
 static __inline uint64_t
-sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2)
+sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2,
+    uint64_t arg3)
 {
-
 	register uintptr_t a0 __asm ("a0") = (uintptr_t)(arg0);
 	register uintptr_t a1 __asm ("a1") = (uintptr_t)(arg1);
 	register uintptr_t a2 __asm ("a2") = (uintptr_t)(arg2);
+	register uintptr_t a3 __asm ("a3") = (uintptr_t)(arg3);
 	register uintptr_t a7 __asm ("a7") = (uintptr_t)(arg7);
+
 	__asm __volatile(			\
 		"ecall"				\
 		:"+r"(a0)			\
-		:"r"(a1), "r"(a2), "r"(a7)	\
+		:"r"(a1), "r"(a2), "r" (a3), "r"(a7)	\
 		:"memory");
 
 	return (a0);
@@ -68,49 +75,49 @@ static __inline void
 sbi_console_putchar(int ch)
 {
 
-	sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0);
+	sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0, 0);
 }
 
 static __inline int
 sbi_console_getchar(void)
 {
 
-	return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0));
+	return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0, 0));
 }
 
 static __inline void
 sbi_set_timer(uint64_t val)
 {
 
-	sbi_call(SBI_SET_TIMER, val, 0, 0);
+	sbi_call(SBI_SET_TIMER, val, 0, 0, 0);
 }
 
 static __inline void
 sbi_shutdown(void)
 {
 
-	sbi_call(SBI_SHUTDOWN, 0, 0, 0);
+	sbi_call(SBI_SHUTDOWN, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_clear_ipi(void)
 {
 
-	sbi_call(SBI_CLEAR_IPI, 0, 0, 0);
+	sbi_call(SBI_CLEAR_IPI, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_send_ipi(const unsigned long *hart_mask)
 {
 
-	sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
 sbi_remote_fence_i(const unsigned long *hart_mask)
 {
 
-	sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
@@ -118,7 +125,7 @@ sbi_remote_sfence_vma(const unsigned long *hart_mask,
     unsigned long start, unsigned long size)
 {
 
-	sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, start, size, 0);
 }
 
 static __inline void
@@ -127,7 +134,8 @@ sbi_remote_sfence_vma_asid(const unsigned long *hart_m
     unsigned long asid)
 {
 
-	sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, start, size,
+	    asid);
 }
 
 #endif /* !_MACHINE_SBI_H_ */
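
An illustrative aside (not part of the diff): in the legacy SBI calling
convention used here, the function ID travels in a7 and up to four
arguments in a0-a3; "ecall" traps into the SEE, and the result comes
back in a0.  A one-page remote shootdown therefore expands roughly as:

	/*
	 * sbi_remote_sfence_vma(hart_mask, start, size) becomes
	 * sbi_call(SBI_REMOTE_SFENCE_VMA, hart_mask, start, size, 0):
	 *   a7 = SBI_REMOTE_SFENCE_VMA (6)
	 *   a0 = hart_mask  bitmap of target harts
	 *   a1 = start      first VA of the range
	 *   a2 = size       length of the range
	 *   a3 = 0          unused here (the asid slot)
	 * followed by "ecall"; the return value is read back from a0.
	 */
	sbi_remote_sfence_vma(hart_mask, va, PAGE_SIZE);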

Modified: head/sys/riscv/riscv/cpufunc_asm.S
==============================================================================
--- head/sys/riscv/riscv/cpufunc_asm.S	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/riscv/cpufunc_asm.S	Mon Oct 15 18:56:54 2018	(r339367)
@@ -33,70 +33,11 @@
  */
 
 #include <machine/asm.h>
-#include <machine/param.h>
 __FBSDID("$FreeBSD$");
 
 	.text
 	.align	2
 
-.Lpage_mask:
-	.word	PAGE_MASK
-
 ENTRY(riscv_nullop)
 	ret
 END(riscv_nullop)
-
-/*
- * Generic functions to read/modify/write the internal coprocessor registers
- */
-
-ENTRY(riscv_tlb_flushID)
-	sfence.vma
-	ret
-END(riscv_tlb_flushID)
-
-ENTRY(riscv_tlb_flushID_SE)
-	sfence.vma
-	ret
-END(riscv_tlb_flushID_SE)
-
-/*
- * void riscv_dcache_wb_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wb_range)
-	sfence.vma
-	ret
-END(riscv_dcache_wb_range)
-
-/*
- * void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wbinv_range)
-	sfence.vma
-	ret
-END(riscv_dcache_wbinv_range)
-
-/*
- * void riscv_dcache_inv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_inv_range)
-	sfence.vma
-	ret
-END(riscv_dcache_inv_range)
-
-/*
- * void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_idcache_wbinv_range)
-	fence.i
-	sfence.vma
-	ret
-END(riscv_idcache_wbinv_range)
-
-/*
- * void riscv_icache_sync_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_icache_sync_range)
-	fence.i
-	ret
-END(riscv_icache_sync_range)

Modified: head/sys/riscv/riscv/db_interface.c
==============================================================================
--- head/sys/riscv/riscv/db_interface.c	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/riscv/db_interface.c	Mon Oct 15 18:56:54 2018	(r339367)
@@ -151,11 +151,8 @@ db_write_bytes(vm_offset_t addr, size_t size, char *da
 		while (size-- > 0)
 			*dst++ = *data++;
 
-		fence();
-
-		/* Clean D-cache and invalidate I-cache */
-		cpu_dcache_wb_range(addr, (vm_size_t)size);
-		cpu_icache_sync_range(addr, (vm_size_t)size);
+		/* Invalidate I-cache */
+		fence_i();
 	}
 	(void)kdb_jmpbuf(prev_jb);
 

Modified: head/sys/riscv/riscv/mp_machdep.c
==============================================================================
--- head/sys/riscv/riscv/mp_machdep.c	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/riscv/mp_machdep.c	Mon Oct 15 18:56:54 2018	(r339367)
@@ -328,6 +328,12 @@ ipi_handler(void *arg)
 			CPU_CLR_ATOMIC(cpu, &started_cpus);
 			CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 			CTR0(KTR_SMP, "IPI_STOP (restart)");
+
+			/*
+			 * The kernel debugger might have set a breakpoint,
+			 * so flush the instruction cache.
+			 */
+			fence_i();
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
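
An illustrative timeline (not part of the diff) of how this pairs with
the kdb_cpu_sync_icache() change above:

	/*
	 * hart A (runs the debugger)        harts B..N (paused)
	 * --------------------------        -------------------
	 * sends IPI_STOP              --->  spin in ipi_handler()
	 * writes breakpoint insns
	 * fence_i() in kdb_cpu_sync_icache()
	 * sets started_cpus           --->  leave the spin loop
	 *                                   fence_i()  (added here)
	 *                                   resume; fetches see the
	 *                                   freshly written insns
	 */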

Modified: head/sys/riscv/riscv/pmap.c
==============================================================================
--- head/sys/riscv/riscv/pmap.c	Mon Oct 15 18:39:33 2018	(r339366)
+++ head/sys/riscv/riscv/pmap.c	Mon Oct 15 18:56:54 2018	(r339367)
@@ -152,6 +152,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
+#include <machine/sbi.h>
 
 #define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NUPDE			(NPDEPG * NPDEPG)
@@ -364,31 +365,12 @@ pmap_is_write(pt_entry_t entry)
 }
 
 static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
-	return ((pmap == pmap_kernel()) ||
-	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
-}
-
-static __inline int
 pmap_l3_valid(pt_entry_t l3)
 {
 
 	return (l3 & PTE_V);
 }
 
-static __inline int
-pmap_l3_valid_cacheable(pt_entry_t l3)
-{
-
-	/* TODO */
-
-	return (0);
-}
-
-#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
-
 static inline int
 pmap_page_accessed(pt_entry_t pte)
 {
@@ -514,14 +496,13 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t mi
 	dmap_phys_max = pa;
 	dmap_max_addr = va;
 
-	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
-	cpu_tlb_flushID();
+	sfence_vma();
 }
 
 static vm_offset_t
 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 {
-	vm_offset_t l2pt, l3pt;
+	vm_offset_t l3pt;
 	pt_entry_t entry;
 	pd_entry_t *l2;
 	vm_paddr_t pa;
@@ -532,7 +513,6 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm
 
 	l2 = pmap_l2(kernel_pmap, va);
 	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
-	l2pt = (vm_offset_t)l2;
 	l2_slot = pmap_l2_index(va);
 	l3pt = l3_start;
 
@@ -550,10 +530,7 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm
 
 	/* Clean the L2 page table */
 	memset((void *)l3_start, 0, l3pt - l3_start);
-	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
 
-	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
-
 	return (l3pt);
 }
 
@@ -676,7 +653,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart,
 	freemempos = pmap_bootstrap_l3(l1pt,
 	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
 
-	cpu_tlb_flushID();
+	sfence_vma();
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
@@ -732,8 +709,6 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart,
 	 * called something like "Maxphyspage".
 	 */
 	Maxmem = atop(phys_avail[avail_slot - 1]);
-
-	cpu_tlb_flushID();
 }
 
 /*
@@ -769,43 +744,99 @@ pmap_init(void)
 		rw_init(&pv_list_locks[i], "pmap pv list");
 }
 
+#ifdef SMP
 /*
- * Normal, non-SMP, invalidation functions.
- * We inline these within pmap.c for speed.
+ * For SMP, these functions have to use IPIs for coherence.
+ *
+ * In general, the calling thread uses a plain fence to order the
+ * writes to the page tables before invoking an SBI callback to invoke
+ * sfence_vma() on remote CPUs.
+ *
+ * Since the riscv pmap does not yet have a pm_active field, IPIs are
+ * sent to all CPUs in the system.
  */
-PMAP_INLINE void
+static void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+	cpuset_t mask;
 
-	/* TODO */
-
 	sched_pin();
-	__asm __volatile("sfence.vma %0" :: "r" (va) : "memory");
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_sfence_vma(mask.__bits, va, 1);
+	sfence_vma_page(va);
 	sched_unpin();
 }
 
-PMAP_INLINE void
+static void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
+	cpuset_t mask;
 
-	/* TODO */
-
 	sched_pin();
-	__asm __volatile("sfence.vma");
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
+
+	/*
+	 * Might consider a loop of sfence_vma_page() for a small
+	 * number of pages in the future.
+	 */
+	sfence_vma();
 	sched_unpin();
 }
 
-PMAP_INLINE void
+static void
 pmap_invalidate_all(pmap_t pmap)
 {
+	cpuset_t mask;
 
-	/* TODO */
-
 	sched_pin();
-	__asm __volatile("sfence.vma");
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+
+	/*
+	 * XXX: The SBI doc doesn't detail how to specify x0 as the
+	 * address to perform a global fence.  BBL currently treats
+	 * all sfence_vma requests as global however.
+	 */
+	sbi_remote_sfence_vma(mask.__bits, 0, 0);
 	sched_unpin();
 }
+#else
+/*
+ * Normal, non-SMP, invalidation functions.
+ * We inline these within pmap.c for speed.
+ */
+static __inline void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
 
+	sfence_vma_page(va);
+}
+
+static __inline void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+	/*
+	 * Might consider a loop of sfence_vma_page() for a small
+	 * number of pages in the future.
+	 */
+	sfence_vma();
+}
+
+static __inline void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+	sfence_vma();
+}
+#endif
+
 /*
  *	Routine:	pmap_extract
  *	Function:
@@ -937,8 +968,6 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l3, entry);
 
-		PTE_SYNC(l3);
-
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
@@ -958,11 +987,9 @@ pmap_kremove(vm_offset_t va)
 	l3 = pmap_l3(kernel_pmap, va);
 	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-	if (pmap_l3_valid_cacheable(pmap_load(l3)))
-		cpu_dcache_wb_range(va, L3_SIZE);
 	pmap_load_clear(l3);
-	PTE_SYNC(l3);
-	pmap_invalidate_page(kernel_pmap, va);
+
+	sfence_vma();
 }
 
 void
@@ -981,11 +1008,11 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
 		pmap_load_clear(l3);
-		PTE_SYNC(l3);
 
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
+
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
@@ -1039,7 +1066,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l3, entry);
 
-		PTE_SYNC(l3);
 		va += L3_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
@@ -1063,10 +1089,7 @@ pmap_qremove(vm_offset_t sva, int count)
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-		if (pmap_l3_valid_cacheable(pmap_load(l3)))
-			cpu_dcache_wb_range(va, L3_SIZE);
 		pmap_load_clear(l3);
-		PTE_SYNC(l3);
 
 		va += PAGE_SIZE;
 	}
@@ -1127,13 +1150,11 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t
 		l1 = pmap_l1(pmap, va);
 		pmap_load_clear(l1);
 		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
-		PTE_SYNC(l1);
 	} else {
 		/* PTE page */
 		pd_entry_t *l2;
 		l2 = pmap_l2(pmap, va);
 		pmap_load_clear(l2);
-		PTE_SYNC(l2);
 	}
 	pmap_resident_count_dec(pmap, 1);
 	if (m->pindex < NUPDE) {
@@ -1279,9 +1300,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l1, entry);
 		pmap_distribute_l1(pmap, l1index, entry);
-
-		PTE_SYNC(l1);
-
 	} else {
 		vm_pindex_t l1index;
 		pd_entry_t *l1, *l2;
@@ -1310,8 +1328,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l2, entry);
-
-		PTE_SYNC(l2);
 	}
 
 	pmap_resident_count_inc(pmap, 1);
@@ -1445,8 +1461,6 @@ pmap_growkernel(vm_offset_t addr)
 			pmap_load_store(l1, entry);
 			pmap_distribute_l1(kernel_pmap,
 			    pmap_l1_index(kernel_vm_end), entry);
-
-			PTE_SYNC(l1);
 			continue; /* try again */
 		}
 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
@@ -1474,7 +1488,6 @@ pmap_growkernel(vm_offset_t addr)
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l2, entry);
 
-		PTE_SYNC(l2);
 		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 
 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
@@ -1754,10 +1767,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
-		cpu_dcache_wb_range(va, L3_SIZE);
 	old_l3 = pmap_load_clear(l3);
-	PTE_SYNC(l3);
 	pmap_invalidate_page(pmap, va);
 	if (old_l3 & PTE_SW_WIRED)
 		pmap->pm_stats.wired_count -= 1;
@@ -1913,11 +1923,7 @@ pmap_remove_all(vm_page_t m)
 		    "a block in %p's pv list", m));
 
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
-		if (pmap_is_current(pmap) &&
-		    pmap_l3_valid_cacheable(pmap_load(l3)))
-			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
 		tl3 = pmap_load_clear(l3);
-		PTE_SYNC(l3);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		if (tl3 & PTE_SW_WIRED)
 			pmap->pm_stats.wired_count--;
@@ -1947,7 +1953,7 @@ pmap_remove_all(vm_page_t m)
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
-	vm_offset_t va, va_next;
+	vm_offset_t va_next;
 	pd_entry_t *l1, *l2;
 	pt_entry_t *l3p, l3;
 	pt_entry_t entry;
@@ -1986,7 +1992,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		if (va_next > eva)
 			va_next = eva;
 
-		va = va_next;
 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
 		    sva += L3_SIZE) {
 			l3 = pmap_load(l3p);
@@ -1994,7 +1999,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
 				entry = pmap_load(l3p);
 				entry &= ~(PTE_W);
 				pmap_load_store(l3p, entry);
-				PTE_SYNC(l3p);
 				/* XXX: Use pmap_invalidate_range */
 				pmap_invalidate_page(pmap, sva);
 			}
@@ -2092,8 +2096,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 				entry |= (l2_pn << PTE_PPN0_S);
 				pmap_load_store(l1, entry);
 				pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
-				PTE_SYNC(l1);
-
 				l2 = pmap_l1_to_l2(l1, va);
 			}
 
@@ -2112,7 +2114,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 			entry = (PTE_V);
 			entry |= (l3_pn << PTE_PPN0_S);
 			pmap_load_store(l2, entry);
-			PTE_SYNC(l2);
 			l3 = pmap_l2_to_l3(l2, va);
 		}
 		pmap_invalidate_page(pmap, va);
@@ -2163,10 +2164,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 			goto validate;
 		}
 
-		/* Flush the cache, there might be uncommitted data in it */
-		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
-			cpu_dcache_wb_range(va, L3_SIZE);
-
 		/*
 		 * The physical page has changed.  Temporarily invalidate
 		 * the mapping.  This ensures that all threads sharing the
@@ -2225,13 +2222,20 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
+validate:
 	/*
+	 * Sync the i-cache on all harts before updating the PTE
+	 * if the new PTE is executable.
+	 */
+	if (prot & VM_PROT_EXECUTE)
+		pmap_sync_icache(pmap, va, PAGE_SIZE);
+
+	/*
 	 * Update the L3 entry.
 	 */
 	if (orig_l3 != 0) {
-validate:
 		orig_l3 = pmap_load_store(l3, new_l3);
-		PTE_SYNC(l3);
+		pmap_invalidate_page(pmap, va);
 		KASSERT(PTE_TO_PHYS(orig_l3) == pa,
 		    ("pmap_enter: invalid update"));
 		if (pmap_page_dirty(orig_l3) &&
@@ -2239,11 +2243,7 @@ validate:
 			vm_page_dirty(m);
 	} else {
 		pmap_load_store(l3, new_l3);
-		PTE_SYNC(l3);
 	}
-	pmap_invalidate_page(pmap, va);
-	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
-	    cpu_icache_sync_range(va, PAGE_SIZE);
 
 	if (lock != NULL)
 		rw_wunlock(lock);
@@ -2423,9 +2423,16 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, v
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		entry |= PTE_SW_MANAGED;
+
+	/*
+	 * Sync the i-cache on all harts before updating the PTE
+	 * if the new PTE is executable.
+	 */
+	if (prot & VM_PROT_EXECUTE)
+		pmap_sync_icache(pmap, va, PAGE_SIZE);
+
 	pmap_load_store(l3, entry);
 
-	PTE_SYNC(l3);
 	pmap_invalidate_page(pmap, va);
 	return (mpte);
 }
@@ -2766,11 +2773,7 @@ pmap_remove_pages(pmap_t pmap)
 				    ("pmap_remove_pages: bad l3 %#jx",
 				    (uintmax_t)tl3));
 
-				if (pmap_is_current(pmap) &&
-				    pmap_l3_valid_cacheable(pmap_load(l3)))
-					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
 				pmap_load_clear(l3);
-				PTE_SYNC(l3);
 				pmap_invalidate_page(pmap, pv->pv_va);
 
 				/*
@@ -3244,16 +3247,10 @@ pmap_activate(struct thread *td)
 	critical_exit();
 }
 
-static void
-pmap_sync_icache_one(void *arg __unused)
-{
-
-	__asm __volatile("fence.i");
-}
-
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
+	cpuset_t mask;
 
 	/*
 	 * From the RISC-V User-Level ISA V2.2:
@@ -3263,8 +3260,12 @@ pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t 
 	 * before requesting that all remote RISC-V harts execute a
 	 * FENCE.I."
 	 */
-	__asm __volatile("fence");
-	smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
+	sched_pin();
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_fence_i(mask.__bits);
+	sched_unpin();
 }
 
 /*
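
An illustrative summary (not part of the diff): for executable mappings
the enter paths now order the operations as

	if (prot & VM_PROT_EXECUTE)
		pmap_sync_icache(pmap, va, PAGE_SIZE);	/* FENCE.I on all harts */
	pmap_load_store(l3, new_l3);			/* then publish the PTE */
	pmap_invalidate_page(pmap, va);			/* then TLB shootdown */

so no hart can fetch through the new mapping before its i-cache is
coherent with the freshly written page.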


