Date:      Mon, 14 Jun 2021 20:25:26 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 25b73f214418 - stable/13 - riscv: Handle hardware-managed dirty bit updates in pmap_promote_l2()
Message-ID:  <202106142025.15EKPQnV073261@gitrepo.freebsd.org>

The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=25b73f21441893423d00b6631ad22c7d0ad4b0d2

commit 25b73f21441893423d00b6631ad22c7d0ad4b0d2
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-06-06 20:41:35 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-06-14 20:25:15 +0000

    riscv: Handle hardware-managed dirty bit updates in pmap_promote_l2()
    
    pmap_promote_l2() failed to handle implementations that update the
    accessed and dirty flags in hardware.  In particular, when comparing
    the attributes of a run of 512 PTEs, we must handle the possibility
    that the hardware will set PTE_D on a clean, writable mapping.
    
    Following the example of amd64 and arm64, change riscv's
    pmap_promote_l2() to downgrade clean, writable mappings to read-only,
    so that dirty-bit updates are synchronized by the pmap lock.
    
    Fixes:          f6893f09d
    Reported by:    Nathaniel Filardo <nwf20@cl.cam.ac.uk>
    Tested by:      Nathaniel Filardo <nwf20@cl.cam.ac.uk>
    Reviewed by:    jrtc27, alc, Nathaniel Filardo
    Sponsored by:   The FreeBSD Foundation
    
    (cherry picked from commit c05748e028b84c216d0161e70418f8cb09e074e4)
---
 sys/riscv/riscv/pmap.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index f30dda17afae..0f2834febd41 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -2540,7 +2540,7 @@ static void
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
-	pt_entry_t *firstl3, *l3;
+	pt_entry_t *firstl3, firstl3e, *l3, l3e;
 	vm_paddr_t pa;
 	vm_page_t ml3;
 
@@ -2551,7 +2551,8 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 	    ("pmap_promote_l2: invalid l2 entry %p", l2));
 
 	firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
-	pa = PTE_TO_PHYS(pmap_load(firstl3));
+	firstl3e = pmap_load(firstl3);
+	pa = PTE_TO_PHYS(firstl3e);
 	if ((pa & L2_OFFSET) != 0) {
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 		    va, pmap);
@@ -2559,17 +2560,40 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 		return;
 	}
 
+	/*
+	 * Downgrade a clean, writable mapping to read-only to ensure that the
+	 * hardware does not set PTE_D while we are comparing PTEs.
+	 *
+	 * Upon a write access to a clean mapping, the implementation will
+	 * either atomically check protections and set PTE_D, or raise a page
+	 * fault.  In the latter case, the pmap lock provides atomicity.  Thus,
+	 * we do not issue an sfence.vma here and instead rely on pmap_fault()
+	 * to do so lazily.
+	 */
+	while ((firstl3e & (PTE_W | PTE_D)) == PTE_W) {
+		if (atomic_fcmpset_64(firstl3, &firstl3e, firstl3e & ~PTE_W)) {
+			firstl3e &= ~PTE_W;
+			break;
+		}
+	}
+
 	pa += PAGE_SIZE;
 	for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) {
-		if (PTE_TO_PHYS(pmap_load(l3)) != pa) {
+		l3e = pmap_load(l3);
+		if (PTE_TO_PHYS(l3e) != pa) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			return;
 		}
-		if ((pmap_load(l3) & PTE_PROMOTE) !=
-		    (pmap_load(firstl3) & PTE_PROMOTE)) {
+		while ((l3e & (PTE_W | PTE_D)) == PTE_W) {
+			if (atomic_fcmpset_64(l3, &l3e, l3e & ~PTE_W)) {
+				l3e &= ~PTE_W;
+				break;
+			}
+		}
+		if ((l3e & PTE_PROMOTE) != (firstl3e & PTE_PROMOTE)) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
@@ -2589,11 +2613,10 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 		return;
 	}
 
-	if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0)
-		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)),
-		    lockp);
+	if ((firstl3e & PTE_SW_MANAGED) != 0)
+		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(firstl3e), lockp);
 
-	pmap_store(l2, pmap_load(firstl3));
+	pmap_store(l2, firstl3e);
 
 	atomic_add_long(&pmap_l2_promotions, 1);
 	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
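
To illustrate the pattern outside the kernel, below is a minimal,
self-contained C11 sketch of the downgrade loop added above, with
atomic_compare_exchange_weak() standing in for FreeBSD's
atomic_fcmpset_64().  The PTE_W and PTE_D bit positions follow the
RISC-V Sv39 PTE encoding; the helper name and the main() driver are
hypothetical and for illustration only, not kernel code.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	PTE_W	(1ul << 2)	/* writable (Sv39 PTE bit 2) */
#define	PTE_D	(1ul << 7)	/* dirty (Sv39 PTE bit 7) */

typedef _Atomic uint64_t pt_entry_t;

/*
 * Downgrade a clean (PTE_D clear), writable (PTE_W set) entry to
 * read-only so that a concurrent hardware A/D update cannot set PTE_D
 * behind our back.  Returns the value the entry held when the loop
 * exited, mirroring how pmap_promote_l2() keeps using l3e for the
 * later PTE_PROMOTE comparison.
 */
static uint64_t
downgrade_clean_writable(pt_entry_t *pte)
{
	uint64_t e = atomic_load(pte);

	while ((e & (PTE_W | PTE_D)) == PTE_W) {
		/*
		 * On failure, compare_exchange_weak reloads the current
		 * value into 'e', just as atomic_fcmpset_64() reloads
		 * *pte into its second argument.
		 */
		if (atomic_compare_exchange_weak(pte, &e, e & ~PTE_W)) {
			e &= ~PTE_W;
			break;
		}
	}
	return (e);
}

int
main(void)
{
	pt_entry_t pte = PTE_W;		/* a clean, writable entry */

	printf("before: %#lx after: %#lx\n", (unsigned long)PTE_W,
	    (unsigned long)downgrade_clean_writable(&pte));
	return (0);
}

As in pmap_promote_l2(), the loop retries only while the entry is still
clean and writable: if the hardware (or, in this sketch, another thread)
sets PTE_D first, the downgrade is skipped and the dirty attribute is
simply observed by the subsequent comparison.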


