Date:      Thu, 3 Sep 2009 13:56:19 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
Subject:   svn commit: r196781 - in stable/7/sys: . amd64/amd64 amd64/include contrib/pf i386/i386 i386/include vm
Message-ID:  <200909031356.n83DuJ7P034506@svn.freebsd.org>

Author: jhb
Date: Thu Sep  3 13:56:18 2009
New Revision: 196781
URL: http://svn.freebsd.org/changeset/base/196781

Log:
  MFC 180430, 180483, 180485, 180601, 180845, 180871-180873, 181043, 181077,
  181151, 181284, 181356, 181456, 189454, 194209, 195416, 195836, 196318,
  196705, and 196707:
  Various fixes and enhancements to the amd64 and i386 pmaps to support PAT.
  - Extend pmap_demote_pde() to include the ability to instantiate a new page
    table page where none existed before.
  - Enhance pmap_change_attr() to fully support large (2/4MB) pages by
    demoting them to 4KB page mappings when needed (see the usage sketch
    after this log).
  - Enhance pmap_change_attr() to avoid page demotions, cache mode changes,
    and cache and TLB invalidation when some or all of the specified range
    is already mapped with the specified cache mode.
  - Enhance pmap_change_attr() to adjust the direct map automatically when
    changing the cache mode of a kernel virtual address range.
  - Fix pmap_object_init_pt() to not assume that the pages of an
    OBJT_DEVICE object are always physically contiguous.
  - Correct a critical accounting error in pmap_demote_pde().
  
  Reviewed by:	alc
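
  A brief usage sketch of the cache-attribute interfaces touched by this
  MFC (illustrative only: the fb_attach()/fb_set_uncacheable() helpers
  and the fb_base/fb_size values are hypothetical, and the PAT_*
  constant names are assumed to come from <machine/specialreg.h>):

	#include <sys/param.h>
	#include <vm/vm.h>
	#include <vm/pmap.h>
	#include <machine/specialreg.h>	/* PAT_WRITE_COMBINING, PAT_UNCACHEABLE */

	/*
	 * Hypothetical attach path: map a PCI frame buffer write-combined.
	 * fb_base and fb_size stand in for values a real driver would read
	 * from the device's BAR.
	 */
	static void *
	fb_attach(vm_paddr_t fb_base, vm_size_t fb_size)
	{

		return (pmap_mapdev_attr(fb_base, fb_size, PAT_WRITE_COMBINING));
	}

	/*
	 * Hypothetical runtime switch: change the memory type of an existing
	 * kernel virtual address range.  With this MFC, any 2/4MB mappings in
	 * the range are demoted to 4KB mappings as needed, and on amd64 the
	 * direct map aliases are updated to match.
	 */
	static int
	fb_set_uncacheable(void *va, vm_size_t fb_size)
	{

		return (pmap_change_attr((vm_offset_t)va, fb_size, PAT_UNCACHEABLE));
	}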

Modified:
  stable/7/sys/   (props changed)
  stable/7/sys/amd64/amd64/pmap.c
  stable/7/sys/amd64/include/pmap.h
  stable/7/sys/contrib/pf/   (props changed)
  stable/7/sys/i386/i386/pmap.c
  stable/7/sys/i386/include/pmap.h
  stable/7/sys/vm/vm_object.c
  stable/7/sys/vm/vm_object.h

Modified: stable/7/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/7/sys/amd64/amd64/pmap.c	Thu Sep  3 13:54:58 2009	(r196780)
+++ stable/7/sys/amd64/amd64/pmap.c	Thu Sep  3 13:56:18 2009	(r196781)
@@ -220,17 +220,22 @@ static void	pmap_pvh_free(struct md_page
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
+static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 		vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
@@ -461,11 +466,12 @@ create_pagetables(vm_paddr_t *firstaddr)
 		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
 	}
 
-
 	/* Now set up the direct map space using 2MB pages */
+	/* Preset PG_M and PG_A because demotion expects it */
 	for (i = 0; i < NPDEPG * ndmpdp; i++) {
 		((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
-		((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
+		((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G |
+		    PG_M | PG_A;
 	}
 
 	/* And the direct map space's PDP */
@@ -1068,7 +1074,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p
 	pte_store(pte, pa | PG_RW | PG_V | PG_G);
 }
 
-PMAP_INLINE void 
+static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	pt_entry_t *pte;
@@ -2192,58 +2198,98 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
 }
 
 /*
- * Tries to demote a 2MB page mapping.
+ * Fills a page table page with mappings to consecutive physical pages.
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+	pt_entry_t *pte;
+
+	for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+		*pte = newpte;
+		newpte += PAGE_SIZE;
+	}
+}
+
+/*
+ * Tries to demote a 2MB page mapping.  If demotion fails, the 2MB page
+ * mapping is invalidated.
  */
 static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
-	pt_entry_t *firstpte, newpte, *pte;
+	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	oldpde = *pde;
+	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 	mpte = pmap_lookup_pt_page(pmap, va);
 	if (mpte != NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
-		KASSERT((*pde & PG_W) == 0,
+		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
-		free = NULL;
-		pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
-		pmap_invalidate_page(pmap, trunc_2mpage(va));
-		pmap_free_zero_pages(free);
-		CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
+
+		/*
+		 * Invalidate the 2MB page mapping and return "failure" if the
+		 * mapping was never accessed or the allocation of the new
+		 * page table page fails.  If the 2MB page mapping belongs to
+		 * the direct map region of the kernel's address space, then
+		 * the page allocation request specifies the highest possible
+		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the priority is
+		 * normal.  Page table pages are preallocated for every other
+		 * part of the kernel address space, so the direct map region
+		 * is the only part of the kernel address space that must be
+		 * handled here.
+		 */
+		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+		    pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va <
+		    DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
+		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+			free = NULL;
+			pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
+			pmap_invalidate_page(pmap, trunc_2mpage(va));
+			pmap_free_zero_pages(free);
+			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
+			    " in pmap %p", va, pmap);
+			return (FALSE);
+		}
+		if (va < VM_MAXUSER_ADDRESS)
+			pmap->pm_stats.resident_count++;
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
-	oldpde = *pde;
 	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
-	KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
-	    ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+	KASSERT((oldpde & PG_A) != 0,
+	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
-	KASSERT((oldpde & PG_PS) != 0,
-	    ("pmap_demote_pde: oldpde is missing PG_PS"));
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
+	 * If the page table page is new, initialize it.
 	 */
+	if (mpte->wire_count == 1) {
+		mpte->wire_count = NPTEPG;
+		pmap_fill_ptp(firstpte, newpte);
+	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
+
+	/*
+	 * If the mapping has changed attributes, update the page table
+	 * entries.
+	 */
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
-		for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
-			*pte = newpte;
-			newpte += PAGE_SIZE;
-		}
+		pmap_fill_ptp(firstpte, newpte);
 
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
@@ -3310,78 +3356,74 @@ void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
-	vm_offset_t va;
+	pd_entry_t *pde;
+	vm_paddr_t pa, ptepa;
 	vm_page_t p, pdpg;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE,
 	    ("pmap_object_init_pt: non-device object"));
-	if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
-		vm_page_t m[1];
-		pd_entry_t ptepa, *pde;
-
-		PMAP_LOCK(pmap);
-		pde = pmap_pde(pmap, addr);
-		if (pde != 0 && (*pde & PG_V) != 0)
-			goto out;
-		PMAP_UNLOCK(pmap);
-retry:
+	if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
+		if (!vm_object_populate(object, pindex, pindex + atop(size)))
+			return;
 		p = vm_page_lookup(object, pindex);
-		if (p != NULL) {
-			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
-				goto retry;
-		} else {
-			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
-			if (p == NULL)
-				return;
-			m[0] = p;
-
-			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
-				vm_page_lock_queues();
-				vm_page_free(p);
-				vm_page_unlock_queues();
-				return;
-			}
-
-			p = vm_page_lookup(object, pindex);
-			vm_page_wakeup(p);
-		}
+		KASSERT(p->valid == VM_PAGE_BITS_ALL,
+		    ("pmap_object_init_pt: invalid page %p", p));
 
+		/*
+		 * Abort the mapping if the first page is not physically
+		 * aligned to a 2MB page boundary.
+		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 
-		p->valid = VM_PAGE_BITS_ALL;
+		/*
+		 * Skip the first page.  Abort the mapping if the rest of
+		 * the pages are not physically contiguous.
+		 */
+		p = TAILQ_NEXT(p, listq);
+		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
+		    pa += PAGE_SIZE) {
+			KASSERT(p->valid == VM_PAGE_BITS_ALL,
+			    ("pmap_object_init_pt: invalid page %p", p));
+			if (pa != VM_PAGE_TO_PHYS(p))
+				return;
+			p = TAILQ_NEXT(p, listq);
+		}
 
+		/* Map using 2MB pages. */
 		PMAP_LOCK(pmap);
-		for (va = addr; va < addr + size; va += NBPDR) {
-			while ((pdpg =
-			    pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
-				PMAP_UNLOCK(pmap);
-				vm_page_busy(p);
-				VM_OBJECT_UNLOCK(object);
-				VM_WAIT;
-				VM_OBJECT_LOCK(object);
-				vm_page_wakeup(p);
-				PMAP_LOCK(pmap);
+		for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
+			pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
+			if (pdpg == NULL) {
+				/*
+				 * The creation of mappings below is only an
+				 * optimization.  If a page directory page
+				 * cannot be allocated without blocking,
+				 * continue on to the next mapping rather than
+				 * blocking.
+				 */
+				addr += NBPDR;
+				continue;
 			}
 			pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
-			pde = &pde[pmap_pde_index(va)];
+			pde = &pde[pmap_pde_index(addr)];
 			if ((*pde & PG_V) == 0) {
-				pde_store(pde, ptepa | PG_PS | PG_M | PG_A |
+				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
-				pmap->pm_stats.resident_count +=
-				    NBPDR / PAGE_SIZE;
+				pmap->pm_stats.resident_count += NBPDR /
+				    PAGE_SIZE;
+				pmap_pde_mappings++;
 			} else {
+				/* Continue on if the PDE is already valid. */
 				pdpg->wire_count--;
 				KASSERT(pdpg->wire_count > 0,
 				    ("pmap_object_init_pt: missing reference "
-				     "to page directory page, va: 0x%lx", va));
+				    "to page directory page, va: 0x%lx", addr));
 			}
-			ptepa += NBPDR;
+			addr += NBPDR;
 		}
-		pmap_invalidate_all(pmap);
-out:
 		PMAP_UNLOCK(pmap);
 	}
 }
@@ -3739,7 +3781,9 @@ pmap_remove_pages(pmap_t pmap)
 				if ((tpte & PG_PS) != 0)
 					pte = pde;
 				else {
-					pte = vtopte(pv->pv_va);
+					pte = (pt_entry_t *)PHYS_TO_DMAP(tpte &
+					    PG_FRAME);
+					pte = &pte[pmap_pte_index(pv->pv_va)];
 					tpte = *pte & ~PG_PTE_PAT;
 				}
 
@@ -4160,41 +4204,35 @@ pmap_clear_reference(vm_page_t m)
 
 /* Adjust the cache mode for a 4KB page mapped via a PTE. */
 static __inline void
-pmap_pte_attr(vm_offset_t va, int mode)
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
 {
-	pt_entry_t *pte;
 	u_int opte, npte;
 
-	pte = vtopte(va);
-
 	/*
 	 * The cache mode bits are all in the low 32-bits of the
 	 * PTE, so we can just spin on updating the low 32-bits.
 	 */
 	do {
 		opte = *(u_int *)pte;
-		npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
-		npte |= pmap_cache_bits(mode, 0);
+		npte = opte & ~PG_PTE_CACHE;
+		npte |= cache_bits;
 	} while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
 }
 
 /* Adjust the cache mode for a 2MB page mapped via a PDE. */
 static __inline void
-pmap_pde_attr(vm_offset_t va, int mode)
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
 {
-	pd_entry_t *pde;
 	u_int opde, npde;
 
-	pde = pmap_pde(kernel_pmap, va);
-
 	/*
 	 * The cache mode bits are all in the low 32-bits of the
 	 * PDE, so we can just spin on updating the low 32-bits.
 	 */
 	do {
 		opde = *(u_int *)pde;
-		npde = opde & ~(PG_PDE_PAT | PG_NC_PCD | PG_NC_PWT);
-		npde |= pmap_cache_bits(mode, 1);
+		npde = opde & ~PG_PDE_CACHE;
+		npde |= cache_bits;
 	} while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
 }
 
@@ -4210,11 +4248,14 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_
 	vm_offset_t va, tmpva, offset;
 
 	/*
-	 * If this fits within the direct map window and use WB caching
-	 * mode, use the direct map.
+	 * If the specified range of physical addresses fits within the direct
+	 * map window, use the direct map. 
 	 */
-	if (pa < dmaplimit && (pa + size) < dmaplimit && mode == PAT_WRITE_BACK)
-		return ((void *)PHYS_TO_DMAP(pa));
+	if (pa < dmaplimit && pa + size < dmaplimit) {
+		va = PHYS_TO_DMAP(pa);
+		if (!pmap_change_attr(va, size, mode))
+			return ((void *)va);
+	}
 	offset = pa & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 	va = kmem_alloc_nofault(kernel_map, size);
@@ -4263,68 +4304,174 @@ pmap_unmapdev(vm_offset_t va, vm_size_t 
 	kmem_free(kernel_map, base, size);
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within either the direct map or the kernel map.  If
+ * the virtual address range is contained within the kernel map, then the
+ * memory type for each of the corresponding ranges of the direct map is also
+ * changed.  (The corresponding ranges of the direct map are those ranges that
+ * map the same physical pages as the specified virtual address range.)  These
+ * changes to the direct map are necessary because Intel describes the
+ * behavior of their processors as "undefined" if two or more mappings to the
+ * same physical page have different memory types.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.  In the
+ * latter case, the memory type may have been changed on some part of the
+ * virtual address range or the direct map.
+ */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
+	int error;
+
+	PMAP_LOCK(kernel_pmap);
+	error = pmap_change_attr_locked(va, size, mode);
+	PMAP_UNLOCK(kernel_pmap);
+	return (error);
+}
+
+static int
+pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
+{
 	vm_offset_t base, offset, tmpva;
+	vm_paddr_t pa_start, pa_end;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
+	int cache_bits_pte, cache_bits_pde, error;
+	boolean_t changed;
 
+	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
-	/* Only supported on kernel virtual addresses. */
-	if (base <= VM_MAXUSER_ADDRESS)
+	/*
+	 * Only supported on kernel virtual addresses, including the direct
+	 * map but excluding the recursive map.
+	 */
+	if (base < DMAP_MIN_ADDRESS)
 		return (EINVAL);
 
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
+	changed = FALSE;
+
 	/*
-	 * XXX: We have to support tearing 2MB pages down into 4k pages if
-	 * needed here.
+	 * Pages that aren't mapped aren't supported.  Also break down 2MB pages
+	 * into 4KB pages if required.
 	 */
-	/* Pages that aren't mapped aren't supported. */
-	for (tmpva = base; tmpva < (base + size); ) {
+	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
 		if (*pde == 0)
 			return (EINVAL);
 		if (*pde & PG_PS) {
-			/* Handle 2MB pages that are completely contained. */
-			if (size >= NBPDR) {
+			/*
+			 * If the current 2MB page already has the required
+			 * memory type, then we need not demote this page. Just
+			 * increment tmpva to the next 2MB page frame.
+			 */
+			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+				tmpva = trunc_2mpage(tmpva) + NBPDR;
+				continue;
+			}
+
+			/*
+			 * If the current offset aligns with a 2MB page frame
+			 * and there is at least 2MB left within the range, then
+			 * we need not break down this page into 4KB pages.
+			 */
+			if ((tmpva & PDRMASK) == 0 &&
+			    tmpva + PDRMASK < base + size) {
 				tmpva += NBPDR;
 				continue;
 			}
-			return (EINVAL);
+			if (!pmap_demote_pde(kernel_pmap, pde, tmpva))
+				return (ENOMEM);
 		}
-		pte = vtopte(va);
+		pte = pmap_pde_to_pte(pde, tmpva);
 		if (*pte == 0)
 			return (EINVAL);
 		tmpva += PAGE_SIZE;
 	}
+	error = 0;
 
 	/*
 	 * Ok, all the pages exist, so run through them updating their
-	 * cache mode.
+	 * cache mode if required.
 	 */
-	for (tmpva = base; size > 0; ) {
+	pa_start = pa_end = 0;
+	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
 		if (*pde & PG_PS) {
-			pmap_pde_attr(tmpva, mode);
-			tmpva += NBPDR;
-			size -= NBPDR;
+			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+				pmap_pde_attr(pde, cache_bits_pde);
+				changed = TRUE;
+			}
+			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
+				if (pa_start == pa_end) {
+					/* Start physical address run. */
+					pa_start = *pde & PG_PS_FRAME;
+					pa_end = pa_start + NBPDR;
+				} else if (pa_end == (*pde & PG_PS_FRAME))
+					pa_end += NBPDR;
+				else {
+					/* Run ended, update direct map. */
+					error = pmap_change_attr_locked(
+					    PHYS_TO_DMAP(pa_start),
+					    pa_end - pa_start, mode);
+					if (error != 0)
+						break;
+					/* Start physical address run. */
+					pa_start = *pde & PG_PS_FRAME;
+					pa_end = pa_start + NBPDR;
+				}
+			}
+			tmpva = trunc_2mpage(tmpva) + NBPDR;
 		} else {
-			pmap_pte_attr(tmpva, mode);
+			pte = pmap_pde_to_pte(pde, tmpva);
+			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
+				pmap_pte_attr(pte, cache_bits_pte);
+				changed = TRUE;
+			}
+			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
+				if (pa_start == pa_end) {
+					/* Start physical address run. */
+					pa_start = *pte & PG_FRAME;
+					pa_end = pa_start + PAGE_SIZE;
+				} else if (pa_end == (*pte & PG_FRAME))
+					pa_end += PAGE_SIZE;
+				else {
+					/* Run ended, update direct map. */
+					error = pmap_change_attr_locked(
+					    PHYS_TO_DMAP(pa_start),
+					    pa_end - pa_start, mode);
+					if (error != 0)
+						break;
+					/* Start physical address run. */
+					pa_start = *pte & PG_FRAME;
+					pa_end = pa_start + PAGE_SIZE;
+				}
+			}
 			tmpva += PAGE_SIZE;
-			size -= PAGE_SIZE;
 		}
 	}
+	if (error == 0 && pa_start != pa_end)
+		error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start),
+		    pa_end - pa_start, mode);
 
 	/*
-	 * Flush CPU caches to make sure any data isn't cached that shouldn't
-	 * be, etc.
-	 */    
-	pmap_invalidate_range(kernel_pmap, base, tmpva);
-	pmap_invalidate_cache();
-	return (0);
+	 * Flush CPU caches if required to make sure any data isn't cached that
+	 * shouldn't be, etc.
+	 */
+	if (changed) {
+		pmap_invalidate_range(kernel_pmap, base, tmpva);
+		pmap_invalidate_cache();
+	}
+	return (error);
 }
 
 /*

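The direct map fixups in pmap_change_attr_locked() above coalesce
contiguous physical pages into runs so that the recursive call on the
direct map is made once per run rather than once per page.  A standalone
illustration of that pa_start/pa_end pattern, written as ordinary
userland C with a hypothetical change_run() callback standing in for the
recursive pmap_change_attr_locked() call:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE	4096

	/* Stand-in for the recursive direct map update. */
	static void
	change_run(uint64_t pa_start, uint64_t pa_end)
	{

		printf("update [%#jx, %#jx)\n", (uintmax_t)pa_start,
		    (uintmax_t)pa_end);
	}

	/*
	 * Walk an array of physical page addresses and issue one callback per
	 * maximal run of contiguous pages, mirroring the pa_start/pa_end
	 * bookkeeping in pmap_change_attr_locked().
	 */
	static void
	coalesce_runs(const uint64_t *pa, size_t npages)
	{
		uint64_t pa_start, pa_end;
		size_t i;

		pa_start = pa_end = 0;
		for (i = 0; i < npages; i++) {
			if (pa_start == pa_end) {
				/* Start a new physical address run. */
				pa_start = pa[i];
				pa_end = pa_start + PAGE_SIZE;
			} else if (pa_end == pa[i]) {
				/* Extend the current run. */
				pa_end += PAGE_SIZE;
			} else {
				/* Run ended; flush it and start another. */
				change_run(pa_start, pa_end);
				pa_start = pa[i];
				pa_end = pa_start + PAGE_SIZE;
			}
		}
		if (pa_start != pa_end)
			change_run(pa_start, pa_end);
	}

	int
	main(void)
	{
		uint64_t pages[] = { 0x1000, 0x2000, 0x3000, 0x10000, 0x11000 };

		coalesce_runs(pages, sizeof(pages) / sizeof(pages[0]));
		return (0);
	}
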
Modified: stable/7/sys/amd64/include/pmap.h
==============================================================================
--- stable/7/sys/amd64/include/pmap.h	Thu Sep  3 13:54:58 2009	(r196780)
+++ stable/7/sys/amd64/include/pmap.h	Thu Sep  3 13:56:18 2009	(r196781)
@@ -75,6 +75,10 @@
 #define	PG_PROT		(PG_RW|PG_U)	/* all protection bits . */
 #define PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */
 
+/* Page level cache control fields used to determine the PAT type */
+#define PG_PDE_CACHE	(PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
+#define PG_PTE_CACHE	(PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
+
 /*
  * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB
  * (PTE) page mappings have identical settings for the following fields:
@@ -308,7 +312,6 @@ void	pmap_bootstrap(vm_paddr_t *);
 int	pmap_change_attr(vm_offset_t, vm_size_t, int);
 void	pmap_init_pat(void);
 void	pmap_kenter(vm_offset_t va, vm_paddr_t pa);
-void	pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 void	*pmap_kenter_temporary(vm_paddr_t pa, int i);
 vm_paddr_t pmap_kextract(vm_offset_t);
 void	pmap_kremove(vm_offset_t);

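The PG_PDE_CACHE and PG_PTE_CACHE masks added above are consumed by the
new pmap_pde_attr() and pmap_pte_attr() helpers, which rewrite only the
cache-control bits of a live entry inside a compare-and-swap loop.  A
standalone sketch of that pattern, using C11 atomics in place of the
kernel's atomic_cmpset_int() and illustrative bit values rather than the
real <machine/pmap.h> definitions:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative cache-control bits; the real values live in pmap.h. */
	#define PG_NC_PWT	0x008
	#define PG_NC_PCD	0x010
	#define PG_PTE_PAT	0x080
	#define PG_PTE_CACHE	(PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)

	/*
	 * Replace only the cache-control bits of a PTE word, leaving the frame
	 * and permission bits intact even if another CPU changes them
	 * concurrently (for example, the hardware setting PG_A or PG_M).
	 */
	static void
	pte_set_cache_bits(_Atomic uint32_t *pte, uint32_t cache_bits)
	{
		uint32_t opte, npte;

		opte = atomic_load(pte);
		do {
			npte = (opte & ~(uint32_t)PG_PTE_CACHE) | cache_bits;
			if (npte == opte)
				return;	/* Already has the requested bits. */
		} while (!atomic_compare_exchange_weak(pte, &opte, npte));
	}

	int
	main(void)
	{
		_Atomic uint32_t pte = 0x12345067;	/* frame | PG_M | PG_A | perms */

		pte_set_cache_bits(&pte, PG_NC_PCD | PG_NC_PWT);
		printf("pte = %#x\n", (unsigned)atomic_load(&pte));
		return (0);
	}
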
Modified: stable/7/sys/i386/i386/pmap.c
==============================================================================
--- stable/7/sys/i386/i386/pmap.c	Thu Sep  3 13:54:58 2009	(r196780)
+++ stable/7/sys/i386/i386/pmap.c	Thu Sep  3 13:56:18 2009	(r196781)
@@ -284,11 +284,15 @@ static boolean_t pmap_enter_pde(pmap_t p
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -1132,7 +1136,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p
 	pte_store(pte, pa | PG_RW | PG_V | pgeflag);
 }
 
-PMAP_INLINE void 
+static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	pt_entry_t *pte;
@@ -2245,32 +2249,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
 }
 
 /*
- * Tries to demote a 2- or 4MB page mapping.
+ * Fills a page table page with mappings to consecutive physical pages.
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+	pt_entry_t *pte;
+
+	for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+		*pte = newpte;	
+		newpte += PAGE_SIZE;
+	}
+}
+
+/*
+ * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
+ * 2- or 4MB page mapping is invalidated.
  */
 static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
 	pmap_t allpmaps_entry;
-	pt_entry_t *firstpte, newpte, *pte;
+	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	oldpde = *pde;
+	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 	mpte = pmap_lookup_pt_page(pmap, va);
 	if (mpte != NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
-		KASSERT((*pde & PG_W) == 0,
+		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
-		free = NULL;
-		pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
-		pmap_invalidate_page(pmap, trunc_4mpage(va));
-		pmap_free_zero_pages(free);
-		CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
+
+		/*
+		 * Invalidate the 2- or 4MB page mapping and return
+		 * "failure" if the mapping was never accessed or the
+		 * allocation of the new page table page fails.
+		 */
+		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+		    va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
+		    VM_ALLOC_WIRED)) == NULL) {
+			free = NULL;
+			pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
+			pmap_invalidate_page(pmap, trunc_4mpage(va));
+			pmap_free_zero_pages(free);
+			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
+			    " in pmap %p", va, pmap);
+			return (FALSE);
+		}
+		if (va < VM_MAXUSER_ADDRESS)
+			pmap->pm_stats.resident_count++;
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 
@@ -2304,30 +2338,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
 		}
 		firstpte = PADDR2;
 	}
-	oldpde = *pde;
 	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
-	KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
-	    ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+	KASSERT((oldpde & PG_A) != 0,
+	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
-	KASSERT((oldpde & PG_PS) != 0,
-	    ("pmap_demote_pde: oldpde is missing PG_PS"));
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
-	 */ 
+	 * If the page table page is new, initialize it.
+	 */
+	if (mpte->wire_count == 1) {
+		mpte->wire_count = NPTEPG;
+		pmap_fill_ptp(firstpte, newpte);
+	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
+
+	/*
+	 * If the mapping has changed attributes, update the page table
+	 * entries.
+	 */ 
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
-		for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
-			*pte = newpte;	
-			newpte += PAGE_SIZE;
-		}
+		pmap_fill_ptp(firstpte, newpte);
 	
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
@@ -3432,62 +3468,57 @@ void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
+	pd_entry_t *pde;
+	vm_paddr_t pa, ptepa;
 	vm_page_t p;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE,
 	    ("pmap_object_init_pt: non-device object"));
 	if (pseflag && 
-	    ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
-		int i;
-		vm_page_t m[1];
-		unsigned int ptepindex;
-		int npdes;
-		pd_entry_t ptepa;
-
-		PMAP_LOCK(pmap);
-		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
-			goto out;
-		PMAP_UNLOCK(pmap);
-retry:
+	    (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
+		if (!vm_object_populate(object, pindex, pindex + atop(size)))
+			return;
 		p = vm_page_lookup(object, pindex);
-		if (p != NULL) {
-			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
-				goto retry;
-		} else {
-			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
-			if (p == NULL)
-				return;
-			m[0] = p;
-
-			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
-				vm_page_lock_queues();
-				vm_page_free(p);
-				vm_page_unlock_queues();
-				return;
-			}
-
-			p = vm_page_lookup(object, pindex);
-			vm_page_wakeup(p);
-		}
+		KASSERT(p->valid == VM_PAGE_BITS_ALL,
+		    ("pmap_object_init_pt: invalid page %p", p));
 
+		/*
+		 * Abort the mapping if the first page is not physically
+		 * aligned to a 2/4MB page boundary.
+		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 
-		p->valid = VM_PAGE_BITS_ALL;
+		/*
+		 * Skip the first page.  Abort the mapping if the rest of
+		 * the pages are not physically contiguous.
+		 */
+		p = TAILQ_NEXT(p, listq);
+		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
+		    pa += PAGE_SIZE) {
+			KASSERT(p->valid == VM_PAGE_BITS_ALL,
+			    ("pmap_object_init_pt: invalid page %p", p));
+			if (pa != VM_PAGE_TO_PHYS(p))
+				return;
+			p = TAILQ_NEXT(p, listq);
+		}
 
+		/* Map using 2/4MB pages. */
 		PMAP_LOCK(pmap);
-		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
-		npdes = size >> PDRSHIFT;
-		for(i = 0; i < npdes; i++) {
-			pde_store(&pmap->pm_pdir[ptepindex],
-			    ptepa | PG_U | PG_RW | PG_V | PG_PS);
-			ptepa += NBPDR;
-			ptepindex += 1;
+		for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
+			pde = pmap_pde(pmap, addr);
+			if (*pde == 0) {
+				pde_store(pde, pa | PG_PS | PG_M | PG_A |
+				    PG_U | PG_RW | PG_V);
+				pmap->pm_stats.resident_count += NBPDR /
+				    PAGE_SIZE;
+				pmap_pde_mappings++;
+			}
+			/* Else continue on if the PDE is already valid. */
+			addr += NBPDR;
 		}
-		pmap_invalidate_all(pmap);
-out:
 		PMAP_UNLOCK(pmap);
 	}
 }
@@ -4326,6 +4357,40 @@ pmap_clear_reference(vm_page_t m)
  * Miscellaneous support routines follow
  */
 
+/* Adjust the cache mode for a 4KB page mapped via a PTE. */
+static __inline void
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
+{
+	u_int opte, npte;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PTE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opte = *(u_int *)pte;
+		npte = opte & ~PG_PTE_CACHE;
+		npte |= cache_bits;
+	} while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
+}
+
+/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
+static __inline void
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
+{
+	u_int opde, npde;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PDE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opde = *(u_int *)pde;
+		npde = opde & ~PG_PDE_CACHE;
+		npde |= cache_bits;
+	} while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
+}
+
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
@@ -4389,61 +4454,117 @@ pmap_unmapdev(vm_offset_t va, vm_size_t 
 	kmem_free(kernel_map, base, size);
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within the kernel map.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.
+ */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
-	pt_entry_t *pte;
-	u_int opte, npte;
 	pd_entry_t *pde;
+	pt_entry_t *pte;
+	int cache_bits_pte, cache_bits_pde;
+	boolean_t changed;
 
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
-	/* Only supported on kernel virtual addresses. */
-	if (base <= VM_MAXUSER_ADDRESS)
+	/*
+	 * Only supported on kernel virtual addresses above the recursive map.
+	 */
+	if (base < VM_MIN_KERNEL_ADDRESS)
 		return (EINVAL);
 
-	/* 4MB pages and pages that aren't mapped aren't supported. */
-	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
+	changed = FALSE;
+
+	/*
+	 * Pages that aren't mapped aren't supported.  Also break down
+	 * 2/4MB pages into 4KB pages if required.
+	 */
+	PMAP_LOCK(kernel_pmap);
+	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
-		if (*pde & PG_PS)
-			return (EINVAL);
-		if (*pde == 0)
+		if (*pde == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
-		pte = vtopte(va);
-		if (*pte == 0)
+		}
+		if (*pde & PG_PS) {
+			/*
+			 * If the current 2/4MB page already has
+			 * the required memory type, then we need not
+			 * demote this page.  Just increment tmpva to
+			 * the next 2/4MB page frame.
+			 */
+			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+				tmpva = trunc_4mpage(tmpva) + NBPDR;
+				continue;
+			}
+
+			/*
+			 * If the current offset aligns with a 2/4MB
+			 * page frame and there is at least 2/4MB left
+			 * within the range, then we need not break
+			 * down this page into 4KB pages.
+			 */
+			if ((tmpva & PDRMASK) == 0 &&
+			    tmpva + PDRMASK < base + size) {
+				tmpva += NBPDR;
+				continue;
+			}
+			if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
+				PMAP_UNLOCK(kernel_pmap);
+				return (ENOMEM);
+			}
+		}
+		pte = vtopte(tmpva);
+		if (*pte == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
+		}
+		tmpva += PAGE_SIZE;
 	}
+	PMAP_UNLOCK(kernel_pmap);
 
 	/*
-	 * Ok, all the pages exist and are 4k, so run through them updating
-	 * their cache mode.
+	 * Ok, all the pages exist, so run through them updating their
+	 * cache mode if required.
 	 */
-	for (tmpva = base; size > 0; ) {
-		pte = vtopte(tmpva);
-
-		/*
-		 * The cache mode bits are all in the low 32-bits of the
-		 * PTE, so we can just spin on updating the low 32-bits.
-		 */
-		do {
-			opte = *(u_int *)pte;
-			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
-			npte |= pmap_cache_bits(mode, 0);
-		} while (npte != opte &&
-		    !atomic_cmpset_int((u_int *)pte, opte, npte));
-		tmpva += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	for (tmpva = base; tmpva < base + size; ) {
+		pde = pmap_pde(kernel_pmap, tmpva);
+		if (*pde & PG_PS) {
+			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+				pmap_pde_attr(pde, cache_bits_pde);
+				changed = TRUE;
+			}
+			tmpva = trunc_4mpage(tmpva) + NBPDR;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


