From owner-svn-src-stable@FreeBSD.ORG Thu Sep 3 13:54:58 2009
Message-Id: <200909031354.n83DswuL034426@svn.freebsd.org>
From: John Baldwin
Date: Thu, 3 Sep 2009 13:54:58 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject: svn commit: r196780 - in stable/8/sys: . amd64/amd64 amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/xen/xenpci i386/i386 i386/include
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
List-Id: SVN commit messages for all the -stable branches of the src tree

Author: jhb
Date: Thu Sep 3 13:54:58 2009
New Revision: 196780
URL: http://svn.freebsd.org/changeset/base/196780

Log:
  MFC 196705 and 196707:
  - Improve pmap_change_attr() on i386 so that it is able to demote a large
    (2/4MB) page into 4KB pages as needed.  This should be fairly rare in
    practice.
  - Simplify pmap_change_attr() a bit:
    - Always calculate the cache bits instead of doing it on-demand.
    - Always set changed to TRUE rather than only doing it if it is false.

  Approved by:	re (kib)

Modified:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/amd64/pmap.c
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/i386/i386/pmap.c
  stable/8/sys/i386/include/pmap.h
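A note on the second log item before the diff itself: both pmap_change_attr()
implementations previously computed the PAT cache bits lazily, behind a -1
sentinel, at every use site, and the change computes both values once, up
front.  A minimal stand-alone C sketch of the two styles, with a stub standing
in for the kernel's pmap_cache_bits() (the stub's encoding is a placeholder,
not the real PAT bit layout):

    #include <stdbool.h>

    /* Sketch only: stub in place of the kernel's pmap_cache_bits(). */
    static int
    stub_cache_bits(int mode, bool is_pde)
    {
        return (is_pde ? (mode << 3) : mode);   /* placeholder encoding */
    }

    /* Before: compute the bits lazily behind a -1 sentinel. */
    static int
    demo_lazy(int mode)
    {
        int cache_bits_pde = -1;

        if (cache_bits_pde < 0)
            cache_bits_pde = stub_cache_bits(mode, true);
        return (cache_bits_pde);
    }

    /* After: compute both values once, up front, so the loops that
     * follow can compare against them directly. */
    static int
    demo_eager(int mode)
    {
        int cache_bits_pde = stub_cache_bits(mode, true);
        int cache_bits_pte = stub_cache_bits(mode, false);

        return (cache_bits_pde | cache_bits_pte);
    }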
Modified: stable/8/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/8/sys/amd64/amd64/pmap.c	Thu Sep 3 13:40:41 2009	(r196779)
+++ stable/8/sys/amd64/amd64/pmap.c	Thu Sep 3 13:54:58 2009	(r196780)
@@ -4476,7 +4476,8 @@ pmap_change_attr_locked(vm_offset_t va,
 	if (base < DMAP_MIN_ADDRESS)
 		return (EINVAL);
 
-	cache_bits_pde = cache_bits_pte = -1;
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
 	changed = FALSE;
 
 	/*
@@ -4493,8 +4494,6 @@ pmap_change_attr_locked(vm_offset_t va,
 		 * memory type, then we need not demote this page. Just
 		 * increment tmpva to the next 1GB page frame.
 		 */
-		if (cache_bits_pde < 0)
-			cache_bits_pde = pmap_cache_bits(mode, 1);
 		if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
 			tmpva = trunc_1gpage(tmpva) + NBPDP;
 			continue;
@@ -4522,8 +4521,6 @@ pmap_change_attr_locked(vm_offset_t va,
 		 * memory type, then we need not demote this page. Just
 		 * increment tmpva to the next 2MB page frame.
 		 */
-		if (cache_bits_pde < 0)
-			cache_bits_pde = pmap_cache_bits(mode, 1);
 		if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 			tmpva = trunc_2mpage(tmpva) + NBPDR;
 			continue;
@@ -4557,12 +4554,9 @@ pmap_change_attr_locked(vm_offset_t va,
 	for (tmpva = base; tmpva < base + size; ) {
 		pdpe = pmap_pdpe(kernel_pmap, tmpva);
 		if (*pdpe & PG_PS) {
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pdpe, cache_bits_pde);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
@@ -4588,12 +4582,9 @@ pmap_change_attr_locked(vm_offset_t va,
 		}
 		pde = pmap_pdpe_to_pde(pdpe, tmpva);
 		if (*pde & PG_PS) {
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pde, cache_bits_pde);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
@@ -4616,13 +4607,10 @@ pmap_change_attr_locked(vm_offset_t va,
 			}
 			tmpva = trunc_2mpage(tmpva) + NBPDR;
 		} else {
-			if (cache_bits_pte < 0)
-				cache_bits_pte = pmap_cache_bits(mode, 0);
 			pte = pmap_pde_to_pte(pde, tmpva);
 			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 				pmap_pte_attr(pte, cache_bits_pte);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
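The amd64 loops above (and the i386 loops below) advance tmpva by the size of
whatever mapping they find, e.g. "tmpva = trunc_2mpage(tmpva) + NBPDR;".  A
small sketch of that stride arithmetic, with a placeholder constant standing
in for the kernel's NBPDR (2MB on amd64; on i386, NBPDR is 2 or 4MB depending
on PAE):

    #include <stdint.h>

    /* Placeholder for the kernel's NBPDR (2MB on amd64). */
    #define SK_NBPDR    (1UL << 21)

    /* Mirrors trunc_2mpage(): round va down to a 2MB boundary. */
    static uint64_t
    sk_trunc_2mpage(uint64_t va)
    {
        return (va & ~(SK_NBPDR - 1));
    }

    /* Skip past the 2MB mapping containing va, as the loops do with
     * "tmpva = trunc_2mpage(tmpva) + NBPDR;". */
    static uint64_t
    sk_next_2mpage(uint64_t va)
    {
        return (sk_trunc_2mpage(va) + SK_NBPDR);
    }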
Modified: stable/8/sys/i386/i386/pmap.c
==============================================================================
--- stable/8/sys/i386/i386/pmap.c	Thu Sep 3 13:40:41 2009	(r196779)
+++ stable/8/sys/i386/i386/pmap.c	Thu Sep 3 13:54:58 2009	(r196780)
@@ -288,12 +288,15 @@ static boolean_t pmap_enter_pde(pmap_t p
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 		    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde,
     vm_offset_t sva, vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -2289,32 +2292,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
 }
 
 /*
- * Tries to demote a 2- or 4MB page mapping.
+ * Fills a page table page with mappings to consecutive physical pages.
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+	pt_entry_t *pte;
+
+	for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+		*pte = newpte;
+		newpte += PAGE_SIZE;
+	}
+}
+
+/*
+ * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
+ * 2- or 4MB page mapping is invalidated.
 */
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
	pd_entry_t newpde, oldpde;
	pmap_t allpmaps_entry;
-	pt_entry_t *firstpte, newpte, *pte;
+	pt_entry_t *firstpte, newpte;
	vm_paddr_t mptepa;
	vm_page_t free, mpte;
 
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	oldpde = *pde;
+	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
	mpte = pmap_lookup_pt_page(pmap, va);
	if (mpte != NULL)
		pmap_remove_pt_page(pmap, mpte);
	else {
-		KASSERT((*pde & PG_W) == 0,
+		KASSERT((oldpde & PG_W) == 0,
		    ("pmap_demote_pde: page table page for a wired mapping"
		    " is missing"));
-		free = NULL;
-		pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
-		pmap_invalidate_page(pmap, trunc_4mpage(va));
-		pmap_free_zero_pages(free);
-		CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
+
+		/*
+		 * Invalidate the 2- or 4MB page mapping and return
+		 * "failure" if the mapping was never accessed or the
+		 * allocation of the new page table page fails.
+		 */
+		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+		    va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
+		    VM_ALLOC_WIRED)) == NULL) {
+			free = NULL;
+			pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
+			pmap_invalidate_page(pmap, trunc_4mpage(va));
+			pmap_free_zero_pages(free);
+			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
+			    " in pmap %p", va, pmap);
+			return (FALSE);
+		}
+		if (va < VM_MAXUSER_ADDRESS)
+			pmap->pm_stats.resident_count++;
	}
	mptepa = VM_PAGE_TO_PHYS(mpte);
@@ -2348,30 +2381,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
 		}
 		firstpte = PADDR2;
 	}
-	oldpde = *pde;
 	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
-	KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
-	    ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+	KASSERT((oldpde & PG_A) != 0,
+	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
-	KASSERT((oldpde & PG_PS) != 0,
-	    ("pmap_demote_pde: oldpde is missing PG_PS"));
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
-	 */
+	 * If the page table page is new, initialize it.
+	 */
+	if (mpte->wire_count == 1) {
+		mpte->wire_count = NPTEPG;
+		pmap_fill_ptp(firstpte, newpte);
+	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
+
+	/*
+	 * If the mapping has changed attributes, update the page table
+	 * entries.
+	 */
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
-		for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
-			*pte = newpte;
-			newpte += PAGE_SIZE;
-		}
+		pmap_fill_ptp(firstpte, newpte);
 
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
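The new pmap_fill_ptp() above is small enough to model in user space.  A
sketch with fixed-width types and placeholder constants in place of the
kernel's NPTEPG and PAGE_SIZE (values shown are for i386 without PAE):

    #include <stdint.h>

    #define SK_NPTEPG       1024    /* placeholder for NPTEPG */
    #define SK_PAGE_SIZE    4096    /* placeholder for PAGE_SIZE */

    /* Fill a page-table page with entries mapping consecutive 4KB
     * physical pages, mirroring pmap_fill_ptp() above. */
    static void
    sk_fill_ptp(uint32_t *firstpte, uint32_t newpte)
    {
        uint32_t *pte;

        for (pte = firstpte; pte < firstpte + SK_NPTEPG; pte++) {
            *pte = newpte;
            newpte += SK_PAGE_SIZE;
        }
    }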
@@ -4426,6 +4461,40 @@ pmap_clear_reference(vm_page_t m)
  * Miscellaneous support routines follow
  */
 
+/* Adjust the cache mode for a 4KB page mapped via a PTE. */
+static __inline void
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
+{
+	u_int opte, npte;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PTE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opte = *(u_int *)pte;
+		npte = opte & ~PG_PTE_CACHE;
+		npte |= cache_bits;
+	} while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
+}
+
+/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
+static __inline void
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
+{
+	u_int opde, npde;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PDE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opde = *(u_int *)pde;
+		npde = opde & ~PG_PDE_CACHE;
+		npde |= cache_bits;
+	} while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
+}
+
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
@@ -4537,13 +4606,23 @@ pmap_page_set_memattr(vm_page_t m, vm_me
 	}
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within the kernel map.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.
+ */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
-	pt_entry_t *pte;
-	u_int opte, npte;
 	pd_entry_t *pde;
+	pt_entry_t *pte;
+	int cache_bits_pte, cache_bits_pde;
 	boolean_t changed;
 
 	base = trunc_page(va);
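The pmap_pte_attr() and pmap_pde_attr() helpers added above spin on a 32-bit
compare-and-swap.  A user-space model of the same retry loop, using C11
atomics in place of the kernel's atomic_cmpset_int() and a placeholder mask
in place of PG_PTE_CACHE:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Placeholder mask standing in for PG_PTE_CACHE (PAT|PCD|PWT). */
    #define SK_PTE_CACHE    0x98

    /* Retry until the new cache bits are swapped in, or until the
     * entry already holds the desired bits (npte == opte). */
    static void
    sk_pte_attr(_Atomic uint32_t *pte, uint32_t cache_bits)
    {
        uint32_t opte, npte;

        do {
            opte = atomic_load(pte);
            npte = (opte & ~(uint32_t)SK_PTE_CACHE) | cache_bits;
        } while (npte != opte &&
            !atomic_compare_exchange_strong(pte, &opte, npte));
    }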
@@ -4556,47 +4635,84 @@ pmap_change_attr(vm_offset_t va, vm_size
 	if (base < VM_MIN_KERNEL_ADDRESS)
 		return (EINVAL);
 
-	/* 4MB pages and pages that aren't mapped aren't supported. */
-	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
+	changed = FALSE;
+
+	/*
+	 * Pages that aren't mapped aren't supported.  Also break down
+	 * 2/4MB pages into 4KB pages if required.
+	 */
+	PMAP_LOCK(kernel_pmap);
+	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
-		if (*pde & PG_PS)
-			return (EINVAL);
-		if (*pde == 0)
+		if (*pde == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
+		}
+		if (*pde & PG_PS) {
+			/*
+			 * If the current 2/4MB page already has
+			 * the required memory type, then we need not
+			 * demote this page.  Just increment tmpva to
+			 * the next 2/4MB page frame.
+			 */
+			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+				tmpva = trunc_4mpage(tmpva) + NBPDR;
+				continue;
+			}
+
+			/*
+			 * If the current offset aligns with a 2/4MB
+			 * page frame and there is at least 2/4MB left
+			 * within the range, then we need not break
+			 * down this page into 4KB pages.
+			 */
+			if ((tmpva & PDRMASK) == 0 &&
+			    tmpva + PDRMASK < base + size) {
+				tmpva += NBPDR;
+				continue;
+			}
+			if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
+				PMAP_UNLOCK(kernel_pmap);
+				return (ENOMEM);
+			}
+		}
 		pte = vtopte(tmpva);
-		if (*pte == 0)
+		if (*pte == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
+		}
+		tmpva += PAGE_SIZE;
 	}
-
-	changed = FALSE;
+	PMAP_UNLOCK(kernel_pmap);
 
 	/*
-	 * Ok, all the pages exist and are 4k, so run through them updating
-	 * their cache mode.
+	 * Ok, all the pages exist, so run through them updating their
+	 * cache mode if required.
 	 */
-	for (tmpva = base; size > 0; ) {
-		pte = vtopte(tmpva);
-
-		/*
-		 * The cache mode bits are all in the low 32-bits of the
-		 * PTE, so we can just spin on updating the low 32-bits.
-		 */
-		do {
-			opte = *(u_int *)pte;
-			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
-			npte |= pmap_cache_bits(mode, 0);
-		} while (npte != opte &&
-		    !atomic_cmpset_int((u_int *)pte, opte, npte));
-		if (npte != opte)
-			changed = TRUE;
-		tmpva += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	for (tmpva = base; tmpva < base + size; ) {
+		pde = pmap_pde(kernel_pmap, tmpva);
+		if (*pde & PG_PS) {
+			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+				pmap_pde_attr(pde, cache_bits_pde);
+				changed = TRUE;
+			}
+			tmpva = trunc_4mpage(tmpva) + NBPDR;
+		} else {
+			pte = vtopte(tmpva);
+			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
+				pmap_pte_attr(pte, cache_bits_pte);
+				changed = TRUE;
+			}
+			tmpva += PAGE_SIZE;
+		}
 	}
 
 	/*
-	 * Flush CPU caches to make sure any data isn't cached that shouldn't
-	 * be, etc.
-	 */
+	 * Flush CPU caches to make sure any data isn't cached that
+	 * shouldn't be, etc.
+	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
 		pmap_invalidate_cache_range(base, tmpva);

Modified: stable/8/sys/i386/include/pmap.h
==============================================================================
--- stable/8/sys/i386/include/pmap.h	Thu Sep 3 13:40:41 2009	(r196779)
+++ stable/8/sys/i386/include/pmap.h	Thu Sep 3 13:54:58 2009	(r196780)
@@ -81,6 +81,10 @@
 #define	PG_PROT		(PG_RW|PG_U)	/* all protection bits . */
 #define	PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */
 
+/* Page level cache control fields used to determine the PAT type */
+#define	PG_PDE_CACHE	(PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
+#define	PG_PTE_CACHE	(PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
+
 /*
  * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding
  * 4KB (PTE) page mappings have identical settings for the following fields:
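Finally, the comment block added above pmap_change_attr() spells out the
contract for callers.  A hedged sketch of a typical in-kernel caller (the
helper name here is made up, and PAT_WRITE_COMBINING is assumed to be
available from the platform headers):

    /* Hypothetical helper: mark an already-mapped KVA range (say, a
     * frame buffer) write-combining.  Per the contract above, EINVAL
     * means part of the range was unmapped; ENOMEM means a 2/4MB
     * demotion could not allocate a page table page. */
    static int
    sk_set_write_combining(vm_offset_t va, vm_size_t size)
    {
        return (pmap_change_attr(va, size, PAT_WRITE_COMBINING));
    }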