Date:      Fri, 2 May 2014 10:43:32 +0000 (UTC)
From:      "Cherry G. Mathew" <cherry@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r265223 - projects/amd64_xen_pv/sys/amd64/xen
Message-ID:  <201405021043.s42AhWp7089441@svn.freebsd.org>

Author: cherry
Date: Fri May  2 10:43:32 2014
New Revision: 265223
URL: http://svnweb.freebsd.org/changeset/base/265223

Log:
  This commit brings the xen PV pmap closer to native. Changes include:
   - Direct map (DMAP) support, using 4K pages
   - Recursive page table mapping, read-only (see the short sketch below)
  
  This commit also restores changes that were lost from r264590 and
  r264583 during the MFC, namely:
   - final changes to mmu_map.[ch]
   - desegregation of pmap_pv.c and pmap.c
  
  Note that 2M pages are still not supported, and maximum RAM is still
  limited to the amount of KVA that can be mapped with 4K pages out of
  the initial 512K of Xen-mapped space (approximately 64M).
  
  With this commit, booting with the native-ish pmap should reach a
  panic prompt.
  
  Approved by:	gibbs (implicit)

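As an aside on the second bullet above: the sketch below is illustrative
only, not code from this commit. It assumes the standard amd64 PTmap
window and reuses the index arithmetic of the new vtopte() in pmap.c.
With a PML4 slot that points back at the PML4 page itself, the PTE for
any kernel va becomes reachable through one fixed window instead of a
four-level walk; under Xen PV that window is mapped read-only, and all
updates still go through the hypervisor.

static pt_entry_t *
recursive_pte(vm_offset_t va)	/* hypothetical helper, for illustration */
{
	/* 36 == NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT */
	uint64_t mask = (1ul << 36) - 1;

	return (PTmap + ((va >> PAGE_SHIFT) & mask));	/* read-only view */
}
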
Modified:
  projects/amd64_xen_pv/sys/amd64/xen/machdep.c
  projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c
  projects/amd64_xen_pv/sys/amd64/xen/mmu_map.h
  projects/amd64_xen_pv/sys/amd64/xen/pmap.c
  projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c

Modified: projects/amd64_xen_pv/sys/amd64/xen/machdep.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Fri May  2 10:31:42 2014	(r265222)
+++ projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Fri May  2 10:43:32 2014	(r265223)
@@ -408,6 +408,27 @@ xen_rootconf(void)
 
 SYSINIT(xen_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_ANY, xen_rootconf, NULL);
 
+/* See: xen/interface/memory.h */
+/* XXX: TODO: export to vm/ later in the boot process */
+xen_pfn_t xen_m2m_list[NPDPEPG];
+
+static void init_xen_super(void)
+{
+	int rc;
+
+	struct xen_machphys_mfn_list mfn_list = {
+		.max_extents = NPDPEPG,
+		.nr_extents = 0
+	};
+	set_xen_guest_handle(mfn_list.extent_start, xen_m2m_list);
+
+	rc = HYPERVISOR_memory_op(XENMEM_machphys_mfn_list, &mfn_list);
+
+	if (rc < 0) panic("Hypercall for 2M frame list fails!");
+
+	if (!mfn_list.nr_extents) panic("No 2M frames available!");
+}
+
 /* 
  * Setup early kernel environment, based on start_info passed to us by
  * xen
@@ -425,12 +446,16 @@ initxen(struct start_info *si)
 
 	/* global variables */
 	xen_start_info = si;
+	HYPERVISOR_start_info = si;
 
 	/* xen variables */
 	xen_phys_machine = (xen_pfn_t *)si->mfn_list;
 
+	/* XXX: if hypervisor has FEATURE 2M SUPER pages) */
+	init_xen_super();
+
 	physmem = si->nr_pages;
-	Maxmem = si->nr_pages + 1;
+	Maxmem = si->nr_pages;
 	memset(phys_avail, 0, sizeof phys_avail);
 	memset(dump_avail, 0 , sizeof dump_avail);
 
@@ -517,6 +542,15 @@ initxen(struct start_info *si)
 		/* NOTREACHED */
 	}
 
+
+	/* Update the DMAP mapping, which is an alias. */
+	if (HYPERVISOR_update_va_mapping((vm_offset_t)PHYS_TO_DMAP(VTOP(gdt)), 
+		gdt0_frame | PG_U | PG_V, UVMF_INVLPG)) {
+		printk("HYPERVISOR_update_va_mapping() failed\n");
+		cpu_halt();
+		/* NOTREACHED */
+	}
+
 	if (HYPERVISOR_set_gdt((unsigned long *)&gdt0_frame_mfn, NGDT) != 0) {
 		printk("HYPERVISOR_set_gdt() failed\n");
 		cpu_halt();
@@ -639,7 +673,7 @@ initxen(struct start_info *si)
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-	thread0.td_pcb->pcb_cr3 = xpmap_ptom(VTOP(KPML4phys));
+	thread0.td_pcb->pcb_cr3 = xpmap_ptom(KPML4phys);
 	thread0.td_frame = &proc0_tf;
 	thread0.td_pcb->pcb_gsbase = (uint64_t) pc;
 

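The DMAP hunk above is needed because the direct map is a second alias
of the GDT frame: the usual Xen PV rule is that a frame handed to
HYPERVISOR_set_gdt() must not be writable through any guest mapping, so
every alias has to be downgraded first. A minimal sketch of the
resulting sequence, reusing names from the hunk (gdt, gdt0_frame,
gdt0_frame_mfn); this is an illustration, not the committed code:

/* Both virtual aliases of the GDT frame are remapped without PG_RW
 * (i.e. read-only) before the frame is handed to Xen.
 */
if (HYPERVISOR_update_va_mapping((vm_offset_t)gdt,
	gdt0_frame | PG_U | PG_V, UVMF_INVLPG) != 0 ||
    HYPERVISOR_update_va_mapping((vm_offset_t)PHYS_TO_DMAP(VTOP(gdt)),
	gdt0_frame | PG_U | PG_V, UVMF_INVLPG) != 0)
	panic("could not make GDT aliases read-only");

if (HYPERVISOR_set_gdt((unsigned long *)&gdt0_frame_mfn, NGDT) != 0)
	panic("HYPERVISOR_set_gdt() failed");
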
Modified: projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c	Fri May  2 10:31:42 2014	(r265222)
+++ projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c	Fri May  2 10:43:32 2014	(r265223)
@@ -188,6 +188,126 @@ struct mmu_map_index {
 			  * Make optional on DEBUG */
 };
 
+/* Scan a page table for valid entries. Return true if any found */
+/* XXX: this is terribly slow - we need a queue/flush model to queue up frees and "flush" them all out together. */
+/* XXX; support huge pages */
+static bool
+pdpt_empty(pdp_entry_t *pdpt)
+{
+	int i;
+	
+	KASSERT(pdpt != NULL,
+		("Invalid pdpt\n"));
+
+	for (i = 0;i < NPDPEPG;i++) {
+		if (pdpt[i] & PG_V) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static bool
+pdt_empty(pd_entry_t *pdt)
+{
+	int i;
+
+	KASSERT(pdt != NULL,
+		("Invalid pdt\n"));
+
+	for (i = 0;i < NPDEPG;i++) {
+		if (pdt[i] & PG_V) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static bool
+pt_empty(pt_entry_t *pt)
+{
+	
+	int i;
+
+	KASSERT(pt != NULL,
+		("Invalid pt\n"));
+
+	for (i = 0;i < NPTEPG;i++) {
+		if (pt[i] & PG_V) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Free a table and update its entry in the hierarchy */
+static void
+mmu_map_pdpt_free(struct mmu_map_index *pti, uintptr_t va)
+{
+	vm_paddr_t pml4tep_ma;
+	pml4_entry_t *pml4tep;
+
+	KASSERT(pti->sanity == SANE, ("%s: pti insane!", __func__));
+
+	pml4tep = &pti->pml4t[pml4t_index(va)];
+
+	pml4tep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pml4tep)
+);
+	xen_queue_pt_update(pml4tep_ma, 0);
+	xen_flush_queue();
+
+	/* The PDPT is empty. Free it and zero the
+	 * pointer 
+	 */
+	//if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pdpt);
+	pti->pdpt = NULL;
+}
+
+static void
+mmu_map_pdt_free(struct mmu_map_index *pti, uintptr_t va)
+{
+	vm_paddr_t pdptep_ma;
+	pdp_entry_t *pdptep;
+
+	KASSERT(pti->sanity == SANE, ("%s: pti insane!", __func__));
+
+	pdptep = &pti->pdpt[pdpt_index(va)];
+
+	pdptep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pdptep));
+	xen_queue_pt_update(pdptep_ma, 0);
+	xen_flush_queue();
+
+	/* The PDT is empty. Free it and zero the
+	 * pointer 
+	 */
+	//	if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pdt);
+	pti->pdt = NULL;
+}
+
+static void
+mmu_map_pt_free(struct mmu_map_index *pti, uintptr_t va)
+{
+	vm_paddr_t pdtep_ma;
+	pd_entry_t *pdtep;
+
+	KASSERT(pti->sanity == SANE, ("%s: pti insane!", __func__));
+
+	pdtep = &pti->pdt[pdt_index(va)];
+
+	/* Zap the backing PDT entry */
+	pdtep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pdtep));
+	xen_queue_pt_update(pdtep_ma, 0);
+	xen_flush_queue();
+
+	/* The PT is empty. Free it and zero the
+	 * pointer */
+	//if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pt);
+	pti->pt = NULL;
+}
+
 size_t
 mmu_map_t_size(void)
 {
@@ -222,8 +342,7 @@ void mmu_map_t_fini(void *addr)
 	pti->sanity = 0;
 
 	if (mb->free != NULL) {
-		/* XXX: go through PT hierarchy and free + unmap
-		 * unused tables */ 
+		/* XXX */
 	}
 }
 
@@ -312,10 +431,12 @@ mmu_map_inspect_va(struct pmap *pm, void
 
 	return true;
 }
-
+#include <sys/proc.h>
 bool
 mmu_map_hold_va(struct pmap *pm, void *addr, uintptr_t va)
 {
+	bool alloced = false;
+
 	KASSERT(addr != NULL && pm != NULL, ("NULL arg(s) given"));
 
 	struct mmu_map_index *pti = addr;
@@ -324,7 +445,6 @@ mmu_map_hold_va(struct pmap *pm, void *a
 	/* Reset pti */
 	pti->pml4t = pti->pdpt = pti->pdt = pti->pt = 0;
 
-	bool alloced = false; /* Did we have to alloc backing pages ? */
 	vm_paddr_t pt;
 
 	pti->pml4t = pmap_get_pml4t(pm);
@@ -416,12 +536,10 @@ mmu_map_release_va(struct pmap *pm, void
 
 	if (pti->pdt != NULL) { /* Zap pdte */
 
-		pd_entry_t *pdtep;
-		vm_paddr_t pdtep_ma;
-
-		pdtep = &pti->pdt[pdt_index(va)];
-
 		if (pti->pt == NULL) {
+			pd_entry_t *pdtep;
+			pdtep = &pti->pdt[pdt_index(va)];
+
 			KASSERT(*pdtep == 0, ("%s(%d): mmu state machine out of sync!\n", __func__, __LINE__));
 		} else {
 
@@ -443,33 +561,23 @@ mmu_map_release_va(struct pmap *pm, void
 				return;
 			}
 
-			/* We can free the PT only after the PDT entry is zapped */
-			if (memcchr(pti->pt, 0, PAGE_SIZE) == NULL) {
-				/* Zap the backing PDT entry */
-				pdtep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pdtep));
-				xen_queue_pt_update(pdtep_ma, 0);
-				xen_flush_queue();
-
-				/* The PT is empty. Free it and zero the
-				 * pointer */
-				if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pt);
-				pti->pt = NULL;
+			if (pt_empty(pti->pt)) {
+				mmu_map_pt_free(pti, va);
+#ifdef notyet
+				pm->pm_stats.resident_count--;
+#endif
 
 			}
-
 		}
 
 		KASSERT(pti->pdpt != 0, ("Invalid pdpt\n"));
 	}
 
 	if (pti->pdpt != NULL) { /* Zap pdpte */
-
-		pdp_entry_t *pdptep;
-		vm_paddr_t pdptep_ma;
-
-		pdptep = &pti->pdpt[pdpt_index(va)];
-
 		if (pti->pdt == NULL) {
+			pdp_entry_t *pdptep;
+			pdptep = &pti->pdpt[pdpt_index(va)];
+
 			KASSERT(*pdptep == 0, ("%s(%d): mmu state machine out of sync!\n", __func__, __LINE__));
 		}
 
@@ -491,28 +599,20 @@ mmu_map_release_va(struct pmap *pm, void
 			return;
 		}
 
-		/* We can free the PDT only after the PDPT entry is zapped */
-		if (memcchr(pti->pdt, 0, PAGE_SIZE) == NULL) {
-			pdptep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pdptep));
-			xen_queue_pt_update(pdptep_ma, 0);
-			xen_flush_queue();
-
-			/* The PDT is empty. Free it and zero the
-			 * pointer 
-			 */
-			if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pdt);
-			pti->pdt = NULL;
+		if (pdt_empty(pti->pdt)) {
+			mmu_map_pdt_free(pti, va);
+#ifdef notyet
+			pm->pm_stats.resident_count--;
+#endif
 		}
 		KASSERT(pti->pml4t != 0, ("Invalid pml4t\n"));
 	}
 
 	if (pti->pml4t != NULL) { /* Zap pml4te */
-		pml4_entry_t *pml4tep;
-		vm_paddr_t pml4tep_ma;
-
-		pml4tep = &pti->pml4t[pml4t_index(va)];
-
 		if (pti->pdpt == NULL) {
+			pml4_entry_t *pml4tep;
+			pml4tep = &pti->pml4t[pml4t_index(va)];
+
 			KASSERT(*pml4tep == 0, ("%s(%d): mmu state machine out of sync!\n", __func__, __LINE__));
 		}
 
@@ -534,17 +634,12 @@ mmu_map_release_va(struct pmap *pm, void
 			return;
 		}
 
-		if (memcchr(pti->pdpt, 0, PAGE_SIZE) == NULL) {
-			pml4tep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pml4tep)
-);
-			xen_queue_pt_update(pml4tep_ma, 0);
-			xen_flush_queue();
+		if (pdpt_empty(pti->pdpt)) {
+			mmu_map_pdpt_free(pti, va);
+#ifdef notyet
+			pm->pm_stats.resident_count--;
+#endif
 
-			/* The PDPT is empty. Free it and zero the
-			 * pointer 
-			 */
-			if (pti->ptmb.free) pti->ptmb.free((uintptr_t)pti->pdpt);
-			pti->pdpt = NULL;
 		}
 	}			
 
@@ -553,5 +648,6 @@ mmu_map_release_va(struct pmap *pm, void
 	 * higher level aliasing issues across pmaps and vcpus that
 	 * can't be addressed here.
 	 */
+
 }
 					

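The XXX comment above the new pdpt_empty()/pdt_empty()/pt_empty() block
notes that flushing after every individual zap is slow. A rough sketch
of the batched alternative it alludes to, built only from primitives
already used in this file (xen_queue_pt_update(), xen_flush_queue(), the
*_empty() scanners); the helper name is hypothetical and this is not
part of the commit:

/*
 * Hypothetical batching helper: queue every zap that releasing 'va'
 * requires and issue a single flush at the end, instead of one
 * xen_flush_queue() per level.  Note that with batching the
 * higher-level emptiness checks see the pre-zap table contents, so a
 * real implementation would have to account for pending updates.
 */
static void
mmu_map_zap_empty_levels(struct mmu_map_index *pti, uintptr_t va)
{
	if (pti->pt != NULL && pt_empty(pti->pt))
		xen_queue_pt_update(xpmap_ptom(pti->ptmb.vtop(
		    (uintptr_t)&pti->pdt[pdt_index(va)])), 0);

	if (pti->pdt != NULL && pdt_empty(pti->pdt))
		xen_queue_pt_update(xpmap_ptom(pti->ptmb.vtop(
		    (uintptr_t)&pti->pdpt[pdpt_index(va)])), 0);

	if (pti->pdpt != NULL && pdpt_empty(pti->pdpt))
		xen_queue_pt_update(xpmap_ptom(pti->ptmb.vtop(
		    (uintptr_t)&pti->pml4t[pml4t_index(va)])), 0);

	xen_flush_queue();	/* one flush for all queued zaps */

	/* table pages would be returned to the backend here, as in the
	 * mmu_map_*_free() routines above */
}
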
Modified: projects/amd64_xen_pv/sys/amd64/xen/mmu_map.h
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/mmu_map.h	Fri May  2 10:31:42 2014	(r265222)
+++ projects/amd64_xen_pv/sys/amd64/xen/mmu_map.h	Fri May  2 10:43:32 2014	(r265223)
@@ -133,10 +133,18 @@ bool mmu_map_inspect_va(struct pmap *, m
  */
 bool mmu_map_hold_va(struct pmap *,  mmu_map_t, vm_offset_t);
 
-/* Optionally release resources after tear down of a va->pa mapping */
+/* Optionally release resources needed to "inspect" a va->pa mapping
+ * Note: This function is advisory in nature. The va->pa mapping
+ * itself may *NOT* be modified.
+ */
 void mmu_map_release_va(struct pmap *, mmu_map_t, vm_offset_t);
 
 /* 
+ * Release all resources allocated via mmu_map_hold_va().
+ */
+void mmu_map_destroy(struct pmap *);
+
+/* 
  * Machine dependant "view" into the page table hierarchy FSM.
  * On amd64, there are four tables that are consulted for a va->pa
  * translation. This information may be extracted by the MD functions

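To make the hold/release contract above concrete, here is a hypothetical
caller. Only entry points visible in this diff are used; the cookie
storage size and the elided mmu_map_t_init() backend hookup are
assumptions, not part of the commit:

char cookie[128];		/* must be >= mmu_map_t_size() bytes (size assumed) */
mmu_map_t tptr = cookie;

/* ... mmu_map_t_init(tptr, <memory backend>) ... */

if (!mmu_map_inspect_va(pmap, tptr, va))	/* backing tables missing? */
	mmu_map_hold_va(pmap, tptr, va);	/* allocate them */

/* ... install the pte for 'va' via the MD view of the hierarchy ... */

mmu_map_release_va(pmap, tptr, va);	/* advisory: may free empty tables */
mmu_map_t_fini(tptr);
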
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Fri May  2 10:31:42 2014	(r265222)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Fri May  2 10:43:32 2014	(r265223)
@@ -83,6 +83,9 @@
  * SUCH DAMAGE.
  */
 
+
+#define	AMD64_NPT_AWARE
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -136,15 +139,19 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/proc.h>
 #include <sys/sched.h>
+#include <sys/sysctl.h>
 
 #include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
 #include <vm/vm_param.h>
-#include <vm/vm_map.h>
 #include <vm/vm_kern.h>
-#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_radix.h>
+#include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 #include <machine/md_var.h>
@@ -156,6 +163,172 @@ __FBSDID("$FreeBSD$");
 #include <amd64/xen/mmu_map.h>
 #include <amd64/xen/pmap_pv.h>
 
+static __inline boolean_t
+pmap_emulate_ad_bits(pmap_t pmap)
+{
+
+	return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0);
+}
+
+static __inline pt_entry_t
+pmap_valid_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_V;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_EMUL_V;
+		else
+			mask = EPT_PG_READ;
+		break;
+	default:
+		panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_rw_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_RW;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_EMUL_RW;
+		else
+			mask = EPT_PG_WRITE;
+		break;
+	default:
+		panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_global_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_G;
+		break;
+	case PT_EPT:
+		mask = 0;
+		break;
+	default:
+		panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_accessed_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_A;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_READ;
+		else
+			mask = EPT_PG_A;
+		break;
+	default:
+		panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_modified_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_M;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_WRITE;
+		else
+			mask = EPT_PG_M;
+		break;
+	default:
+		panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+#if !defined(DIAGNOSTIC)
+#ifdef __GNUC_GNU_INLINE__
+#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
+#else
+#define PMAP_INLINE	extern inline
+#endif
+#else
+#define PMAP_INLINE
+#endif
+
+#ifdef PV_STATS
+#define PV_STAT(x)	do { x ; } while (0)
+#else
+#define PV_STAT(x)	do { } while (0)
+#endif
+
+#define	pa_index(pa)	((pa) >> PDRSHIFT)
+#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
+
+#define	NPV_LIST_LOCKS	MAXCPU
+
+#define	PHYS_TO_PV_LIST_LOCK(pa)	\
+			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+
+#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
+	struct rwlock **_lockp = (lockp);		\
+	struct rwlock *_new_lock;			\
+							\
+	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
+	if (_new_lock != *_lockp) {			\
+		if (*_lockp != NULL)			\
+			rw_wunlock(*_lockp);		\
+		*_lockp = _new_lock;			\
+		rw_wlock(*_lockp);			\
+	}						\
+} while (0)
+
+#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
+			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+
+#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
+	struct rwlock **_lockp = (lockp);		\
+							\
+	if (*_lockp != NULL) {				\
+		rw_wunlock(*_lockp);			\
+		*_lockp = NULL;				\
+	}						\
+} while (0)
+
+#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
+			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+
 extern vm_offset_t pa_index; /* from machdep.c */
 extern unsigned long physfree; /* from machdep.c */
 
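The pv-list lock macros added above mirror the ones in the native amd64
pmap; a minimal usage sketch (the pv manipulation itself is elided, and
the caller is hypothetical):

struct rwlock *lock = NULL;

/* Take the lock bucket covering page 'm', switching locks if a
 * previously examined page hashed to a different bucket.
 */
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
/* ... add or remove a pv entry for 'm' ... */
RELEASE_PV_LIST_LOCK(&lock);
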
@@ -172,35 +345,58 @@ uintptr_t virtual_end;	/* VA of last ava
 
 int nkpt;
 
-/* 
- * VA for temp mapping to zero.
- * We need this because on xen, the DMAP is R/O
- */
-const uintptr_t zerova = VM_MAX_KERNEL_ADDRESS;
-
-#define DMAP4KSUPPORT /* Temporary 4K based DMAP support */
-#ifdef DMAPSUPPORT
 static int ndmpdp;
-static vm_paddr_t dmaplimit;
-#endif
-
+vm_paddr_t dmaplimit;
 uintptr_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
 pt_entry_t pg_nx = 0; /* XXX: probe for this ? */
 
 struct msgbuf *msgbufp = 0;
 
+static int pg_ps_enabled = 1;
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
+    "Are large page mappings enabled?");
+
 static u_int64_t	KPTphys;	/* phys addr of kernel level 1 */
 static u_int64_t	KPDphys;	/* phys addr of kernel level 2 */
 u_int64_t		KPDPphys;	/* phys addr of kernel level 3 */
 u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */
 
-#if defined(DMAPSUPPORT) || defined(DMAP4KSUPPORT)
-#ifdef DMAP4KSUPPORT
 static u_int64_t	DMPTphys;	/* phys addr of direct mapped level 1 */
-#endif
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
-#endif /* DMAPSUPPORT || DMAP4KSUPPORT */
+
+static int		ndmpdpphys;	/* number of DMPDPphys pages */
+
+static struct rwlock_padalign pvh_global_lock;
+
+/*
+ * Data for the pv entry allocation mechanism
+ */
+TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+struct mtx pv_chunks_mutex;
+struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static struct md_page *pv_table;
+
+static int pmap_flags = 0; // XXX: PMAP_PDE_SUPERPAGE;	/* flags for x86 pmaps */
+
+static void	free_pv_chunk(struct pv_chunk *pc);
+static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
+static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
+static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
+		    vm_offset_t va);
+
+
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+
+static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
+    struct spglist *free);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
 
 static vm_paddr_t	boot_ptphys;	/* phys addr of start of
 					 * kernel bootstrap tables
@@ -208,13 +404,6 @@ static vm_paddr_t	boot_ptphys;	/* phys a
 static vm_paddr_t	boot_ptendphys;	/* phys addr of end of kernel
 					 * bootstrap page tables
 					 */
-
-static size_t tsz; /* mmu_map.h opaque cookie size */
-static uintptr_t (*ptmb_mappedalloc)(void) = NULL;
-static void (*ptmb_mappedfree)(uintptr_t) = NULL;
-static uintptr_t (*ptmb_ptov)(vm_paddr_t) = NULL;
-static vm_paddr_t (*ptmb_vtop)(uintptr_t) = NULL;
-
 extern int gdtset;
 extern uint64_t xenstack; /* The stack Xen gives us at boot */
 extern char *console_page; /* The shared ring for console i/o */
@@ -222,6 +411,134 @@ extern struct xenstore_domain_interface 
 
 extern vm_map_t pv_map;
 
+/********************/
+/* Inline functions */
+/********************/
+
+/* XXX: */
+
+#define MACH_TO_DMAP(_m) PHYS_TO_DMAP(xpmap_mtop(_m))
+#define DMAP_TO_MACH(_v) xpmap_ptom(DMAP_TO_PHYS(_v))
+
+/* Return a non-clipped PD index for a given VA */
+static __inline vm_pindex_t
+pmap_pde_pindex(vm_offset_t va)
+{
+	return (va >> PDRSHIFT);
+}
+
+
+/* Return various clipped indexes for a given VA */
+static __inline vm_pindex_t
+pmap_pte_index(vm_offset_t va)
+{
+
+	return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pde_index(vm_offset_t va)
+{
+
+	return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pdpe_index(vm_offset_t va)
+{
+
+	return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pml4e_index(vm_offset_t va)
+{
+
+	return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
+}
+
+/* Return a pointer to the PML4 slot that corresponds to a VA */
+static __inline pml4_entry_t *
+pmap_pml4e(pmap_t pmap, vm_offset_t va)
+{
+
+	return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va)
+{
+	pdp_entry_t *pdpe;
+
+	pdpe = (pdp_entry_t *)MACH_TO_DMAP(*pml4e & PG_FRAME);
+	return (&pdpe[pmap_pdpe_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pdpe(pmap_t pmap, vm_offset_t va)
+{
+	pml4_entry_t *pml4e;
+	pt_entry_t PG_V;
+
+	PG_V = pmap_valid_bit(pmap);
+	pml4e = pmap_pml4e(pmap, va);
+	if ((*pml4e & PG_V) == 0)
+		return (NULL);
+	return (pmap_pml4e_to_pdpe(pml4e, va));
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va)
+{
+	pd_entry_t *pde;
+
+	pde = (pd_entry_t *)MACH_TO_DMAP(*pdpe & PG_FRAME);
+	return (&pde[pmap_pde_index(va)]);
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va)
+{
+	pdp_entry_t *pdpe;
+	pt_entry_t PG_V;
+
+	PG_V = pmap_valid_bit(pmap);
+	pdpe = pmap_pdpe(pmap, va);
+	if (pdpe == NULL || (*pdpe & PG_V) == 0)
+		return (NULL);
+	return (pmap_pdpe_to_pde(pdpe, va));
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
+{
+	pt_entry_t *pte;
+
+	pte = (pt_entry_t *)MACH_TO_DMAP(*pde & PG_FRAME);
+	return (&pte[pmap_pte_index(va)]);
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+	pd_entry_t *pde;
+	pt_entry_t PG_V;
+
+	PG_V = pmap_valid_bit(pmap);
+	pde = pmap_pde(pmap, va);
+	if (pde == NULL || (*pde & PG_V) == 0)
+		return (NULL);
+	if ((*pde & PG_PS) != 0)	/* compat with i386 pmap_pte() */
+		return ((pt_entry_t *)pde);
+	return (pmap_pde_to_pte(pde, va));
+}
+
 /* Index offset into a pagetable, for a given va */
 static int
 pt_index(uintptr_t va)
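
A small sketch of how the walkers above compose, and of the
machine-to-physical step that distinguishes the Xen versions from
native. The names come from this hunk; the surrounding function is
hypothetical and not part of the commit:

/* Hypothetical lookup: return the pseudo-physical address backing
 * 'va' in 'pmap', or 0 if any intermediate level is missing.
 */
static vm_paddr_t
lookup_pa(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, PG_V;

	PG_V = pmap_valid_bit(pmap);
	pte = pmap_pte(pmap, va);	/* walks pml4e -> pdpe -> pde -> pte */
	if (pte == NULL || (*pte & PG_V) == 0)
		return (0);

	/* ptes hold machine frames; convert back to pseudo-physical */
	return (xpmap_mtop(*pte & PG_FRAME) | (va & PAGE_MASK));
}
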
@@ -229,21 +546,53 @@ pt_index(uintptr_t va)
 	return ((va & PDRMASK) >> PAGE_SHIFT);
 }
 
+static __inline void
+pmap_resident_count_inc(pmap_t pmap, int count)
+{
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pmap->pm_stats.resident_count += count;
+}
+
+static __inline void
+pmap_resident_count_dec(pmap_t pmap, int count)
+{
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pmap->pm_stats.resident_count -= count;
+}
+
+PMAP_INLINE pt_entry_t *
+vtopte(vm_offset_t va)
+{
+	u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+	KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va));
+
+	return (PTmap + ((va >> PAGE_SHIFT) & mask));
+}
+
+static __inline pd_entry_t *
+vtopde(vm_offset_t va)
+{
+	u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+	KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va));
+
+	return (PDmap + ((va >> PDRSHIFT) & mask));
+}
 
 /* return kernel virtual address of  'n' claimed physical pages at boot. */
-static uintptr_t
-vallocpages(vm_paddr_t *firstaddr, int n)
+static vm_paddr_t
+allocpages(vm_paddr_t *firstaddr, int n)
 {
-	uintptr_t ret = *firstaddr + KERNBASE;
-	bzero((void *)ret, n * PAGE_SIZE);
+	vm_paddr_t ret = *firstaddr;
+	bzero((void *)PTOV(ret), n * PAGE_SIZE);
 	*firstaddr += n * PAGE_SIZE;
 
-	/* Make sure we are still inside of available mapped va. */
-	if (PTOV(*firstaddr) > (xenstack + 512 * 1024)) {
-		printk("Attempt to use unmapped va\n");
-	}
 	KASSERT(PTOV(*firstaddr) <= (xenstack + 512 * 1024), 
 		("Attempt to use unmapped va\n"));
+
 	return (ret);
 }
 
@@ -258,9 +607,21 @@ static void
 pmap_xen_setpages_ro(uintptr_t va, vm_size_t npages)
 {
 	vm_size_t i;
+	pt_entry_t PG_V;
+	uintptr_t tva;
+	vm_paddr_t ma;
+
+	PG_V = pmap_valid_bit(kernel_pmap);
+
 	for (i = 0; i < npages; i++) {
-		PT_SET_MA(va + PAGE_SIZE * i, 
-			  phystomach(ptmb_vtop(va + PAGE_SIZE * i)) | PG_U | PG_V);
+		tva = va + ptoa(i);
+		ma = phystomach(ISBOOTVA(tva) ? VTOP(tva) :
+				ISDMAPVA(tva) ? DMAP_TO_PHYS(tva) :
+				0);
+		KASSERT(ma != 0, ("%s: Unknown kernel va \n", __func__));
+
+		PT_SET_MA(va + PAGE_SIZE * i,
+			  ma | PG_U | PG_V);
 	}
 }
 
@@ -269,18 +630,37 @@ static void
 pmap_xen_setpages_rw(uintptr_t va, vm_size_t npages)
 {
 	vm_size_t i;
+
+	pt_entry_t PG_V, PG_RW;
+	uintptr_t tva;
+	vm_paddr_t ma;
+
+	PG_V = pmap_valid_bit(kernel_pmap);
+	PG_RW = pmap_rw_bit(kernel_pmap);
+
 	for (i = 0; i < npages; i++) {
+		tva = va + ptoa(i);
+		ma = phystomach(ISBOOTVA(tva) ? VTOP(tva) :
+				ISDMAPVA(tva) ? DMAP_TO_PHYS(tva) :
+				0);
+		KASSERT(ma != 0, ("%s: Unknown kernel va \n", __func__));
+
+			      
 		PT_SET_MA(va + PAGE_SIZE * i, 
-			  phystomach(ptmb_vtop(va + PAGE_SIZE * i)) | PG_U | PG_V | PG_RW);
+			  ma | PG_U | PG_V | PG_RW);
 	}
 }
 
 extern int etext;	/* End of kernel text (virtual address) */
 extern int end;		/* End of kernel binary (virtual address) */
 /* Return pte flags according to kernel va access restrictions */
+
 static pt_entry_t
 pmap_xen_kernel_vaflags(uintptr_t va)
 {
+	pt_entry_t PG_RW;
+	PG_RW = pmap_rw_bit(kernel_pmap);
+
 	if ((va > (uintptr_t) &etext && /* .data, .bss et. al */
 	     (va < (uintptr_t) &end))
 	    ||
@@ -294,82 +674,116 @@ pmap_xen_kernel_vaflags(uintptr_t va)
 
 	return 0;
 }
+
 uintptr_t tmpva;
 
+CTASSERT(powerof2(NDMPML4E));
+
+/* number of kernel PDP slots */
+#define	NKPDPE(ptpgs)		howmany((ptpgs), NPDEPG)
+
 static void
-create_boot_pagetables(vm_paddr_t *firstaddr)
+nkpt_init(vm_paddr_t addr)
 {
-	int i;
-	int nkpdpe;
-	int nkmapped = atop(VTOP(xenstack + 512 * 1024 + PAGE_SIZE));
+	int pt_pages;
+	
+#ifdef NKPT
+	pt_pages = NKPT;
+#else
+	
+	pt_pages = howmany(addr, 1 << PDRSHIFT);
+	pt_pages += NKPDPE(pt_pages);
 
-	kernel_vm_end = PTOV(ptoa(nkmapped - 1));
+	/*
+	 * Add some slop beyond the bare minimum required for bootstrapping
+	 * the kernel.
+	 *
+	 * This is quite important when allocating KVA for kernel modules.
+	 * The modules are required to be linked in the negative 2GB of
+	 * the address space.  If we run out of KVA in this region then
+	 * pmap_growkernel() will need to allocate page table pages to map
+	 * the entire 512GB of KVA space which is an unnecessary tax on
+	 * physical memory.
+	 */
+	pt_pages += 8;		/* 16MB additional slop for kernel modules */
+#endif
+	nkpt = pt_pages;
+}
 
-	boot_ptphys = *firstaddr; /* lowest available r/w area */
 
-	/* Allocate pseudo-physical pages for kernel page tables. */
-	nkpt = howmany(nkmapped, NPTEPG);
-	nkpdpe = howmany(nkpt, NPDEPG);
-	KPML4phys = vallocpages(firstaddr, 1);
-	KPDPphys = vallocpages(firstaddr, NKPML4E);
-	KPDphys = vallocpages(firstaddr, nkpdpe);
-	KPTphys = vallocpages(firstaddr, nkpt);
+/* create a linear mapping for a span of 'nkmapped' pages */
 
-#ifdef DMAPSUPPORT
-	int ndm1g;
+static void
+create_pagetables(vm_paddr_t *firstaddr, int nkmapped)
+{

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


