Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 15 Jul 2013 01:36:50 -0600
From:      Chris Torek <torek@torek.net>
To:        freebsd-hackers@freebsd.org
Subject:   Re: expanding amd64 past the 1TB limit
Message-ID:  <201307150736.r6F7aoaw039955@elf.torek.net>
In-Reply-To: Your message of "Thu, 11 Jul 2013 00:41:38 -0700." <CAFgRE9HycyTL-Mch9KF08Df6qFzzCTb-aU0AkKQhi7F5gtGevA@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
(Durn mailing list software, eating attachments... there are just
the two so I will just send them one at a time here.  I took the
individual people off the to/cc since presumably you all got the 
attachments already.)

Date: Thu, 27 Jun 2013 18:49:29 -0600
Subject: [PATCH 2/2] increase physical and virtual memory limits

Increase kernel VM space: go from .5 TB of KVA and 1 TB of direct
map, to 8 TB of KVA and 16 TB of direct map.  However, we allocate
less direct map space for small physical-memory systems.  Also, if
Maxmem is so large that there is not enough direct map space,
reduce Maxmem to fit, so that the system can boot unassisted.
---
 amd64/amd64/pmap.c      | 44 +++++++++++++++++++++++++++++++++-----------
 amd64/include/pmap.h    | 36 +++++++++++++++++++++++++++++-------
 amd64/include/vmparam.h | 13 +++++++------
 3 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/amd64/amd64/pmap.c b/amd64/amd64/pmap.c
index 46f6940..5e43c93 100644
--- a/amd64/amd64/pmap.c
+++ b/amd64/amd64/pmap.c
@@ -232,6 +232,7 @@ u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */
 
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
+static int		ndmpdpphys;	/* number of DMPDPphys pages */
 
 static struct rwlock_padalign pvh_global_lock;
 
@@ -540,7 +541,18 @@ create_pagetables(vm_paddr_t *firstaddr)
 	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
 	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
 		ndmpdp = 4;
-	DMPDPphys = allocpages(firstaddr, NDMPML4E);
+	ndmpdpphys = howmany(ndmpdp, NPDPEPG);
+	if (ndmpdpphys > NDMPML4E) {
+		/*
+		 * Each NDMPML4E allows 512 GB, so limit to that,
+		 * and then readjust ndmpdp and ndmpdpphys.
+		 */
+		printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512);
+		Maxmem = atop(NDMPML4E * NBPML4);
+		ndmpdpphys = NDMPML4E;
+		ndmpdp = NDMPML4E * NPDEPG;
+	}
+	DMPDPphys = allocpages(firstaddr, ndmpdpphys);
 	ndm1g = 0;
 	if ((amd_feature & AMDID_PAGE1GB) != 0)
 		ndm1g = ptoa(Maxmem) >> PDPSHIFT;
@@ -557,6 +569,10 @@ create_pagetables(vm_paddr_t *firstaddr)
 	 * bootstrap.  We defer this until after all memory-size dependent
 	 * allocations are done (e.g. direct map), so that we don't have to
 	 * build in too much slop in our estimate.
+	 *
+	 * Note that when NKPML4E > 1, we have an empty page underneath
+	 * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed)
+	 * pages.  (pmap_enter requires a PD page to exist for each KPML4E.)
 	 */
 	nkpt_init(*firstaddr);
 	nkpdpe = NKPDPE(nkpt);
@@ -581,8 +597,8 @@ create_pagetables(vm_paddr_t *firstaddr)
 	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
 		pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
 
-	/* And connect up the PD to the PDP */
-	pdp_p = (pdp_entry_t *)KPDPphys;
+	/* And connect up the PD to the PDP (leaving room for L4 pages) */
+	pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
 	for (i = 0; i < nkpdpe; i++)
 		pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
 
@@ -619,14 +635,16 @@ create_pagetables(vm_paddr_t *firstaddr)
 	p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
 
 	/* Connect the Direct Map slot(s) up to the PML4. */
-	for (i = 0; i < NDMPML4E; i++) {
+	for (i = 0; i < ndmpdpphys; i++) {
 		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
 		p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
 	}
 
-	/* Connect the KVA slot up to the PML4 */
-	p4_p[KPML4I] = KPDPphys;
-	p4_p[KPML4I] |= PG_RW | PG_V | PG_U;
+	/* Connect the KVA slots up to the PML4 */
+	for (i = 0; i < NKPML4E; i++) {
+		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
+		p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U;
+	}
 }
 
 /*
@@ -1685,8 +1703,11 @@ pmap_pinit(pmap_t pmap)
 		pagezero(pmap->pm_pml4);
 
 	/* Wire in kernel global address entries. */
-	pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
-	for (i = 0; i < NDMPML4E; i++) {
+	for (i = 0; i < NKPML4E; i++) {
+		pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) |
+		    PG_RW | PG_V | PG_U;
+	}
+	for (i = 0; i < ndmpdpphys; i++) {
 		pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
 		    PG_RW | PG_V | PG_U;
 	}
@@ -1941,8 +1962,9 @@ pmap_release(pmap_t pmap)
 
 	m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
 
-	pmap->pm_pml4[KPML4I] = 0;	/* KVA */
-	for (i = 0; i < NDMPML4E; i++)	/* Direct Map */
+	for (i = 0; i < NKPML4E; i++)	/* KVA */
+		pmap->pm_pml4[KPML4BASE + i] = 0;
+	for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 		pmap->pm_pml4[DMPML4I + i] = 0;
 	pmap->pm_pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 
diff --git a/amd64/include/pmap.h b/amd64/include/pmap.h
index dc02e49..eda0295 100644
--- a/amd64/include/pmap.h
+++ b/amd64/include/pmap.h
@@ -113,28 +113,50 @@
 	((unsigned long)(l2) << PDRSHIFT) | \
 	((unsigned long)(l1) << PAGE_SHIFT))
 
-#define NKPML4E		1		/* number of kernel PML4 slots */
+/*
+ * Number of kernel PML4 slots.  Can be anywhere from 1 to 64 or so,
+ * but setting it larger than NDMPML4E makes no sense.
+ *
+ * Each slot provides .5 TB of kernel virtual space.
+ */
+#define NKPML4E		16
 
 #define	NUPML4E		(NPML4EPG/2)	/* number of userland PML4 pages */
 #define	NUPDPE		(NUPML4E*NPDPEPG)/* number of userland PDP pages */
 #define	NUPDE		(NUPDPE*NPDEPG)	/* number of userland PD entries */
 
 /*
- * NDMPML4E is the number of PML4 entries that are used to implement the
- * direct map.  It must be a power of two.
+ * NDMPML4E is the maximum number of PML4 entries that will be
+ * used to implement the direct map.  It must be a power of two,
+ * and should generally exceed NKPML4E.  The maximum possible
+ * value is 64; using 128 will make the direct map intrude into
+ * the recursive page table map.
  */
-#define	NDMPML4E	2
+#define	NDMPML4E	32
 
 /*
- * The *PDI values control the layout of virtual memory.  The starting address
+ * These values control the layout of virtual memory.  The starting address
  * of the direct map, which is controlled by DMPML4I, must be a multiple of
  * its size.  (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
+ *
+ * Note: KPML4I is the index of the (single) level 4 page that maps
+ * the KVA that holds KERNBASE, while KPML4BASE is the index of the
+ * first level 4 page that maps VM_MIN_KERNEL_ADDRESS.  If NKPML4E
+ * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra
+ * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to
+ * KERNBASE.  Similarly, if KMPL4I < (base+N-1), extra level 2 PDEs are
+ * needed to map from somewhere-above-KERNBASE to VM_MAX_KERNEL_ADDRESS.
+ *
+ * (KPML4I combines with KPDPI to choose where KERNBASE starts.
+ * Or, in other words, KPML4I provides bits 39..46 of KERNBASE,
+ * and KPDPI provides bits 30..38.)
  */
 #define	PML4PML4I	(NPML4EPG/2)	/* Index of recursive pml4 mapping */
 
-#define	KPML4I		(NPML4EPG-1)	/* Top 512GB for KVM */
-#define	DMPML4I		rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below KVM */
+#define	KPML4BASE	(NPML4EPG-NKPML4E) /* KVM at highest addresses */
+#define	DMPML4I		rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
 
+#define	KPML4I		(NPML4EPG-1)
 #define	KPDPI		(NPDPEPG-2)	/* kernbase at -2GB */
 
 /*
diff --git a/amd64/include/vmparam.h b/amd64/include/vmparam.h
index 33f62bd..cff2558 100644
--- a/amd64/include/vmparam.h
+++ b/amd64/include/vmparam.h
@@ -145,18 +145,19 @@
  * 0x0000000000000000 - 0x00007fffffffffff   user map
  * 0x0000800000000000 - 0xffff7fffffffffff   does not exist (hole)
  * 0xffff800000000000 - 0xffff804020100fff   recursive page table (512GB slot)
- * 0xffff804020101000 - 0xfffffdffffffffff   unused
- * 0xfffffe0000000000 - 0xfffffeffffffffff   1TB direct map
- * 0xffffff0000000000 - 0xffffff7fffffffff   unused
- * 0xffffff8000000000 - 0xffffffffffffffff   512GB kernel map
+ * 0xffff804020101000 - 0xffffdfffffffffff   unused
+ * 0xffffe00000000000 - 0xffffefffffffffff   16TB direct map
+ * 0xfffff00000000000 - 0xfffff7ffffffffff   unused
+ * 0xfffff80000000000 - 0xffffffffffffffff   8TB kernel map
  *
  * Within the kernel map:
  *
  * 0xffffffff80000000                        KERNBASE
  */
 
-#define	VM_MAX_KERNEL_ADDRESS	KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, NPTEPG-1)
-#define	VM_MIN_KERNEL_ADDRESS	KVADDR(KPML4I, NPDPEPG-512, 0, 0)
+#define	VM_MIN_KERNEL_ADDRESS	KVADDR(KPML4BASE, 0, 0, 0)
+#define	VM_MAX_KERNEL_ADDRESS	KVADDR(KPML4BASE + NKPML4E - 1, \
+					NPDPEPG-1, NPDEPG-1, NPTEPG-1)
 
 #define	DMAP_MIN_ADDRESS	KVADDR(DMPML4I, 0, 0, 0)
 #define	DMAP_MAX_ADDRESS	KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
-- 
1.8.2.1




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201307150736.r6F7aoaw039955>