Date:      Thu, 22 May 2003 19:06:25 -0700 (PDT)
From:      Peter Wemm <peter@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 31662 for review
Message-ID:  <200305230206.h4N26Pbn096709@repoman.freebsd.org>

http://perforce.freebsd.org/chv.cgi?CH=31662

Change 31662 by peter@peter_daintree on 2003/05/22 19:05:59

	Reverse integrate the peter_bighammer branch onto the hammer
	mainline.  This brings 512GB of user VM (with room for 128TB), a
	1GB 'direct mapped' segment (with room for 512GB) built from 2MB
	pages (a la alpha/sparc64), and moves the kernel to -1GB with room
	to increase KVM to 512GB.  (An illustrative address-layout sketch
	follows this description.)
	
	This leans heavily on the 3-level page table pmap code from alpha,
	extended here to 4 levels.  The alpha pmap relies on the direct
	mapped segment to avoid things like pmap_enter_quick().
	
	pmap_object_init_pt(), pmap_prefault() and pmap_copy() are #if 0'ed
	out for the moment.  Even with these missing, it seems to be a useful
	checkpoint.
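	As a rough orientation (illustrative only, not taken from this
	change), the sketch below shows how a canonical amd64 virtual
	address splits into the four page-table indexes the new pmap walks.
	The shift names match those used in the diff; the numeric values
	are the standard long-mode ones and are assumed here, not quoted
	from vmparam.h.  One PML4 slot covers 512GB (the new user VM), one
	PDP slot covers 1GB (the initial direct map), and one PD slot
	covers 2MB (the direct map's page size).

	/*
	 * Illustrative sketch only: standard 4-level long mode split,
	 * 9 index bits per level over 4KB pages.  Shift values are
	 * assumptions for this example.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT	12	/* 4KB pages: bits 0-11 are the offset */
	#define PDRSHIFT	21	/* one PD entry maps 2MB */
	#define PDPSHIFT	30	/* one PDP entry maps 1GB */
	#define PML4SHIFT	39	/* one PML4 entry maps 512GB */
	#define IDXMASK		0x1ff	/* 9 bits of index per level */

	int
	main(void)
	{
		uint64_t va = 0x00007fffdeadb000ULL;	/* hypothetical user VA */
		uint64_t l4 = (va >> PML4SHIFT) & IDXMASK;
		uint64_t l3 = (va >> PDPSHIFT) & IDXMASK;
		uint64_t l2 = (va >> PDRSHIFT) & IDXMASK;
		uint64_t l1 = (va >> PAGE_SHIFT) & IDXMASK;

		printf("PML4 %ju PDP %ju PD %ju PT %ju offset %#jx\n",
		    (uintmax_t)l4, (uintmax_t)l3, (uintmax_t)l2, (uintmax_t)l1,
		    (uintmax_t)(va & ((1ULL << PAGE_SHIFT) - 1)));
		return (0);
	}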

Affected files ...

.. //depot/projects/hammer/sys/amd64/amd64/genassym.c#18 integrate
.. //depot/projects/hammer/sys/amd64/amd64/locore.S#4 integrate
.. //depot/projects/hammer/sys/amd64/amd64/machdep.c#39 integrate
.. //depot/projects/hammer/sys/amd64/amd64/mem.c#6 integrate
.. //depot/projects/hammer/sys/amd64/amd64/pmap.c#8 integrate
.. //depot/projects/hammer/sys/amd64/amd64/trap.c#18 integrate
.. //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#8 integrate
.. //depot/projects/hammer/sys/amd64/conf/GENERIC#11 integrate
.. //depot/projects/hammer/sys/amd64/ia32/ia32_signal.c#4 integrate
.. //depot/projects/hammer/sys/amd64/ia32/ia32_sysvec.c#9 integrate
.. //depot/projects/hammer/sys/amd64/ia32/ia32_util.h#4 integrate
.. //depot/projects/hammer/sys/amd64/include/param.h#4 integrate
.. //depot/projects/hammer/sys/amd64/include/pmap.h#8 integrate
.. //depot/projects/hammer/sys/amd64/include/vmparam.h#7 integrate
.. //depot/projects/hammer/sys/nfsclient/nfs_bio.c#5 integrate

Differences ...

==== //depot/projects/hammer/sys/amd64/amd64/genassym.c#18 (text+ko) ====

@@ -99,8 +99,11 @@
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(NPTEPG, NPTEPG);
 ASSYM(NPDEPG, NPDEPG);
-ASSYM(NPDEPTD, NPDEPTD);
-ASSYM(NPGPTD, NPGPTD);
+ASSYM(addr_PTmap, addr_PTmap);
+ASSYM(addr_PDmap, addr_PDmap);
+ASSYM(addr_PDPmap, addr_PDPmap);
+ASSYM(addr_PML4map, addr_PML4map);
+ASSYM(addr_PML4pml4e, addr_PML4pml4e);
 ASSYM(PDESIZE, sizeof(pd_entry_t));
 ASSYM(PTESIZE, sizeof(pt_entry_t));
 ASSYM(PTESHIFT, PTESHIFT);
@@ -109,9 +112,14 @@
 ASSYM(PDRSHIFT, PDRSHIFT);
 ASSYM(PDPSHIFT, PDPSHIFT);
 ASSYM(PML4SHIFT, PML4SHIFT);
+ASSYM(val_KPDPI, KPDPI);
+ASSYM(val_KPML4I, KPML4I);
+ASSYM(val_PML4PML4I, PML4PML4I);
 ASSYM(USRSTACK, USRSTACK);
 ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
 ASSYM(KERNBASE, KERNBASE);
+ASSYM(DMAP_MIN_ADDRESS, DMAP_MIN_ADDRESS);
+ASSYM(DMAP_MAX_ADDRESS, DMAP_MAX_ADDRESS);
 ASSYM(MCLBYTES, MCLBYTES);
 ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
 ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15));

==== //depot/projects/hammer/sys/amd64/amd64/locore.S#4 (text+ko) ====

@@ -36,8 +36,15 @@
 /*
  * Compiled KERNBASE location
  */
-	.globl	kernbase
+	.globl	kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
 	.set	kernbase,KERNBASE
+	.set	loc_PTmap,addr_PTmap
+	.set	loc_PDmap,addr_PDmap
+	.set	loc_PDPmap,addr_PDPmap
+	.set	loc_PML4map,addr_PML4map
+	.set	loc_PML4pml4e,addr_PML4pml4e
+	.set	dmapbase,DMAP_MIN_ADDRESS
+	.set	dmapend,DMAP_MAX_ADDRESS
 
 	.text
 /**********************************************************************

==== //depot/projects/hammer/sys/amd64/amd64/machdep.c#39 (text+ko) ====

@@ -133,11 +133,6 @@
 
 u_int64_t	modulep;	/* phys addr of metadata table */
 u_int64_t	physfree;	/* first free page after kernel */
-u_int64_t	IdlePTD;	/* phys addr of kernel PTD */
-u_int64_t	IdlePDP;	/* phys addr of kernel level 3 */
-u_int64_t	IdlePML4;	/* phys addr of kernel level 4 */
-struct user	*proc0uarea;	/* address of proc 0 uarea space */
-vm_offset_t	proc0kstack;	/* address of proc 0 kstack space */
 
 int cold = 1;
 
@@ -945,7 +940,7 @@
 		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
-	pmap_bootstrap(first, 0);
+	pmap_bootstrap(&first);
 
 	/*
 	 * Size up each available chunk of physical memory.
@@ -1086,69 +1081,6 @@
 	return (ret);
 }
 
-static void
-create_pagetables(void)
-{
-	u_int64_t p0kpa;
-	u_int64_t p0upa;
-	u_int64_t KPTphys;
-	int i;
-
-	/* Allocate pages */
-	KPTphys = allocpages(NKPT);
-	IdlePML4 = allocpages(NKPML4E);
-	IdlePDP = allocpages(NKPDPE);
-	IdlePTD = allocpages(NPGPTD);
-	p0upa = allocpages(UAREA_PAGES);
-	p0kpa = allocpages(KSTACK_PAGES);
-
-	proc0uarea = (struct user *)(p0upa + KERNBASE);
-	proc0kstack = p0kpa + KERNBASE;
-
-	/* Fill in the underlying page table pages */
-	/* Read-only from zero to physfree */
-	/* XXX not fully used, underneath 2M pages */
-	for (i = 0; (i << PAGE_SHIFT) < physfree; i++) {
-		((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
-		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
-	}
-
-	/* Now map the page tables at their location within PTmap */
-	for (i = 0; i < NKPT; i++) {
-		((pd_entry_t *)IdlePTD)[i + KPTDI] = KPTphys + (i << PAGE_SHIFT);
-		((pd_entry_t *)IdlePTD)[i + KPTDI] |= PG_RW | PG_V;
-	}
-
-	/* Map from zero to end of allocations under 2M pages */
-	/* This replaces some of the PTD entries above */
-	for (i = 0; (i << PDRSHIFT) < physfree; i++) {
-		((pd_entry_t *)IdlePTD)[i] = i << PDRSHIFT;
-		((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V | PG_PS;
-	}
-
-	/* Now map the page tables at their location within PTmap */
-	for (i = 0; i < NKPT; i++) {
-		((pd_entry_t *)IdlePTD)[i] = KPTphys + (i << PAGE_SHIFT);
-		((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V;
-	}
-
-	/* Now map the PTD at the top of the PTmap (ie: PTD[]) */
-	for (i = 0; i < NPGPTD; i++) {
-		((pd_entry_t *)IdlePTD)[i + PTDPTDI] = IdlePTD + (i << PAGE_SHIFT);
-		((pd_entry_t *)IdlePTD)[i + PTDPTDI] |= PG_RW | PG_V;
-	}
-
-	/* And connect up the PTD to the PDP */
-	for (i = 0; i < NPGPTD; i++) {
-		((pdp_entry_t *)IdlePDP)[i] = IdlePTD + (i << PAGE_SHIFT);
-		((pdp_entry_t *)IdlePDP)[i] |= PG_RW | PG_V | PG_U;
-	}
-
-	/* And connect up the PDP to the PML4 */
-	((pdp_entry_t *)IdlePML4)[0] = IdlePDP;
-	((pdp_entry_t *)IdlePML4)[0] |= PG_RW | PG_V | PG_U;
-}
-
 void
 hammer_time(void)
 {
@@ -1157,18 +1089,14 @@
 	struct region_descriptor r_gdt, r_idt;
 	struct pcpu *pc;
 	u_int64_t msr;
+	char *env;
 
 	/* Turn on PTE NX (no execute) bit */
 	msr = rdmsr(MSR_EFER) | EFER_NXE;
 	wrmsr(MSR_EFER, msr);
-	create_pagetables();
 
-	/* XXX do %cr0 as well */
-	load_cr4(rcr4() | CR4_PGE | CR4_PSE);
-	load_cr3(IdlePML4);
-
-	proc0.p_uarea = proc0uarea;
-	thread0.td_kstack = proc0kstack;
+	proc0.p_uarea = (struct user *)(allocpages(UAREA_PAGES) + KERNBASE);
+	thread0.td_kstack = allocpages(KSTACK_PAGES) + KERNBASE;
 	thread0.td_pcb = (struct pcb *)
 	   (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 	atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1310,8 +1238,12 @@
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
-	thread0.td_pcb->pcb_cr3 = IdlePML4;
+	thread0.td_pcb->pcb_cr3 = KPML4phys;
 	thread0.td_frame = &proc0_tf;
+
+        env = getenv("kernelname");
+	if (env != NULL)
+		strlcpy(kernelname, env, sizeof(kernelname));
 }
 
 void

==== //depot/projects/hammer/sys/amd64/amd64/mem.c#6 (text+ko) ====

@@ -63,6 +63,7 @@
 #include <machine/frame.h>
 #include <machine/psl.h>
 #include <machine/specialreg.h>
+#include <machine/vmparam.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -177,7 +178,7 @@
 			addr = trunc_page(uio->uio_offset);
 			eaddr = round_page(uio->uio_offset + c);
 
-			if (addr < (vm_offset_t)VADDR(0, 0, PTDPTDI, 0))
+			if (addr < (vm_offset_t)KERNBASE)
 				return (EFAULT);
 			for (; addr < eaddr; addr += PAGE_SIZE) 
 				if (pmap_extract(kernel_pmap, addr) == 0)

==== //depot/projects/hammer/sys/amd64/amd64/pmap.c#8 (text+ko) ====

@@ -125,6 +125,7 @@
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/uma.h>
+#include <vm/uma_int.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
@@ -149,34 +150,19 @@
 #endif
 
 /*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
-
-#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
-#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
-
-/*
  * Given a map and a machine independent protection code,
  * convert to a vax protection code.
  */
 #define pte_prot(m, p)	(protection_codes[p])
-static int protection_codes[8];
+static pt_entry_t protection_codes[8];
 
 struct pmap kernel_pmap_store;
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps;
 static struct mtx allpmaps_lock;
 
-vm_paddr_t avail_start;	/* PA of first available physical page */
-vm_paddr_t avail_end;	/* PA of last available physical page */
+vm_paddr_t avail_start;		/* PA of first available physical page */
+vm_paddr_t avail_end;		/* PA of last available physical page */
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
@@ -184,6 +170,14 @@
 static int nkpt;
 vm_offset_t kernel_vm_end;
 
+static u_int64_t	KPTphys;	/* phys addr of kernel level 1 */
+static u_int64_t	KPDphys;	/* phys addr of kernel level 2 */
+static u_int64_t	KPDPphys;	/* phys addr of kernel level 3 */
+u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */
+
+static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
+static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
+
 /*
  * Data for the pv entry allocation mechanism
  */
@@ -196,10 +190,8 @@
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1 = 0;
-static pt_entry_t *CMAP2, *CMAP3, *ptmmap;
+static pt_entry_t *ptmmap;
 caddr_t CADDR1 = 0, ptvmmap = 0;
-static caddr_t CADDR2, CADDR3;
-static struct mtx CMAPCADDR12_lock;
 static pt_entry_t *msgbufmap;
 struct msgbuf *msgbufp = 0;
 
@@ -209,16 +201,15 @@
 static pt_entry_t *pt_crashdumpmap;
 static caddr_t crashdumpmap;
 
-static pt_entry_t *PMAP1 = 0;
-static pt_entry_t *PADDR1 = 0;
-
 static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
 static pv_entry_t get_pv_entry(void);
-static void	i386_protection_init(void);
+static void	amd64_protection_init(void);
 static __inline void	pmap_changebit(vm_page_t m, int bit, boolean_t setem);
 
+#if 0
 static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va,
 				      vm_page_t m, vm_page_t mpte);
+#endif
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
 static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
@@ -252,10 +243,192 @@
 	return newaddr;
 }
 
+/********************/
+/* Inline functions */
+/********************/
+
+/* Return a non-clipped PD index for a given VA */
+static __inline unsigned long
+pmap_pde_pindex(vm_offset_t va)
+{
+	return va >> PDRSHIFT;
+}
+
+
+/* Return various clipped indexes for a given VA */
+static __inline int
+pmap_pte_index(vm_offset_t va)
+{
+
+	return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
+}
+
+static __inline int
+pmap_pde_index(vm_offset_t va)
+{
+
+	return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+}
+
+static __inline int
+pmap_pdpe_index(vm_offset_t va)
+{
+
+	return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
+}
+
+static __inline int
+pmap_pml4e_index(vm_offset_t va)
+{
+
+	return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
+}
+
+/* Return a pointer to the PML4 slot that corresponds to a VA */
+static __inline pml4_entry_t *
+pmap_pml4e(pmap_t pmap, vm_offset_t va)
+{
+
+	if (!pmap)
+		return NULL;
+	return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pdpe(pmap_t pmap, vm_offset_t va)
+{
+	pml4_entry_t *pml4e;
+	pdp_entry_t *pdpe;
+
+	pml4e = pmap_pml4e(pmap, va);
+	if (pml4e == NULL || (*pml4e & PG_V) == 0)
+		return NULL;
+	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
+	return (&pdpe[pmap_pdpe_index(va)]);
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+
+	pdpe = pmap_pdpe(pmap, va);
+	if (pdpe == NULL || (*pdpe & PG_V) == 0)
+		 return NULL;
+	pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
+	return (&pde[pmap_pde_index(va)]);
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+
+	pde = pmap_pde(pmap, va);
+	if (pde == NULL || (*pde & PG_V) == 0)
+		return NULL;
+	pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+	return (&pte[pmap_pte_index(va)]);
+}
+
+
+PMAP_INLINE pt_entry_t *
+vtopte(vm_offset_t va)
+{
+	u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+	return (PTmap + (amd64_btop(va) & mask));
+}
+
+static u_int64_t
+allocpages(int n)
+{
+	u_int64_t ret;
+
+	ret = avail_start;
+	bzero((void *)ret, n * PAGE_SIZE);
+	avail_start += n * PAGE_SIZE;
+	return (ret);
+}
+
+static void
+create_pagetables(void)
+{
+	int i;
+
+	/* Allocate pages */
+	KPTphys = allocpages(NKPT);
+	KPML4phys = allocpages(1);
+	KPDPphys = allocpages(NKPML4E);
+	KPDphys = allocpages(NKPDPE);
+
+	DMPDPphys = allocpages(NDMPML4E);
+	DMPDphys = allocpages(NDMPDPE);
+
+	/* Fill in the underlying page table pages */
+	/* Read-only from zero to physfree */
+	/* XXX not fully used, underneath 2M pages */
+	for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) {
+		((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
+		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
+	}
+
+	/* Now map the page tables at their location within PTmap */
+	for (i = 0; i < NKPT; i++) {
+		((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
+		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
+	}
+
+#if 0
+	/* Map from zero to end of allocations under 2M pages */
+	/* This replaces some of the KPTphys entries above */
+	for (i = 0; (i << PDRSHIFT) < avail_start; i++) {
+		((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
+		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS;
+	}
+#endif
+
+	/* And connect up the PD to the PDP */
+	for (i = 0; i < NKPDPE; i++) {
+		((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
+		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
+	}
+
+
+	/* Now set up the direct map space using 2MB pages */
+	for (i = 0; i < NPDEPG; i++) {
+		((pd_entry_t *)DMPDphys)[i] = i << PDRSHIFT;
+		((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS;
+	}
+
+	/* And the direct map space's PDP */
+	for (i = 0; i < NDMPDPE; i++) {
+		((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
+		((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
+	}
+
+	/* And recursively map PML4 to itself in order to get PTmap */
+	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
+	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
+
+	/* Connect the Direct Map slot up to the PML4 */
+	((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
+	((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;
+
+	/* Connect the KVA slot up to the PML4 */
+	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
+	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
+}
+
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
- *	On the i386 this is called after mapping has already been enabled
+ *	On amd64 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
@@ -263,40 +436,39 @@
  *	(physical) address starting relative to 0]
  */
 void
-pmap_bootstrap(firstaddr, loadaddr)
-	vm_paddr_t firstaddr;
-	vm_paddr_t loadaddr;
+pmap_bootstrap(firstaddr)
+	vm_paddr_t *firstaddr;
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
-	int i;
 
-	avail_start = firstaddr;
+	avail_start = *firstaddr;
 
 	/*
-	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
-	 * large. It should instead be correctly calculated in locore.s and
-	 * not based on 'first' (which is a physical address, not a virtual
-	 * address, for the start of unused physical memory). The kernel
-	 * page tables are NOT double mapped and thus should not be included
-	 * in this calculation.
+	 * Create an initial set of page tables to run the kernel in.
 	 */
-	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
+	create_pagetables();
+	*firstaddr = avail_start;
+
+	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
 	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
+
+	/* XXX do %cr0 as well */
+	load_cr4(rcr4() | CR4_PGE | CR4_PSE);
+	load_cr3(KPML4phys);
+
 	/*
 	 * Initialize protection array.
 	 */
-	i386_protection_init();
+	amd64_protection_init();
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
-	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
-	kernel_pmap->pm_pdp = (pdp_entry_t *) (KERNBASE + IdlePDP);
-	kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + IdlePML4);
+	kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 	LIST_INIT(&allpmaps);
@@ -316,16 +488,11 @@
 	va = virtual_avail;
 	pte = vtopte(va);
 
-	/*
-	 * CMAP1/CMAP2 are used for zeroing and copying pages.
-	 * CMAP3 is used for the idle process page zeroing.
+        /*
+	 * CMAP1 is only used for the memory test.
 	 */
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
-	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
-	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
 
-	mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
-
 	/*
 	 * Crashdump maps.
 	 */
@@ -344,16 +511,9 @@
 	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
 	       atop(round_page(MSGBUF_SIZE)))
 
-	/*
-	 * ptemap is used for pmap_pte_quick
-	 */
-	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
-
 	virtual_avail = va;
 
-	*CMAP1 = *CMAP2 = 0;
-	for (i = 0; i < NKPT; i++)
-		PTD[i] = 0;
+	*CMAP1 = 0;
 
 	invltlb();
 }
@@ -365,6 +525,52 @@
 	return (void *)kmem_alloc(kernel_map, bytes);
 }
 
+void *
+uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+	static vm_pindex_t colour;
+	vm_page_t m;
+	int pflags;
+	void *va;
+
+	*flags = UMA_SLAB_PRIV;
+
+	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
+		pflags = VM_ALLOC_INTERRUPT;
+	else
+		pflags = VM_ALLOC_SYSTEM;
+
+	if (wait & M_ZERO)
+		pflags |= VM_ALLOC_ZERO;
+
+	for (;;) {
+		m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ);
+		if (m == NULL) {
+			if (wait & M_NOWAIT)
+				return (NULL);
+			else
+				VM_WAIT;
+		} else
+			break;
+	}
+
+	va = (void *)PHYS_TO_DMAP(m->phys_addr);
+	if ((m->flags & PG_ZERO) == 0)
+		pagezero(va);
+	return (va);
+}
+
+void
+uma_small_free(void *mem, int size, u_int8_t flags)
+{
+	vm_page_t m;
+
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
+	vm_page_lock_queues();
+	vm_page_free(m);
+	vm_page_unlock_queues();
+}
+
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
@@ -431,6 +637,7 @@
  * Low level helper routines.....
  ***************************************************/
 
+
 #if defined(PMAP_DIAGNOSTIC)
 
 /*
@@ -496,48 +703,6 @@
 }
 
 /*
- * Are we current address space or kernel?
- */
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
-	return (pmap == kernel_pmap ||
-	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
-}
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists.  This eliminates many coarse-grained
- * invltlb calls.  Note that many of the pv list
- * scans are across different pmaps.  It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- */
-pt_entry_t * 
-pmap_pte_quick(pmap, va)
-	register pmap_t pmap;
-	vm_offset_t va;
-{
-	pd_entry_t newpf;
-	pd_entry_t *pde;
-
-	pde = pmap_pde(pmap, va);
-	if (*pde & PG_PS)
-		return (pde);
-	if (*pde != 0) {
-		/* are we current address space or kernel? */
-		if (pmap_is_current(pmap))
-			return vtopte(va);
-		newpf = *pde & PG_FRAME;
-		if (((*PMAP1) & PG_FRAME) != newpf) {
-			*PMAP1 = newpf | PG_RW | PG_V;
-			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
-		}
-		return PADDR1 + (amd64_btop(va) & (NPTEPG - 1));
-	}
-	return (0);
-}
-
-/*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
@@ -550,24 +715,47 @@
 {
 	vm_paddr_t rtval;
 	pt_entry_t *pte;
-	pd_entry_t pde;
+	pd_entry_t pde, *pdep;
 
 	if (pmap == 0)
 		return 0;
-	pde = pmap->pm_pdir[va >> PDRSHIFT];
-	if (pde != 0) {
-		if ((pde & PG_PS) != 0) {
-			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+	pdep = pmap_pde(pmap, va);
+	if (pdep) {
+		pde = *pdep;
+		if (pde) {
+			if ((pde & PG_PS) != 0) {
+				rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+				return rtval;
+			}
+			pte = pmap_pte(pmap, va);
+			rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
 			return rtval;
 		}
-		pte = pmap_pte_quick(pmap, va);
-		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
-		return rtval;
 	}
 	return 0;
 
 }
 
+vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+	pd_entry_t *pde;
+	vm_paddr_t pa;
+
+	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
+		pa = DMAP_TO_PHYS(va);
+	} else {
+		pde = pmap_pde(kernel_pmap, va);
+		if (*pde & PG_PS) {
+			pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1));
+		} else {
+			pa = *vtopte(va);
+			pa = (pa & PG_FRAME) | (va & PAGE_MASK);
+		}
+	}
+	return pa;
+}
+
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
@@ -689,7 +877,7 @@
 #endif
 
 /*
- * Create the kernel stack (including pcb for i386) for a new thread.
+ * Create the kernel stack (including pcb for amd64) for a new thread.
  * This routine directly affects the fork perf for a process and
  * create performance for a thread.
  */
@@ -899,7 +1087,7 @@
  * drops to zero, then it decrements the wire count.
  */
 static int 
-_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 
 	while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
@@ -910,16 +1098,51 @@
 		/*
 		 * unmap the page table page
 		 */
-		pmap->pm_pdir[m->pindex] = 0;
+		if (m->pindex >= (NUPDE + NUPDPE)) {
+			/* PDP page */
+			pml4_entry_t *pml4;
+			pml4 = pmap_pml4e(pmap, va);
+			pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE));
+			*pml4 = 0;
+		} else if (m->pindex >= NUPDE) {
+			/* PD page */
+			pdp_entry_t *pdp;
+			pdp = pmap_pdpe(pmap, va);
+			pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE);
+			*pdp = 0;
+		} else {
+			/* PTE page */
+			pd_entry_t *pd;
+			pd = pmap_pde(pmap, va);
+			pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex);
+			*pd = 0;
+		}
 		--pmap->pm_stats.resident_count;
-		if (pmap_is_current(pmap)) {
-			/*
-			 * Do an invltlb to make the invalidated mapping
-			 * take effect immediately.
-			 */
-			pteva = VM_MAXUSER_ADDRESS + amd64_ptob(m->pindex);
-			pmap_invalidate_page(pmap, pteva);
+		if (m->pindex < NUPDE) {
+			/* Unhold the PD page */
+			vm_page_t pdpg;
+			pdpg = vm_page_lookup(pmap->pm_pteobj, NUPDE + pmap_pdpe_index(va));
+			while (vm_page_sleep_if_busy(pdpg, FALSE, "pulook"))
+				vm_page_lock_queues();
+			vm_page_unhold(pdpg);
+			if (pdpg->hold_count == 0)
+				_pmap_unwire_pte_hold(pmap, va, pdpg);
+		}
+		if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
+			/* Unhold the PDP page */
+			vm_page_t pdppg;
+			pdppg = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pmap_pml4e_index(va));
+			while (vm_page_sleep_if_busy(pdppg, FALSE, "pulooK"))
+				vm_page_lock_queues();
+			vm_page_unhold(pdppg);
+			if (pdppg->hold_count == 0)
+				_pmap_unwire_pte_hold(pmap, va, pdppg);
 		}
+		/*
+		 * Do an invltlb to make the invalidated mapping
+		 * take effect immediately.
+		 */
+		pmap_invalidate_page(pmap, pteva);
 
 		/*
 		 * If the page is finally unwired, simply free it.
@@ -936,11 +1159,11 @@
 }
 
 static PMAP_INLINE int
-pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	vm_page_unhold(m);
 	if (m->hold_count == 0)
-		return _pmap_unwire_pte_hold(pmap, m);
+		return _pmap_unwire_pte_hold(pmap, va, m);
 	else
 		return 0;
 }
@@ -952,23 +1175,24 @@
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
 {
-	unsigned ptepindex;
+	unsigned long ptepindex;
+
 	if (va >= VM_MAXUSER_ADDRESS)
 		return 0;
 
 	if (mpte == NULL) {
-		ptepindex = (va >> PDRSHIFT);
+		ptepindex = pmap_pde_pindex(va);
 		if (pmap->pm_pteobj->root &&
-			(pmap->pm_pteobj->root->pindex == ptepindex)) {
+		    pmap->pm_pteobj->root->pindex == ptepindex) {
 			mpte = pmap->pm_pteobj->root;
 		} else {
 			while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL &&
-			       vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
+			    vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
 				vm_page_lock_queues();
 		}
 	}
 
-	return pmap_unwire_pte_hold(pmap, mpte);
+	return pmap_unwire_pte_hold(pmap, va, mpte);
 }
 
 void
@@ -976,9 +1200,7 @@
 	struct pmap *pmap;
 {
 
-	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD);
-	pmap->pm_pdp = (pdp_entry_t *)(KERNBASE + IdlePDP);
-	pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + IdlePML4);
+	pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -995,90 +1217,39 @@
 pmap_pinit(pmap)
 	register struct pmap *pmap;
 {
-	vm_page_t ptdpg[NPGPTD];
-	vm_page_t pdppg;
 	vm_page_t pml4pg;
-	vm_paddr_t pa;
-	int i;
-
-	/*
-	 * No need to allocate page table space yet but we do need a valid
-	 * page directory table.
-	 */
-	if (pmap->pm_pdir == NULL) {
-		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map,
-		    NBPTD);
-		pmap->pm_pdp = (pdp_entry_t *)kmem_alloc_pageable(kernel_map,
-		    PAGE_SIZE);
-		pmap->pm_pml4 = (pml4_entry_t *)kmem_alloc_pageable(kernel_map,
-		    PAGE_SIZE);
-	}
 
 	/*
 	 * allocate object for the ptes
 	 */
 	if (pmap->pm_pteobj == NULL)
-		pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI +
-		    NPGPTD + 2);
+		pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + NUPML4E + 1);
 
 	/*
-	 * allocate the page directory page(s)
+	 * allocate the page directory page
 	 */
-	for (i = 0; i < NPGPTD; i++) {
-		ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i,
-		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED |
-		    VM_ALLOC_ZERO);
-		vm_page_lock_queues();
-		vm_page_flag_clear(ptdpg[i], PG_BUSY);
-		ptdpg[i]->valid = VM_PAGE_BITS_ALL;
-		vm_page_unlock_queues();
-	}
-
-	pml4pg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD,
+	pml4pg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + NUPML4E,
 	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 	vm_page_lock_queues();
 	vm_page_flag_clear(pml4pg, PG_BUSY);
 	pml4pg->valid = VM_PAGE_BITS_ALL;
 	vm_page_unlock_queues();
 
-	pdppg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD + 1,
-	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
-	vm_page_lock_queues();
-	vm_page_flag_clear(pdppg, PG_BUSY);
-	pdppg->valid = VM_PAGE_BITS_ALL;
-	vm_page_unlock_queues();
+	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
 
-	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-	pmap_qenter((vm_offset_t)pmap->pm_pdp, &pdppg, 1);
-	pmap_qenter((vm_offset_t)pmap->pm_pml4, &pml4pg, 1);
-
-	for (i = 0; i < NPGPTD; i++) {
-		if ((ptdpg[i]->flags & PG_ZERO) == 0)
-			bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
-	}
-	if ((pdppg->flags & PG_ZERO) == 0)
-		bzero(pmap->pm_pdp, PAGE_SIZE);
 	if ((pml4pg->flags & PG_ZERO) == 0)
 		bzero(pmap->pm_pml4, PAGE_SIZE);
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
+
 	/* Wire in kernel global address entries. */
-	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
+	pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
+	pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;

>>> TRUNCATED FOR MAIL (1000 lines) <<<
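The truncated remainder of the pmap.c diff, together with the pmap.h and
vmparam.h changes listed above, carries the rest of the direct-map plumbing.
PHYS_TO_DMAP()/DMAP_TO_PHYS(), already used above in pmap_pdpe(), pmap_pte(),
pmap_kextract() and uma_small_alloc(), rely on every physical page having a
fixed kernel virtual alias inside the direct-map window, so the conversion is
plain address arithmetic rather than a temporary mapping.  The following is
only a sketch of that idea under an assumed base address; the real macro
definitions live in the (truncated) headers.

/*
 * Sketch of the direct-map conversion idea only.  DMAP_MIN_ADDRESS below is
 * a stand-in value, not the constant defined by this change.
 */
#include <assert.h>
#include <stdint.h>

#define DMAP_MIN_ADDRESS	0xfffffe0000000000ULL	/* assumed stand-in base */

#define PHYS_TO_DMAP(pa)	((uint64_t)(pa) + DMAP_MIN_ADDRESS)
#define DMAP_TO_PHYS(va)	((uint64_t)(va) - DMAP_MIN_ADDRESS)

int
main(void)
{
	uint64_t pa = 0x12345000ULL;		/* some physical page address */
	uint64_t va = PHYS_TO_DMAP(pa);		/* its permanent kernel alias */

	/* Round-tripping is pure arithmetic; no transient mapping is set up. */
	assert(DMAP_TO_PHYS(va) == pa);
	return (0);
}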


