Date: Thu, 22 May 2003 19:06:25 -0700 (PDT)
From: Peter Wemm <peter@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 31662 for review
Message-ID: <200305230206.h4N26Pbn096709@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=31662 Change 31662 by peter@peter_daintree on 2003/05/22 19:05:59 Reverse integrate the peter_bighammer branch onto the hammer mainline. This brings 512GB user VM (room for 128TB), a 1GB (room for 512GB) 'direct mapped' segment using 2MB pages (ala alpha/sparc64), moves the kernel to -1GB with room to increase KVM to 512GB. This leans heavily on the 3-level page table pmap code from alpha, but extended for 4 levels. The alpha pmap depends heavily on the direct mapped segment to avoid things like pmap_enter_quick(). pmap_object_init_pt(), pmap_prefault() and pmap_copy() are #if 0'ed out for the moment. Even with these missing, it seems to be a useful checkpoint. Affected files ... .. //depot/projects/hammer/sys/amd64/amd64/genassym.c#18 integrate .. //depot/projects/hammer/sys/amd64/amd64/locore.S#4 integrate .. //depot/projects/hammer/sys/amd64/amd64/machdep.c#39 integrate .. //depot/projects/hammer/sys/amd64/amd64/mem.c#6 integrate .. //depot/projects/hammer/sys/amd64/amd64/pmap.c#8 integrate .. //depot/projects/hammer/sys/amd64/amd64/trap.c#18 integrate .. //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#8 integrate .. //depot/projects/hammer/sys/amd64/conf/GENERIC#11 integrate .. //depot/projects/hammer/sys/amd64/ia32/ia32_signal.c#4 integrate .. //depot/projects/hammer/sys/amd64/ia32/ia32_sysvec.c#9 integrate .. //depot/projects/hammer/sys/amd64/ia32/ia32_util.h#4 integrate .. //depot/projects/hammer/sys/amd64/include/param.h#4 integrate .. //depot/projects/hammer/sys/amd64/include/pmap.h#8 integrate .. //depot/projects/hammer/sys/amd64/include/vmparam.h#7 integrate .. //depot/projects/hammer/sys/nfsclient/nfs_bio.c#5 integrate Differences ... ==== //depot/projects/hammer/sys/amd64/amd64/genassym.c#18 (text+ko) ==== @@ -99,8 +99,11 @@ ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); ASSYM(NPDEPG, NPDEPG); -ASSYM(NPDEPTD, NPDEPTD); -ASSYM(NPGPTD, NPGPTD); +ASSYM(addr_PTmap, addr_PTmap); +ASSYM(addr_PDmap, addr_PDmap); +ASSYM(addr_PDPmap, addr_PDPmap); +ASSYM(addr_PML4map, addr_PML4map); +ASSYM(addr_PML4pml4e, addr_PML4pml4e); ASSYM(PDESIZE, sizeof(pd_entry_t)); ASSYM(PTESIZE, sizeof(pt_entry_t)); ASSYM(PTESHIFT, PTESHIFT); @@ -109,9 +112,14 @@ ASSYM(PDRSHIFT, PDRSHIFT); ASSYM(PDPSHIFT, PDPSHIFT); ASSYM(PML4SHIFT, PML4SHIFT); +ASSYM(val_KPDPI, KPDPI); +ASSYM(val_KPML4I, KPML4I); +ASSYM(val_PML4PML4I, PML4PML4I); ASSYM(USRSTACK, USRSTACK); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(KERNBASE, KERNBASE); +ASSYM(DMAP_MIN_ADDRESS, DMAP_MIN_ADDRESS); +ASSYM(DMAP_MAX_ADDRESS, DMAP_MAX_ADDRESS); ASSYM(MCLBYTES, MCLBYTES); ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15)); ==== //depot/projects/hammer/sys/amd64/amd64/locore.S#4 (text+ko) ==== @@ -36,8 +36,15 @@ /* * Compiled KERNBASE location */ - .globl kernbase + .globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend .set kernbase,KERNBASE + .set loc_PTmap,addr_PTmap + .set loc_PDmap,addr_PDmap + .set loc_PDPmap,addr_PDPmap + .set loc_PML4map,addr_PML4map + .set loc_PML4pml4e,addr_PML4pml4e + .set dmapbase,DMAP_MIN_ADDRESS + .set dmapend,DMAP_MAX_ADDRESS .text /********************************************************************** ==== //depot/projects/hammer/sys/amd64/amd64/machdep.c#39 (text+ko) ==== @@ -133,11 +133,6 @@ u_int64_t modulep; /* phys addr of metadata table */ u_int64_t physfree; /* first free page after kernel */ -u_int64_t IdlePTD; /* phys addr of kernel PTD */ -u_int64_t IdlePDP; /* phys addr of 
kernel level 3 */ -u_int64_t IdlePML4; /* phys addr of kernel level 4 */ -struct user *proc0uarea; /* address of proc 0 uarea space */ -vm_offset_t proc0kstack; /* address of proc 0 kstack space */ int cold = 1; @@ -945,7 +940,7 @@ physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ - pmap_bootstrap(first, 0); + pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. @@ -1086,69 +1081,6 @@ return (ret); } -static void -create_pagetables(void) -{ - u_int64_t p0kpa; - u_int64_t p0upa; - u_int64_t KPTphys; - int i; - - /* Allocate pages */ - KPTphys = allocpages(NKPT); - IdlePML4 = allocpages(NKPML4E); - IdlePDP = allocpages(NKPDPE); - IdlePTD = allocpages(NPGPTD); - p0upa = allocpages(UAREA_PAGES); - p0kpa = allocpages(KSTACK_PAGES); - - proc0uarea = (struct user *)(p0upa + KERNBASE); - proc0kstack = p0kpa + KERNBASE; - - /* Fill in the underlying page table pages */ - /* Read-only from zero to physfree */ - /* XXX not fully used, underneath 2M pages */ - for (i = 0; (i << PAGE_SHIFT) < physfree; i++) { - ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; - ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V; - } - - /* Now map the page tables at their location within PTmap */ - for (i = 0; i < NKPT; i++) { - ((pd_entry_t *)IdlePTD)[i + KPTDI] = KPTphys + (i << PAGE_SHIFT); - ((pd_entry_t *)IdlePTD)[i + KPTDI] |= PG_RW | PG_V; - } - - /* Map from zero to end of allocations under 2M pages */ - /* This replaces some of the PTD entries above */ - for (i = 0; (i << PDRSHIFT) < physfree; i++) { - ((pd_entry_t *)IdlePTD)[i] = i << PDRSHIFT; - ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V | PG_PS; - } - - /* Now map the page tables at their location within PTmap */ - for (i = 0; i < NKPT; i++) { - ((pd_entry_t *)IdlePTD)[i] = KPTphys + (i << PAGE_SHIFT); - ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V; - } - - /* Now map the PTD at the top of the PTmap (ie: PTD[]) */ - for (i = 0; i < NPGPTD; i++) { - ((pd_entry_t *)IdlePTD)[i + PTDPTDI] = IdlePTD + (i << PAGE_SHIFT); - ((pd_entry_t *)IdlePTD)[i + PTDPTDI] |= PG_RW | PG_V; - } - - /* And connect up the PTD to the PDP */ - for (i = 0; i < NPGPTD; i++) { - ((pdp_entry_t *)IdlePDP)[i] = IdlePTD + (i << PAGE_SHIFT); - ((pdp_entry_t *)IdlePDP)[i] |= PG_RW | PG_V | PG_U; - } - - /* And connect up the PDP to the PML4 */ - ((pdp_entry_t *)IdlePML4)[0] = IdlePDP; - ((pdp_entry_t *)IdlePML4)[0] |= PG_RW | PG_V | PG_U; -} - void hammer_time(void) { @@ -1157,18 +1089,14 @@ struct region_descriptor r_gdt, r_idt; struct pcpu *pc; u_int64_t msr; + char *env; /* Turn on PTE NX (no execute) bit */ msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); - create_pagetables(); - /* XXX do %cr0 as well */ - load_cr4(rcr4() | CR4_PGE | CR4_PSE); - load_cr3(IdlePML4); - - proc0.p_uarea = proc0uarea; - thread0.td_kstack = proc0kstack; + proc0.p_uarea = (struct user *)(allocpages(UAREA_PAGES) + KERNBASE); + thread0.td_kstack = allocpages(KSTACK_PAGES) + KERNBASE; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; @@ -1310,8 +1238,12 @@ /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ - thread0.td_pcb->pcb_cr3 = IdlePML4; + thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; + + env = getenv("kernelname"); + if (env != NULL) + strlcpy(kernelname, env, sizeof(kernelname)); } void ==== //depot/projects/hammer/sys/amd64/amd64/mem.c#6 (text+ko) ==== @@ -63,6 +63,7 @@ #include <machine/frame.h> #include 
<machine/psl.h> #include <machine/specialreg.h> +#include <machine/vmparam.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -177,7 +178,7 @@ addr = trunc_page(uio->uio_offset); eaddr = round_page(uio->uio_offset + c); - if (addr < (vm_offset_t)VADDR(0, 0, PTDPTDI, 0)) + if (addr < (vm_offset_t)KERNBASE) return (EFAULT); for (; addr < eaddr; addr += PAGE_SIZE) if (pmap_extract(kernel_pmap, addr) == 0) ==== //depot/projects/hammer/sys/amd64/amd64/pmap.c#8 (text+ko) ==== @@ -125,6 +125,7 @@ #include <vm/vm_pageout.h> #include <vm/vm_pager.h> #include <vm/uma.h> +#include <vm/uma_int.h> #include <machine/cpu.h> #include <machine/cputypes.h> @@ -149,34 +150,19 @@ #endif /* - * Get PDEs and PTEs for user/kernel address space - */ -#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) -#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) - -#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) -#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) -#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) -#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) - -#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) - -/* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) -static int protection_codes[8]; +static pt_entry_t protection_codes[8]; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; -vm_paddr_t avail_start; /* PA of first available physical page */ -vm_paddr_t avail_end; /* PA of last available physical page */ +vm_paddr_t avail_start; /* PA of first available physical page */ +vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ @@ -184,6 +170,14 @@ static int nkpt; vm_offset_t kernel_vm_end; +static u_int64_t KPTphys; /* phys addr of kernel level 1 */ +static u_int64_t KPDphys; /* phys addr of kernel level 2 */ +static u_int64_t KPDPphys; /* phys addr of kernel level 3 */ +u_int64_t KPML4phys; /* phys addr of kernel level 4 */ + +static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ +static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ + /* * Data for the pv entry allocation mechanism */ @@ -196,10 +190,8 @@ * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; -static pt_entry_t *CMAP2, *CMAP3, *ptmmap; +static pt_entry_t *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; -static caddr_t CADDR2, CADDR3; -static struct mtx CMAPCADDR12_lock; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp = 0; @@ -209,16 +201,15 @@ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; -static pt_entry_t *PMAP1 = 0; -static pt_entry_t *PADDR1 = 0; - static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(void); -static void i386_protection_init(void); +static void amd64_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); +#if 0 static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); +#endif static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, @@ -252,10 +243,192 @@ return newaddr; } +/********************/ +/* Inline functions */ +/********************/ + +/* Return a non-clipped PD index for a given VA */ +static __inline unsigned long +pmap_pde_pindex(vm_offset_t va) +{ + return va >> PDRSHIFT; +} + + +/* Return various clipped indexes for a given VA */ +static __inline int +pmap_pte_index(vm_offset_t va) +{ + + return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); +} + +static __inline int +pmap_pde_index(vm_offset_t va) +{ + + return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); +} + +static __inline int +pmap_pdpe_index(vm_offset_t va) +{ + + return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); +} + +static __inline int +pmap_pml4e_index(vm_offset_t va) +{ + + return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); +} + +/* Return a pointer to the PML4 slot that corresponds to a VA */ +static __inline pml4_entry_t * +pmap_pml4e(pmap_t pmap, vm_offset_t va) +{ + + if (!pmap) + return NULL; + return (&pmap->pm_pml4[pmap_pml4e_index(va)]); +} + +/* Return a pointer to the PDP slot that corresponds to a VA */ +static __inline pdp_entry_t * +pmap_pdpe(pmap_t pmap, vm_offset_t va) +{ + pml4_entry_t *pml4e; + pdp_entry_t *pdpe; + + pml4e = pmap_pml4e(pmap, va); + if (pml4e == NULL || (*pml4e & PG_V) == 0) + return NULL; + pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); + return (&pdpe[pmap_pdpe_index(va)]); +} + +/* Return a pointer to the PD slot that corresponds to a VA */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va) +{ + pdp_entry_t *pdpe; + pd_entry_t *pde; + + pdpe = pmap_pdpe(pmap, va); + if (pdpe == NULL || (*pdpe & PG_V) == 0) + return NULL; + pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); + return (&pde[pmap_pde_index(va)]); +} + +/* Return a pointer to the PT slot that corresponds to a VA */ +static __inline pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *pde; + pt_entry_t *pte; + + pde = pmap_pde(pmap, va); + if (pde == NULL || (*pde 
& PG_V) == 0) + return NULL; + pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); + return (&pte[pmap_pte_index(va)]); +} + + +PMAP_INLINE pt_entry_t * +vtopte(vm_offset_t va) +{ + u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); + + return (PTmap + (amd64_btop(va) & mask)); +} + +static u_int64_t +allocpages(int n) +{ + u_int64_t ret; + + ret = avail_start; + bzero((void *)ret, n * PAGE_SIZE); + avail_start += n * PAGE_SIZE; + return (ret); +} + +static void +create_pagetables(void) +{ + int i; + + /* Allocate pages */ + KPTphys = allocpages(NKPT); + KPML4phys = allocpages(1); + KPDPphys = allocpages(NKPML4E); + KPDphys = allocpages(NKPDPE); + + DMPDPphys = allocpages(NDMPML4E); + DMPDphys = allocpages(NDMPDPE); + + /* Fill in the underlying page table pages */ + /* Read-only from zero to physfree */ + /* XXX not fully used, underneath 2M pages */ + for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) { + ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; + ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V; + } + + /* Now map the page tables at their location within PTmap */ + for (i = 0; i < NKPT; i++) { + ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT); + ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V; + } + +#if 0 + /* Map from zero to end of allocations under 2M pages */ + /* This replaces some of the KPTphys entries above */ + for (i = 0; (i << PDRSHIFT) < avail_start; i++) { + ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT; + ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS; + } +#endif + + /* And connect up the PD to the PDP */ + for (i = 0; i < NKPDPE; i++) { + ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT); + ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U; + } + + + /* Now set up the direct map space using 2MB pages */ + for (i = 0; i < NPDEPG; i++) { + ((pd_entry_t *)DMPDphys)[i] = i << PDRSHIFT; + ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS; + } + + /* And the direct map space's PDP */ + for (i = 0; i < NDMPDPE; i++) { + ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT); + ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U; + } + + /* And recursively map PML4 to itself in order to get PTmap */ + ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys; + ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U; + + /* Connect the Direct Map slot up to the PML4 */ + ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys; + ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U; + + /* Connect the KVA slot up to the PML4 */ + ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys; + ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U; +} + /* * Bootstrap the system enough to run with virtual memory. * - * On the i386 this is called after mapping has already been enabled + * On amd64 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address @@ -263,40 +436,39 @@ * (physical) address starting relative to 0] */ void -pmap_bootstrap(firstaddr, loadaddr) - vm_paddr_t firstaddr; - vm_paddr_t loadaddr; +pmap_bootstrap(firstaddr) + vm_paddr_t *firstaddr; { vm_offset_t va; pt_entry_t *pte; - int i; - avail_start = firstaddr; + avail_start = *firstaddr; /* - * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too - * large. 
It should instead be correctly calculated in locore.s and - * not based on 'first' (which is a physical address, not a virtual - * address, for the start of unused physical memory). The kernel - * page tables are NOT double mapped and thus should not be included - * in this calculation. + * Create an initial set of page tables to run the kernel in. */ - virtual_avail = (vm_offset_t) KERNBASE + firstaddr; + create_pagetables(); + *firstaddr = avail_start; + + virtual_avail = (vm_offset_t) KERNBASE + avail_start; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; + + /* XXX do %cr0 as well */ + load_cr4(rcr4() | CR4_PGE | CR4_PSE); + load_cr3(KPML4phys); + /* * Initialize protection array. */ - i386_protection_init(); + amd64_protection_init(); /* * Initialize the kernel pmap (which is statically allocated). */ - kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); - kernel_pmap->pm_pdp = (pdp_entry_t *) (KERNBASE + IdlePDP); - kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + IdlePML4); + kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys); kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); @@ -316,16 +488,11 @@ va = virtual_avail; pte = vtopte(va); - /* - * CMAP1/CMAP2 are used for zeroing and copying pages. - * CMAP3 is used for the idle process page zeroing. + /* + * CMAP1 is only used for the memory test. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) - SYSMAP(caddr_t, CMAP2, CADDR2, 1) - SYSMAP(caddr_t, CMAP3, CADDR3, 1) - mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); - /* * Crashdump maps. */ @@ -344,16 +511,9 @@ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) - /* - * ptemap is used for pmap_pte_quick - */ - SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); - virtual_avail = va; - *CMAP1 = *CMAP2 = 0; - for (i = 0; i < NKPT; i++) - PTD[i] = 0; + *CMAP1 = 0; invltlb(); } @@ -365,6 +525,52 @@ return (void *)kmem_alloc(kernel_map, bytes); } +void * +uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + static vm_pindex_t colour; + vm_page_t m; + int pflags; + void *va; + + *flags = UMA_SLAB_PRIV; + + if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT; + else + pflags = VM_ALLOC_SYSTEM; + + if (wait & M_ZERO) + pflags |= VM_ALLOC_ZERO; + + for (;;) { + m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ); + if (m == NULL) { + if (wait & M_NOWAIT) + return (NULL); + else + VM_WAIT; + } else + break; + } + + va = (void *)PHYS_TO_DMAP(m->phys_addr); + if ((m->flags & PG_ZERO) == 0) + pagezero(va); + return (va); +} + +void +uma_small_free(void *mem, int size, u_int8_t flags) +{ + vm_page_t m; + + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem)); + vm_page_lock_queues(); + vm_page_free(m); + vm_page_unlock_queues(); +} + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -431,6 +637,7 @@ * Low level helper routines..... ***************************************************/ + #if defined(PMAP_DIAGNOSTIC) /* @@ -496,48 +703,6 @@ } /* - * Are we current address space or kernel? - */ -static __inline int -pmap_is_current(pmap_t pmap) -{ - return (pmap == kernel_pmap || - (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); -} - -/* - * Super fast pmap_pte routine best used when scanning - * the pv lists. This eliminates many coarse-grained - * invltlb calls. Note that many of the pv list - * scans are across different pmaps. 
It is very wasteful - * to do an entire invltlb for checking a single mapping. - */ -pt_entry_t * -pmap_pte_quick(pmap, va) - register pmap_t pmap; - vm_offset_t va; -{ - pd_entry_t newpf; - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (*pde & PG_PS) - return (pde); - if (*pde != 0) { - /* are we current address space or kernel? */ - if (pmap_is_current(pmap)) - return vtopte(va); - newpf = *pde & PG_FRAME; - if (((*PMAP1) & PG_FRAME) != newpf) { - *PMAP1 = newpf | PG_RW | PG_V; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); - } - return PADDR1 + (amd64_btop(va) & (NPTEPG - 1)); - } - return (0); -} - -/* * Routine: pmap_extract * Function: * Extract the physical page address associated @@ -550,24 +715,47 @@ { vm_paddr_t rtval; pt_entry_t *pte; - pd_entry_t pde; + pd_entry_t pde, *pdep; if (pmap == 0) return 0; - pde = pmap->pm_pdir[va >> PDRSHIFT]; - if (pde != 0) { - if ((pde & PG_PS) != 0) { - rtval = (pde & ~PDRMASK) | (va & PDRMASK); + pdep = pmap_pde(pmap, va); + if (pdep) { + pde = *pdep; + if (pde) { + if ((pde & PG_PS) != 0) { + rtval = (pde & ~PDRMASK) | (va & PDRMASK); + return rtval; + } + pte = pmap_pte(pmap, va); + rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } - pte = pmap_pte_quick(pmap, va); - rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); - return rtval; } return 0; } +vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + pd_entry_t *pde; + vm_paddr_t pa; + + if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { + pa = DMAP_TO_PHYS(va); + } else { + pde = pmap_pde(kernel_pmap, va); + if (*pde & PG_PS) { + pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)); + } else { + pa = *vtopte(va); + pa = (pa & PG_FRAME) | (va & PAGE_MASK); + } + } + return pa; +} + /*************************************************** * Low level mapping routines..... ***************************************************/ @@ -689,7 +877,7 @@ #endif /* - * Create the kernel stack (including pcb for i386) for a new thread. + * Create the kernel stack (including pcb for amd64) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ @@ -899,7 +1087,7 @@ * drops to zero, then it decrements the wire count. */ static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) { while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) @@ -910,16 +1098,51 @@ /* * unmap the page table page */ - pmap->pm_pdir[m->pindex] = 0; + if (m->pindex >= (NUPDE + NUPDPE)) { + /* PDP page */ + pml4_entry_t *pml4; + pml4 = pmap_pml4e(pmap, va); + pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE)); + *pml4 = 0; + } else if (m->pindex >= NUPDE) { + /* PD page */ + pdp_entry_t *pdp; + pdp = pmap_pdpe(pmap, va); + pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE); + *pdp = 0; + } else { + /* PTE page */ + pd_entry_t *pd; + pd = pmap_pde(pmap, va); + pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex); + *pd = 0; + } --pmap->pm_stats.resident_count; - if (pmap_is_current(pmap)) { - /* - * Do an invltlb to make the invalidated mapping - * take effect immediately. 
- */ - pteva = VM_MAXUSER_ADDRESS + amd64_ptob(m->pindex); - pmap_invalidate_page(pmap, pteva); + if (m->pindex < NUPDE) { + /* Unhold the PD page */ + vm_page_t pdpg; + pdpg = vm_page_lookup(pmap->pm_pteobj, NUPDE + pmap_pdpe_index(va)); + while (vm_page_sleep_if_busy(pdpg, FALSE, "pulook")) + vm_page_lock_queues(); + vm_page_unhold(pdpg); + if (pdpg->hold_count == 0) + _pmap_unwire_pte_hold(pmap, va, pdpg); + } + if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { + /* Unhold the PDP page */ + vm_page_t pdppg; + pdppg = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pmap_pml4e_index(va)); + while (vm_page_sleep_if_busy(pdppg, FALSE, "pulooK")) + vm_page_lock_queues(); + vm_page_unhold(pdppg); + if (pdppg->hold_count == 0) + _pmap_unwire_pte_hold(pmap, va, pdppg); } + /* + * Do an invltlb to make the invalidated mapping + * take effect immediately. + */ + pmap_invalidate_page(pmap, pteva); /* * If the page is finally unwired, simply free it. @@ -936,11 +1159,11 @@ } static PMAP_INLINE int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) - return _pmap_unwire_pte_hold(pmap, m); + return _pmap_unwire_pte_hold(pmap, va, m); else return 0; } @@ -952,23 +1175,24 @@ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { - unsigned ptepindex; + unsigned long ptepindex; + if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { - ptepindex = (va >> PDRSHIFT); + ptepindex = pmap_pde_pindex(va); if (pmap->pm_pteobj->root && - (pmap->pm_pteobj->root->pindex == ptepindex)) { + pmap->pm_pteobj->root->pindex == ptepindex) { mpte = pmap->pm_pteobj->root; } else { while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && - vm_page_sleep_if_busy(mpte, FALSE, "pulook")) + vm_page_sleep_if_busy(mpte, FALSE, "pulook")) vm_page_lock_queues(); } } - return pmap_unwire_pte_hold(pmap, mpte); + return pmap_unwire_pte_hold(pmap, va, mpte); } void @@ -976,9 +1200,7 @@ struct pmap *pmap; { - pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD); - pmap->pm_pdp = (pdp_entry_t *)(KERNBASE + IdlePDP); - pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + IdlePML4); + pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys); pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -995,90 +1217,39 @@ pmap_pinit(pmap) register struct pmap *pmap; { - vm_page_t ptdpg[NPGPTD]; - vm_page_t pdppg; vm_page_t pml4pg; - vm_paddr_t pa; - int i; - - /* - * No need to allocate page table space yet but we do need a valid - * page directory table. 
- */ - if (pmap->pm_pdir == NULL) { - pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, - NBPTD); - pmap->pm_pdp = (pdp_entry_t *)kmem_alloc_pageable(kernel_map, - PAGE_SIZE); - pmap->pm_pml4 = (pml4_entry_t *)kmem_alloc_pageable(kernel_map, - PAGE_SIZE); - } /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) - pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + - NPGPTD + 2); + pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + NUPML4E + 1); /* - * allocate the page directory page(s) + * allocate the page directory page */ - for (i = 0; i < NPGPTD; i++) { - ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, - VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - vm_page_lock_queues(); - vm_page_flag_clear(ptdpg[i], PG_BUSY); - ptdpg[i]->valid = VM_PAGE_BITS_ALL; - vm_page_unlock_queues(); - } - - pml4pg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD, + pml4pg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + NUPML4E, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); vm_page_lock_queues(); vm_page_flag_clear(pml4pg, PG_BUSY); pml4pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); - pdppg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD + 1, - VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); - vm_page_lock_queues(); - vm_page_flag_clear(pdppg, PG_BUSY); - pdppg->valid = VM_PAGE_BITS_ALL; - vm_page_unlock_queues(); + pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); - pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); - pmap_qenter((vm_offset_t)pmap->pm_pdp, &pdppg, 1); - pmap_qenter((vm_offset_t)pmap->pm_pml4, &pml4pg, 1); - - for (i = 0; i < NPGPTD; i++) { - if ((ptdpg[i]->flags & PG_ZERO) == 0) - bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); - } - if ((pdppg->flags & PG_ZERO) == 0) - bzero(pmap->pm_pdp, PAGE_SIZE); if ((pml4pg->flags & PG_ZERO) == 0) bzero(pmap->pm_pml4, PAGE_SIZE); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); + /* Wire in kernel global address entries. */ - bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); + pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; + pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U; >>> TRUNCATED FOR MAIL (1000 lines) <<<
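The new pmap_pte_index()/pmap_pde_index()/pmap_pdpe_index()/pmap_pml4e_index()
helpers are plain bit-field extractions from the canonical 48-bit virtual
address: each of the four levels consumes 9 index bits (512-entry table
pages) above the 12-bit page offset. A minimal standalone sketch of that
arithmetic, assuming the stock amd64 constants (4KB pages, 512 entries per
table page); the shift values below are written out by hand, not quoted
from the tree:

#include <stdio.h>

#define PAGE_SHIFT	12			/* 4KB pages */
#define PDRSHIFT	(PAGE_SHIFT + 9)	/* 21: one PDE maps 2MB */
#define PDPSHIFT	(PDRSHIFT + 9)		/* 30: one PDPE maps 1GB */
#define PML4SHIFT	(PDPSHIFT + 9)		/* 39: one PML4E maps 512GB */
#define IDXMASK		((1ul << 9) - 1)	/* clip each index to 0..511 */

int
main(void)
{
	unsigned long va = 0x00007fffdeadb000ul;	/* arbitrary user VA */

	printf("PML4 index: %lu\n", (va >> PML4SHIFT) & IDXMASK);
	printf("PDP  index: %lu\n", (va >> PDPSHIFT) & IDXMASK);
	printf("PD   index: %lu\n", (va >> PDRSHIFT) & IDXMASK);
	printf("PT   index: %lu\n", (va >> PAGE_SHIFT) & IDXMASK);
	return (0);
}

pmap_pde_pindex() is the odd one out: it deliberately does not clip,
because the pte object indexes page table pages linearly across the whole
user address space.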
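The addr_PTmap/addr_PDmap/addr_PDPmap/addr_PML4map symbols handed down to
locore.S fall out of the recursive trick at the end of create_pagetables():
PML4 slot PML4PML4I is pointed back at the PML4 page itself, so each time
a lookup passes through that slot the hardware peels off one level of
translation. A sketch of where the resulting windows land, assuming
PML4PML4I selects a kernel-half slot (hence the sign extension into the
upper 16 bits); this KVADDR() helper is illustrative, not a quote from the
headers:

/* assumed helper: build a canonical VA from four table indexes */
#define KVADDR(l4, l3, l2, l1)						\
	(((unsigned long)-1 << 47) |					\
	 ((unsigned long)(l4) << 39) | ((unsigned long)(l3) << 30) |	\
	 ((unsigned long)(l2) << 21) | ((unsigned long)(l1) << 12))

#define addr_PTmap	KVADDR(PML4PML4I, 0, 0, 0)		/* 512GB: every PTE */
#define addr_PDmap	KVADDR(PML4PML4I, PML4PML4I, 0, 0)	/* 1GB: every PDE */
#define addr_PDPmap	KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0) /* 2MB: every PDPE */
#define addr_PML4map	KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I) /* the PML4 */

addr_PML4pml4e is then just the self-referencing entry inside that last
window, and vtopte() reduces to PTmap plus the page number of va clipped
to 9+9+9+9 bits, which is exactly the mask it computes.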
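The direct map is what lets the new table walkers (pmap_pdpe(),
pmap_pde(), pmap_pte()) chase a physical frame straight to the next level,
and lets uma_small_alloc() hand back freshly allocated pages, without the
transient CMAP/pmap_qenter() mappings and TLB shootdowns the i386 code
needed: every physical page in the segment is permanently visible at a
fixed virtual offset, backed by the 2MB mappings built in
create_pagetables(). The conversion is then pure arithmetic. A sketch of
the macro shapes this implies; the real definitions belong in
amd64/include/vmparam.h and may differ in detail:

/* assumed shapes, modelled on how the diff uses them */
#define PHYS_TO_DMAP(pa)	((vm_offset_t)(pa) + DMAP_MIN_ADDRESS)
#define DMAP_TO_PHYS(va)	((vm_paddr_t)(va) - DMAP_MIN_ADDRESS)

uma_small_alloc() shows the payoff: it returns
(void *)PHYS_TO_DMAP(m->phys_addr) directly from vm_page_alloc(), and
uma_small_free() recovers the vm_page with DMAP_TO_PHYS(); no kernel VA
has to be allocated or invalidated on either path.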
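Both pmap_extract() and pmap_kextract() must now handle PDEs with PG_PS
set, since the direct map runs on 2MB pages: there is no level-1 table
under such a PDE, and the low 21 bits of the VA pass straight through. A
worked sketch of that branch, with PDRMASK assumed to be
((1ul << 21) - 1); the example frame and flag values are made up:

#include <stdio.h>

#define PDRSHIFT	21
#define PDRMASK		((1ul << PDRSHIFT) - 1)	/* offset within a 2MB page */

int
main(void)
{
	unsigned long pde = 0x40000000ul | 0x83;  /* 1GB frame, PG_PS|PG_RW|PG_V */
	unsigned long va = 0xffffffff801234abul;  /* VA being translated */
	unsigned long pa;

	/* same arithmetic as the PG_PS branch in pmap_extract() */
	pa = (pde & ~PDRMASK) | (va & PDRMASK);
	printf("pa = %#lx\n", pa);		/* prints pa = 0x401234ab */
	return (0);
}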