Date:      Fri, 17 Mar 2017 21:40:14 +0000 (UTC)
From:      Justin Hibbits <jhibbits@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r315464 - in head/sys/powerpc: booke conf include powerpc
Message-ID:  <201703172140.v2HLeEhw044986@repo.freebsd.org>

Author: jhibbits
Date: Fri Mar 17 21:40:14 2017
New Revision: 315464
URL: https://svnweb.freebsd.org/changeset/base/315464

Log:
  Introduce 64-bit PowerPC Book-E support
  
  Extend the Book-E pmap to support 64-bit operation.  Much of this was taken
  from Juniper's Junos FreeBSD port.  It uses a 3-level page table (page
  directory list -- PP2D, page directory, page table), but has gaps in the page
  directory list where regions repeat, due to the design of the PP2D hash (a
  20-bit gap between the two parts of the index).  In practice this may not be
  a problem given the expanded address space.  However, an alternative would be
  a 4-level page table, like Linux, possibly reducing the available address
  space; Linux appears to use a 46-bit address space.  Alternatively, a cache
  of page directory pointers could keep the overall design as-is while removing
  the gaps in the address space.
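  
  For a concrete picture of the 3-level walk, the lookup path is essentially
  what pte_find() does in the pmap.c changes below; a minimal sketch only (the
  function name here is hypothetical, and the PP2D_IDX/PDIR_IDX/PTBL_IDX
  macros are defined in powerpc/include/pte.h):
  
  	static pte_t *
  	pte_lookup_sketch(pmap_t pmap, vm_offset_t va)
  	{
  		pte_t **pdir;
  		pte_t *ptbl;
  
  		/* Level 1: index the page directory list (PP2D). */
  		pdir = pmap->pm_pp2d[PP2D_IDX(va)];
  		if (pdir == NULL)
  			return (NULL);
  		/* Level 2: index the page directory. */
  		ptbl = pdir[PDIR_IDX(va)];
  		if (ptbl == NULL)
  			return (NULL);
  		/* Level 3: index the page table to reach the PTE. */
  		return (&ptbl[PTBL_IDX(va)]);
  	}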
  
  This includes a new kernel config for 64-bit QorIQ SoCs, based on MPC85XX,
  with the following notes (a rough config sketch follows this list):
  * The DPAA driver has not yet been ported to 64-bit, so it is not included in
    the kernel config.
  * This has been tested on the AmigaOne X5000, using an MD_ROOT image compiled
    in (the total kernel+mdroot size must be under 64MB).
  * This can run both 32-bit and 64-bit processes, and has even been tested to
    run a 32-bit init with 64-bit children.
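  
  As a rough illustration only (these are standard kernel config options, but
  the committed QORIQ64 file is authoritative, and the image path below is a
  placeholder):
  
  	ident		QORIQ64
  	machine		powerpc powerpc64
  	cpu		BOOKE
  	cpu		BOOKE_E500
  	options		SMP
  	options		COMPAT_FREEBSD32	# run 32-bit binaries
  	options		MD_ROOT			# md(4) root device
  	makeoptions	MFS_IMAGE=/path/to/mdroot.img	# embedded root image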
  
  Many thanks to stevek and marcel for getting Juniper's FreeBSD patches
  open-sourced for use here, and to stevek for reviewing and providing some
  historical context on quirks of the code.
  
  Reviewed by:	stevek
  Obtained from:	Juniper (in part)
  MFC after:	2 months
  Relnotes:	yes
  Differential Revision:	https://reviews.freebsd.org/D9433

Added:
  head/sys/powerpc/conf/QORIQ64   (contents, props changed)
Modified:
  head/sys/powerpc/booke/booke_machdep.c
  head/sys/powerpc/booke/locore.S
  head/sys/powerpc/booke/mp_cpudep.c
  head/sys/powerpc/booke/pmap.c
  head/sys/powerpc/booke/trap_subr.S
  head/sys/powerpc/include/asm.h
  head/sys/powerpc/include/pcpu.h
  head/sys/powerpc/include/pmap.h
  head/sys/powerpc/include/psl.h
  head/sys/powerpc/include/pte.h
  head/sys/powerpc/include/spr.h
  head/sys/powerpc/include/tlb.h
  head/sys/powerpc/include/vmparam.h
  head/sys/powerpc/powerpc/db_interface.c
  head/sys/powerpc/powerpc/exec_machdep.c
  head/sys/powerpc/powerpc/genassym.c

Modified: head/sys/powerpc/booke/booke_machdep.c
==============================================================================
--- head/sys/powerpc/booke/booke_machdep.c	Fri Mar 17 21:24:09 2017	(r315463)
+++ head/sys/powerpc/booke/booke_machdep.c	Fri Mar 17 21:40:14 2017	(r315464)
@@ -216,7 +216,7 @@ void
 ivor_setup(void)
 {
 
-	mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & 0xffff0000);
+	mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & ~0xffffUL);
 
 	SET_TRAP(SPR_IVOR0, int_critical_input);
 	SET_TRAP(SPR_IVOR1, int_machine_check);
@@ -250,6 +250,11 @@ ivor_setup(void)
 		SET_TRAP(SPR_IVOR32, int_vec);
 		break;
 	}
+
+#ifdef __powerpc64__
+	/* Set 64-bit interrupt mode. */
+	mtspr(SPR_EPCR, mfspr(SPR_EPCR) | EPCR_ICM);
+#endif
 }
 
 static int
@@ -353,7 +358,7 @@ booke_init(u_long arg1, u_long arg2)
 }
 
 #define RES_GRANULE 32
-extern uint32_t tlb0_miss_locks[];
+extern uintptr_t tlb0_miss_locks[];
 
 /* Initialise a struct pcpu. */
 void
@@ -363,8 +368,8 @@ cpu_pcpu_init(struct pcpu *pcpu, int cpu
 	pcpu->pc_tid_next = TID_MIN;
 
 #ifdef SMP
-	uint32_t *ptr;
-	int words_per_gran = RES_GRANULE / sizeof(uint32_t);
+	uintptr_t *ptr;
+	int words_per_gran = RES_GRANULE / sizeof(uintptr_t);
 
 	ptr = &tlb0_miss_locks[cpuid * words_per_gran];
 	pcpu->pc_booke_tlb_lock = ptr;

Modified: head/sys/powerpc/booke/locore.S
==============================================================================
--- head/sys/powerpc/booke/locore.S	Fri Mar 17 21:24:09 2017	(r315463)
+++ head/sys/powerpc/booke/locore.S	Fri Mar 17 21:40:14 2017	(r315464)
@@ -41,6 +41,39 @@
 
 #define TMPSTACKSZ	16384
 
+#ifdef __powerpc64__
+#define GET_TOCBASE(r)  \
+	mfspr	r, SPR_SPRG8
+#define	TOC_RESTORE	nop
+#define	CMPI	cmpdi
+#define	CMPL	cmpld
+#define	LOAD	ld
+#define	LOADX	ldarx
+#define	STORE	std
+#define	STOREX	stdcx.
+#define	STU	stdu
+#define	CALLSIZE	48
+#define	REDZONE		288
+#define	THREAD_REG	%r13
+#define	ADDR(x)	\
+	.llong	x
+#else
+#define	GET_TOCBASE(r)
+#define	TOC_RESTORE
+#define	CMPI	cmpwi
+#define	CMPL	cmplw
+#define	LOAD	lwz
+#define	LOADX	lwarx
+#define	STOREX	stwcx.
+#define	STORE	stw
+#define	STU	stwu
+#define	CALLSIZE	8
+#define	REDZONE		0
+#define	THREAD_REG	%r2
+#define	ADDR(x)	\
+	.long	x
+#endif
+
 	.text
 	.globl	btext
 btext:
@@ -101,6 +134,9 @@ __start:
  * Initial cleanup
  */
 	li	%r3, PSL_DE	/* Keep debug exceptions for CodeWarrior. */
+#ifdef __powerpc64__
+	oris	%r3, %r3, PSL_CM@h
+#endif
 	mtmsr	%r3
 	isync
 
@@ -200,11 +236,8 @@ __start:
 	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
 	isync
 
-	lis	%r3, KERNBASE@h
-	ori	%r3, %r3, KERNBASE@l	/* EPN = KERNBASE */
-#ifdef SMP
+	LOAD_ADDR(%r3, KERNBASE)
 	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
-#endif
 	mtspr	SPR_MAS2, %r3
 	isync
 
@@ -224,11 +257,19 @@ __start:
 	/* Switch to the above TLB1[1] mapping */
 	bl	4f
 4:	mflr	%r4
-	rlwinm	%r4, %r4, 0, 8, 31	/* Current offset from kernel load address */
+#ifdef __powerpc64__
+	clrldi	%r4, %r4, 38
+	clrrdi	%r3, %r3, 12
+#else
+	rlwinm	%r4, %r4, 0, 6, 31	/* Current offset from kernel load address */
 	rlwinm	%r3, %r3, 0, 0, 19
+#endif
 	add	%r4, %r4, %r3		/* Convert to kernel virtual address */
 	addi	%r4, %r4, (5f - 4b)
 	li	%r3, PSL_DE		/* Note AS=0 */
+#ifdef __powerpc64__
+	oris	%r3, %r3, PSL_CM@h
+#endif
 	mtspr   SPR_SRR0, %r4
 	mtspr   SPR_SRR1, %r3
 	rfi
@@ -242,6 +283,33 @@ __start:
 
 done_mapping:
 
+#ifdef __powerpc64__
+	/* Set up the TOC pointer */
+	b	0f
+	.align 3
+0:	nop
+	bl	1f
+	.llong	__tocbase + 0x8000 - .
+1:	mflr	%r2
+	ld	%r1,0(%r2)
+	add	%r2,%r1,%r2
+	mtspr	SPR_SPRG8, %r2
+
+	/* Get load offset */
+	ld	%r31,-0x8000(%r2) /* First TOC entry is TOC base */
+	subf    %r31,%r31,%r2	/* Subtract from real TOC base to get base */
+
+	/* Set up the stack pointer */
+	ld	%r1,TOC_REF(tmpstack)(%r2)
+	addi	%r1,%r1,TMPSTACKSZ-96
+	add	%r1,%r1,%r31
+	bl	1f
+	.llong _DYNAMIC-.
+1:	mflr	%r3
+	ld	%r4,0(%r3)
+	add	%r3,%r4,%r3
+	mr	%r4,%r31
+#else
 /*
  * Setup a temporary stack
  */
@@ -265,12 +333,15 @@ done_mapping:
 	add	%r4,%r4,%r5
 	lwz	%r4,4(%r4)	/* got[0] is _DYNAMIC link addr */
 	subf	%r4,%r4,%r3	/* subtract to calculate relocbase */
-	bl	elf_reloc_self
+#endif
+	bl	CNAME(elf_reloc_self)
+	TOC_RESTORE
 
 /*
  * Initialise exception vector offsets
  */
-	bl	ivor_setup
+	bl	CNAME(ivor_setup)
+	TOC_RESTORE
 
 /*
  * Set up arguments and jump to system initialization code
@@ -279,15 +350,17 @@ done_mapping:
 	mr	%r4, %r31
 
 	/* Prepare core */
-	bl	booke_init
+	bl	CNAME(booke_init)
+	TOC_RESTORE
 
 	/* Switch to thread0.td_kstack now */
 	mr	%r1, %r3
 	li	%r3, 0
-	stw	%r3, 0(%r1)
+	STORE	%r3, 0(%r1)
 
 	/* Machine independent part, does not return */
-	bl	mi_startup
+	bl	CNAME(mi_startup)
+	TOC_RESTORE
 	/* NOT REACHED */
 5:	b	5b
 
@@ -364,6 +437,9 @@ bp_kernload:
 
 	mfmsr	%r3
 	ori	%r3, %r3, (PSL_IS | PSL_DS)
+#ifdef __powerpc64__
+	oris	%r3, %r3, PSL_CM@h
+#endif
 	bl	3f
 3:	mflr	%r4
 	addi	%r4, %r4, (4f - 3b)
@@ -393,20 +469,31 @@ bp_kernload:
 	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
 	isync
 
-	lis	%r3, KERNBASE@h
-	ori	%r3, %r3, KERNBASE@l	/* EPN = KERNBASE */
+	LOAD_ADDR(%r3, KERNBASE)
 	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
 	mtspr	SPR_MAS2, %r3
 	isync
 
 	/* Retrieve kernel load [physical] address from bp_kernload */
-	bl	5f
-	.long	bp_kernload
-	.long	__boot_page
+#ifdef __powerpc64__
+	b	0f
+	.align	3
+0:
+	nop
+#endif
+	bl 5f
+	ADDR(bp_kernload)
+	ADDR(__boot_page)
 5:	mflr	%r3
+#ifdef __powerpc64__
+	ld	%r4, 0(%r3)
+	ld	%r5, 8(%r3)
+	clrrdi	%r3, %r3, 12
+#else
 	lwz	%r4, 0(%r3)
 	lwz	%r5, 4(%r3)
 	rlwinm	%r3, %r3, 0, 0, 19
+#endif
 	sub	%r4, %r4, %r5	/* offset of bp_kernload within __boot_page */
 	lwzx	%r3, %r4, %r3
 
@@ -426,7 +513,11 @@ bp_kernload:
 	rlwinm	%r3, %r3, 0, 0xfff	/* Offset from boot page start */
 	add	%r3, %r3, %r5		/* Make this virtual address */
 	addi	%r3, %r3, (7f - 6b)
+#ifdef __powerpc64__
+	lis	%r4, PSL_CM@h		/* Note AS=0 */
+#else
 	li	%r4, 0			/* Note AS=0 */
+#endif
 	mtspr	SPR_SRR0, %r3
 	mtspr	SPR_SRR1, %r4
 	rfi
@@ -444,6 +535,27 @@ bp_kernload:
 	mr	%r3, %r28
 	bl	tlb1_inval_entry
 
+#ifdef __powerpc64__
+	/* Set up the TOC pointer */
+	b	0f
+	.align 3
+0:	nop
+	bl	1f
+	.llong	__tocbase + 0x8000 - .
+1:	mflr	%r2
+	ld	%r1,0(%r2)
+	add	%r2,%r1,%r2
+	mtspr	SPR_SPRG8, %r2
+
+	/* Get load offset */
+	ld	%r31,-0x8000(%r2) /* First TOC entry is TOC base */
+	subf    %r31,%r31,%r2	/* Subtract from real TOC base to get base */
+
+	/* Set up the stack pointer */
+	ld	%r1,TOC_REF(tmpstack)(%r2)
+	addi	%r1,%r1,TMPSTACKSZ-96
+	add	%r1,%r1,%r31
+#else
 /*
  * Setup a temporary stack
  */
@@ -454,11 +566,13 @@ bp_kernload:
 	add	%r1,%r1,%r2
 	stw	%r1, 0(%r1)
 	addi	%r1, %r1, (TMPSTACKSZ - 16)
+#endif
 
 /*
  * Initialise exception vector offsets
  */
-	bl	ivor_setup
+	bl	CNAME(ivor_setup)
+	TOC_RESTORE
 
 	/*
 	 * Assign our pcpu instance
@@ -468,16 +582,19 @@ bp_kernload:
 1:	mflr	%r4
 	lwz	%r3, 0(%r4)
 	add	%r3, %r3, %r4
-	lwz	%r3, 0(%r3)
+	LOAD	%r3, 0(%r3)
 	mtsprg0	%r3
 
-	bl	pmap_bootstrap_ap
+	bl	CNAME(pmap_bootstrap_ap)
+	TOC_RESTORE
 
-	bl	cpudep_ap_bootstrap
+	bl	CNAME(cpudep_ap_bootstrap)
+	TOC_RESTORE
 	/* Switch to the idle thread's kstack */
 	mr	%r1, %r3
 	
-	bl	machdep_ap_bootstrap
+	bl	CNAME(machdep_ap_bootstrap)
+	TOC_RESTORE
 
 	/* NOT REACHED */
 6:	b	6b
@@ -594,7 +711,6 @@ tlb1_temp_mapping_as1:
  * r3-r5	scratched
  */
 tlb1_inval_all_but_current:
-	mr	%r6, %r3
 	mfspr	%r3, SPR_TLB1CFG	/* Get number of entries */
 	andi.	%r3, %r3, TLBCFG_NENTRY_MASK@l
 	li	%r4, 0			/* Start from Entry 0 */
@@ -864,14 +980,18 @@ ENTRY(get_spr)
 	.data
 	.align 3
 GLOBAL(__startkernel)
-	.long   begin
+	ADDR(begin)
 GLOBAL(__endkernel)
-	.long   end
+	ADDR(end)
 	.align	4
 tmpstack:
 	.space	TMPSTACKSZ
 tmpstackbound:
 	.space 10240	/* XXX: this really should not be necessary */
+#ifdef __powerpc64__
+TOC_ENTRY(tmpstack)
+TOC_ENTRY(bp_kernload)
+#endif
 
 /*
  * Compiled KERNBASE locations

Modified: head/sys/powerpc/booke/mp_cpudep.c
==============================================================================
--- head/sys/powerpc/booke/mp_cpudep.c	Fri Mar 17 21:24:09 2017	(r315463)
+++ head/sys/powerpc/booke/mp_cpudep.c	Fri Mar 17 21:40:14 2017	(r315464)
@@ -50,7 +50,8 @@ volatile void *ap_pcpu;
 uintptr_t
 cpudep_ap_bootstrap()
 {
-	uint32_t msr, sp, csr;
+	uint32_t msr, csr;
+	uintptr_t sp;
 
 	/* Enable L1 caches */
 	csr = mfspr(SPR_L1CSR0);
@@ -66,7 +67,11 @@ cpudep_ap_bootstrap()
 	}
 
 	/* Set MSR */
+#ifdef __powerpc64__
+	msr = PSL_CM | PSL_ME;
+#else
 	msr = PSL_ME;
+#endif
 	mtmsr(msr);
 
 	/* Assign pcpu fields, return ptr to this AP's idle thread kstack */

Modified: head/sys/powerpc/booke/pmap.c
==============================================================================
--- head/sys/powerpc/booke/pmap.c	Fri Mar 17 21:24:09 2017	(r315463)
+++ head/sys/powerpc/booke/pmap.c	Fri Mar 17 21:40:14 2017	(r315464)
@@ -34,18 +34,42 @@
   * Kernel and user threads run within one common virtual address space
   * defined by AS=0.
   *
+  * 32-bit pmap:
   * Virtual address space layout:
   * -----------------------------
-  * 0x0000_0000 - 0xafff_ffff	: user process
-  * 0xb000_0000 - 0xbfff_ffff	: pmap_mapdev()-ed area (PCI/PCIE etc.)
+  * 0x0000_0000 - 0x7fff_ffff	: user process
+  * 0x8000_0000 - 0xbfff_ffff	: pmap_mapdev()-ed area (PCI/PCIE etc.)
   * 0xc000_0000 - 0xc0ff_ffff	: kernel reserved
   *   0xc000_0000 - data_end	: kernel code+data, env, metadata etc.
-  * 0xc100_0000 - 0xfeef_ffff	: KVA
+  * 0xc100_0000 - 0xffff_ffff	: KVA
   *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
   *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
   *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
   *   0xc200_9000 - 0xfeef_ffff	: actual free KVA space
-  * 0xfef0_0000 - 0xffff_ffff	: I/O devices region
+  *
+  * 64-bit pmap:
+  * Virtual address space layout:
+  * -----------------------------
+  * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff      : user process
+  *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff    : text, data, heap, maps, libraries
+  *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff    : mmio region
+  *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff    : stack
+  * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff      : kernel reserved
+  *   0xc000_0000_0000_0000 - endkernel-1              : kernel code & data
+  *               endkernel - msgbufp-1                : flat device tree
+  *                 msgbufp - ptbl_bufs-1              : message buffer
+  *               ptbl_bufs - kernel_pdir-1            : kernel page tables
+  *             kernel_pdir - kernel_pp2d-1            : kernel page directory
+  *             kernel_pp2d - .                        : kernel pointers to page directory
+  *      pmap_zero_copy_min - crashdumpmap-1           : reserved for page zero/copy
+  *            crashdumpmap - ptbl_buf_pool_vabase-1   : reserved for ptbl bufs
+  *    ptbl_buf_pool_vabase - virtual_avail-1          : user page directories and page tables
+  *           virtual_avail - 0xcfff_ffff_ffff_ffff    : actual free KVA space
+  * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff      : coprocessor region
+  * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff      : mmio region
+  * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff      : direct map
+  *   0xf000_0000_0000_0000 - +Maxmem                  : physmem map
+  *                         - 0xffff_ffff_ffff_ffff    : device direct map
   */
 
 #include <sys/cdefs.h>
@@ -83,6 +107,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_pager.h>
 #include <vm/uma.h>
 
+#include <machine/_inttypes.h>
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/platform.h>
@@ -103,6 +128,12 @@ __FBSDID("$FreeBSD$");
 #define debugf(fmt, args...)
 #endif
 
+#ifdef __powerpc64__
+#define	PRI0ptrX	"016lx"
+#else
+#define	PRI0ptrX	"08x"
+#endif
+
 #define TODO			panic("%s: not implemented", __func__);
 
 extern unsigned char _etext[];
@@ -144,6 +175,9 @@ static int mmu_booke_enter_locked(mmu_t,
 
 unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
 unsigned int kernel_ptbls;	/* Number of KVA ptbls. */
+#ifdef __powerpc64__
+unsigned int kernel_pdirs;
+#endif
 
 /*
  * If user pmap is processed with mmu_booke_remove and the resident count
@@ -152,7 +186,9 @@ unsigned int kernel_ptbls;	/* Number of 
 #define PMAP_REMOVE_DONE(pmap) \
 	((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
 
+#if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
 extern int elf32_nxstack;
+#endif
 
 /**************************************************************************/
 /* TLB and TID handling */
@@ -175,14 +211,17 @@ uint32_t tlb1_entries;
 #define TLB0_ENTRIES_PER_WAY	(tlb0_entries_per_way)
 
 #define TLB1_ENTRIES (tlb1_entries)
-#define TLB1_MAXENTRIES	64
 
 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE;
 
 static tlbtid_t tid_alloc(struct pmap *);
 static void tid_flush(tlbtid_t tid);
 
+#ifdef __powerpc64__
+static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t);
+#else
 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
+#endif
 
 static void tlb1_read_entry(tlb_entry_t *, unsigned int);
 static void tlb1_write_entry(tlb_entry_t *, unsigned int);
@@ -219,17 +258,24 @@ static struct ptbl_buf *ptbl_buf_alloc(v
 static void ptbl_buf_free(struct ptbl_buf *);
 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
 
+#ifdef __powerpc64__
+static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **,
+			 unsigned int, boolean_t);
+static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int);
+static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int);
+static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t);
+#else
 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t);
 static void ptbl_free(mmu_t, pmap_t, unsigned int);
 static void ptbl_hold(mmu_t, pmap_t, unsigned int);
 static int ptbl_unhold(mmu_t, pmap_t, unsigned int);
+#endif
 
 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t);
-static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t);
-static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr,
-			     vm_offset_t pdir);
+static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
+static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t);
 
 static pv_entry_t pv_alloc(void);
 static void pv_free(pv_entry_t);
@@ -239,7 +285,11 @@ static void pv_remove(pmap_t, vm_offset_
 static void booke_pmap_init_qpages(void);
 
 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
+#ifdef __powerpc64__
+#define PTBL_BUFS               (16UL * 16 * 16)
+#else
 #define PTBL_BUFS		(128 * 16)
+#endif
 
 struct ptbl_buf {
 	TAILQ_ENTRY(ptbl_buf) link;	/* list link */
@@ -503,6 +553,364 @@ tlb1_get_tlbconf(void)
 /* Page table related */
 /**************************************************************************/
 
+#ifdef __powerpc64__
+/* Initialize pool of kva ptbl buffers. */
+static void
+ptbl_init(void)
+{
+	int		i;
+
+	mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
+	TAILQ_INIT(&ptbl_buf_freelist);
+
+	for (i = 0; i < PTBL_BUFS; i++) {
+		ptbl_bufs[i].kva = ptbl_buf_pool_vabase +
+		    i * MAX(PTBL_PAGES,PDIR_PAGES) * PAGE_SIZE;
+		TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
+	}
+}
+
+/* Get a ptbl_buf from the freelist. */
+static struct ptbl_buf *
+ptbl_buf_alloc(void)
+{
+	struct ptbl_buf *buf;
+
+	mtx_lock(&ptbl_buf_freelist_lock);
+	buf = TAILQ_FIRST(&ptbl_buf_freelist);
+	if (buf != NULL)
+		TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
+	mtx_unlock(&ptbl_buf_freelist_lock);
+
+	return (buf);
+}
+
+/* Return ptbl buf to the free pool. */
+static void
+ptbl_buf_free(struct ptbl_buf *buf)
+{
+	mtx_lock(&ptbl_buf_freelist_lock);
+	TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
+	mtx_unlock(&ptbl_buf_freelist_lock);
+}
+
+/*
+ * Search the pmap's list of ptbl bufs and free the buf backing the given ptbl.
+ */
+static void
+ptbl_free_pmap_ptbl(pmap_t pmap, pte_t * ptbl)
+{
+	struct ptbl_buf *pbuf;
+
+	TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) {
+		if (pbuf->kva == (vm_offset_t) ptbl) {
+			/* Remove from pmap ptbl buf list. */
+			TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);
+
+			/* Free corresponding ptbl buf. */
+			ptbl_buf_free(pbuf);
+
+			break;
+		}
+	}
+}
+
+/* Get a pointer to a PTE in a page table. */
+static __inline pte_t *
+pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
+{
+	pte_t         **pdir;
+	pte_t          *ptbl;
+
+	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));
+
+	pdir = pmap->pm_pp2d[PP2D_IDX(va)];
+	if (!pdir)
+		return NULL;
+	ptbl = pdir[PDIR_IDX(va)];
+	return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL);
+}
+
+/*
+ * Search the pmap's list of pdir bufs and free the buf backing the given pdir.
+ */
+static void
+ptbl_free_pmap_pdir(mmu_t mmu, pmap_t pmap, pte_t ** pdir)
+{
+	struct ptbl_buf *pbuf;
+
+	TAILQ_FOREACH(pbuf, &pmap->pm_pdir_list, link) {
+		if (pbuf->kva == (vm_offset_t) pdir) {
+			/* Remove from pmap ptbl buf list. */
+			TAILQ_REMOVE(&pmap->pm_pdir_list, pbuf, link);
+
+			/* Free corresponding pdir buf. */
+			ptbl_buf_free(pbuf);
+
+			break;
+		}
+	}
+}
+/* Free pdir pages and invalidate pdir entry. */
+static void
+pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
+{
+	pte_t         **pdir;
+	vm_paddr_t	pa;
+	vm_offset_t	va;
+	vm_page_t	m;
+	int		i;
+
+	pdir = pmap->pm_pp2d[pp2d_idx];
+
+	KASSERT((pdir != NULL), ("pdir_free: null pdir"));
+
+	pmap->pm_pp2d[pp2d_idx] = NULL;
+
+	for (i = 0; i < PDIR_PAGES; i++) {
+		va = ((vm_offset_t) pdir + (i * PAGE_SIZE));
+		pa = pte_vatopa(mmu, kernel_pmap, va);
+		m = PHYS_TO_VM_PAGE(pa);
+		vm_page_free_zero(m);
+		atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+		pmap_kremove(va);
+	}
+
+	ptbl_free_pmap_pdir(mmu, pmap, pdir);
+}
+
+/*
+ * Decrement pdir pages hold count and attempt to free pdir pages. Called
+ * when removing directory entry from pdir.
+ * 
+ * Return 1 if pdir pages were freed.
+ */
+static int
+pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx)
+{
+	pte_t         **pdir;
+	vm_paddr_t	pa;
+	vm_page_t	m;
+	int		i;
+
+	KASSERT((pmap != kernel_pmap),
+		("pdir_unhold: unholding kernel pdir!"));
+
+	pdir = pmap->pm_pp2d[pp2d_idx];
+
+	KASSERT(((vm_offset_t) pdir >= VM_MIN_KERNEL_ADDRESS),
+	    ("pdir_unhold: non kva pdir"));
+
+	/* decrement hold count */
+	for (i = 0; i < PDIR_PAGES; i++) {
+		pa = pte_vatopa(mmu, kernel_pmap,
+		    (vm_offset_t) pdir + (i * PAGE_SIZE));
+		m = PHYS_TO_VM_PAGE(pa);
+		m->wire_count--;
+	}
+
+	/*
+	 * Free pdir pages if there are no dir entries in this pdir.
+	 * wire_count has the same value for all ptbl pages, so check the
+	 * last page.
+	 */
+	if (m->wire_count == 0) {
+		pdir_free(mmu, pmap, pp2d_idx);
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Increment hold count for pdir pages. This routine is used when a new ptbl
+ * entry is being inserted into the pdir.
+ */
+static void
+pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir)
+{
+	vm_paddr_t	pa;
+	vm_page_t	m;
+	int		i;
+
+	KASSERT((pmap != kernel_pmap),
+		("pdir_hold: holding kernel pdir!"));
+
+	KASSERT((pdir != NULL), ("pdir_hold: null pdir"));
+
+	for (i = 0; i < PDIR_PAGES; i++) {
+		pa = pte_vatopa(mmu, kernel_pmap,
+				(vm_offset_t) pdir + (i * PAGE_SIZE));
+		m = PHYS_TO_VM_PAGE(pa);
+		m->wire_count++;
+	}
+}
+
+/* Allocate page table. */
+static pte_t   *
+ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx,
+    boolean_t nosleep)
+{
+	vm_page_t	mtbl  [PTBL_PAGES];
+	vm_page_t	m;
+	struct ptbl_buf *pbuf;
+	unsigned int	pidx;
+	pte_t          *ptbl;
+	int		i, j;
+	int		req;
+
+	KASSERT((pdir[pdir_idx] == NULL),
+		("%s: valid ptbl entry exists!", __func__));
+
+	pbuf = ptbl_buf_alloc();
+	if (pbuf == NULL)
+		panic("%s: couldn't alloc kernel virtual memory", __func__);
+
+	ptbl = (pte_t *) pbuf->kva;
+
+	for (i = 0; i < PTBL_PAGES; i++) {
+		pidx = (PTBL_PAGES * pdir_idx) + i;
+		req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
+		while ((m = vm_page_alloc(NULL, pidx, req)) == NULL) {
+			PMAP_UNLOCK(pmap);
+			rw_wunlock(&pvh_global_lock);
+			if (nosleep) {
+				ptbl_free_pmap_ptbl(pmap, ptbl);
+				for (j = 0; j < i; j++)
+					vm_page_free(mtbl[j]);
+				atomic_subtract_int(&vm_cnt.v_wire_count, i);
+				return (NULL);
+			}
+			VM_WAIT;
+			rw_wlock(&pvh_global_lock);
+			PMAP_LOCK(pmap);
+		}
+		mtbl[i] = m;
+	}
+
+	/* Map the allocated pages into kernel_pmap. */
+	mmu_booke_qenter(mmu, (vm_offset_t) ptbl, mtbl, PTBL_PAGES);
+	/* Zero whole ptbl. */
+	bzero((caddr_t) ptbl, PTBL_PAGES * PAGE_SIZE);
+
+	/* Add pbuf to the pmap ptbl bufs list. */
+	TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
+
+	return (ptbl);
+}
+
+/* Free ptbl pages and invalidate pdir entry. */
+static void
+ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
+{
+	pte_t          *ptbl;
+	vm_paddr_t	pa;
+	vm_offset_t	va;
+	vm_page_t	m;
+	int		i;
+
+	ptbl = pdir[pdir_idx];
+
+	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));
+
+	pdir[pdir_idx] = NULL;
+
+	for (i = 0; i < PTBL_PAGES; i++) {
+		va = ((vm_offset_t) ptbl + (i * PAGE_SIZE));
+		pa = pte_vatopa(mmu, kernel_pmap, va);
+		m = PHYS_TO_VM_PAGE(pa);
+		vm_page_free_zero(m);
+		atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+		pmap_kremove(va);
+	}
+
+	ptbl_free_pmap_ptbl(pmap, ptbl);
+}
+
+/*
+ * Decrement ptbl pages hold count and attempt to free ptbl pages. Called
+ * when removing pte entry from ptbl.
+ * 
+ * Return 1 if ptbl pages were freed.
+ */
+static int
+ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va)
+{
+	pte_t          *ptbl;
+	vm_paddr_t	pa;
+	vm_page_t	m;
+	u_int		pp2d_idx;
+	pte_t         **pdir;
+	u_int		pdir_idx;
+	int		i;
+
+	pp2d_idx = PP2D_IDX(va);
+	pdir_idx = PDIR_IDX(va);
+
+	KASSERT((pmap != kernel_pmap),
+		("ptbl_unhold: unholding kernel ptbl!"));
+
+	pdir = pmap->pm_pp2d[pp2d_idx];
+	ptbl = pdir[pdir_idx];
+
+	KASSERT(((vm_offset_t) ptbl >= VM_MIN_KERNEL_ADDRESS),
+	    ("ptbl_unhold: non kva ptbl"));
+
+	/* decrement hold count */
+	for (i = 0; i < PTBL_PAGES; i++) {
+		pa = pte_vatopa(mmu, kernel_pmap,
+		    (vm_offset_t) ptbl + (i * PAGE_SIZE));
+		m = PHYS_TO_VM_PAGE(pa);
+		m->wire_count--;
+	}
+
+	/*
+	 * Free ptbl pages if there are no pte entries in this ptbl.
+	 * wire_count has the same value for all ptbl pages, so check the
+	 * last page.
+	 */
+	if (m->wire_count == 0) {
+		/* A pair of indirect entries might point to this ptbl page */
+#if 0
+		tlb_flush_entry(pmap, va & ~((2UL * PAGE_SIZE_1M) - 1),
+				TLB_SIZE_1M, MAS6_SIND);
+		tlb_flush_entry(pmap, (va & ~((2UL * PAGE_SIZE_1M) - 1)) | PAGE_SIZE_1M,
+				TLB_SIZE_1M, MAS6_SIND);
+#endif
+		ptbl_free(mmu, pmap, pdir, pdir_idx);
+		pdir_unhold(mmu, pmap, pp2d_idx);
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Increment hold count for ptbl pages. This routine is used when a new pte
+ * entry is being inserted into the ptbl.
+ */
+static void
+ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
+{
+	vm_paddr_t	pa;
+	pte_t          *ptbl;
+	vm_page_t	m;
+	int		i;
+
+	KASSERT((pmap != kernel_pmap),
+		("ptbl_hold: holding kernel ptbl!"));
+
+	ptbl = pdir[pdir_idx];
+
+	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
+
+	for (i = 0; i < PTBL_PAGES; i++) {
+		pa = pte_vatopa(mmu, kernel_pmap,
+				(vm_offset_t) ptbl + (i * PAGE_SIZE));
+		m = PHYS_TO_VM_PAGE(pa);
+		m->wire_count++;
+	}
+}
+#else
+
 /* Initialize pool of kva ptbl buffers. */
 static void
 ptbl_init(void)
@@ -518,7 +926,8 @@ ptbl_init(void)
 	TAILQ_INIT(&ptbl_buf_freelist);
 
 	for (i = 0; i < PTBL_BUFS; i++) {
-		ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
+		ptbl_bufs[i].kva =
+		    ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
 		TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
 	}
 }
@@ -602,7 +1011,6 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsig
 
 	CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);
 
-	/* Allocate ptbl pages, this will sleep! */
 	for (i = 0; i < PTBL_PAGES; i++) {
 		pidx = (PTBL_PAGES * pdir_idx) + i;
 		while ((m = vm_page_alloc(NULL, pidx,
@@ -763,6 +1171,7 @@ ptbl_hold(mmu_t mmu, pmap_t pmap, unsign
 		m->wire_count++;
 	}
 }
+#endif
 
 /* Allocate pv_entry structure. */
 pv_entry_t
@@ -843,6 +1252,235 @@ pv_remove(pmap_t pmap, vm_offset_t va, v
 	//debugf("pv_remove: e\n");
 }
 
+#ifdef __powerpc64__
+/*
+ * Clean pte entry, try to free page table page if requested.
+ * 
+ * Return 1 if ptbl pages were freed, otherwise return 0.
+ */
+static int
+pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags)
+{
+	vm_page_t	m;
+	pte_t          *pte;
+
+	pte = pte_find(mmu, pmap, va);
+	KASSERT(pte != NULL, ("%s: NULL pte", __func__));
+
+	if (!PTE_ISVALID(pte))
+		return (0);
+
+	/* Get vm_page_t for mapped pte. */
+	m = PHYS_TO_VM_PAGE(PTE_PA(pte));
+
+	if (PTE_ISWIRED(pte))
+		pmap->pm_stats.wired_count--;
+
+	/* Handle managed entry. */
+	if (PTE_ISMANAGED(pte)) {
+
+		/* Handle modified pages. */
+		if (PTE_ISMODIFIED(pte))
+			vm_page_dirty(m);
+
+		/* Referenced pages. */
+		if (PTE_ISREFERENCED(pte))
+			vm_page_aflag_set(m, PGA_REFERENCED);
+
+		/* Remove pv_entry from pv_list. */
+		pv_remove(pmap, va, m);
+	}
+	mtx_lock_spin(&tlbivax_mutex);
+	tlb_miss_lock();
+
+	tlb0_flush_entry(va);
+	*pte = 0;
+
+	tlb_miss_unlock();
+	mtx_unlock_spin(&tlbivax_mutex);
+
+	pmap->pm_stats.resident_count--;
+
+	if (flags & PTBL_UNHOLD) {
+		return (ptbl_unhold(mmu, pmap, va));
+	}
+	return (0);
+}
+
+/*
+ * Allocate a page of pointers to page directories; do not preallocate the
+ * page tables.
+ */
+static pte_t  **
+pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep)
+{
+	vm_page_t	mtbl  [PDIR_PAGES];
+	vm_page_t	m;
+	struct ptbl_buf *pbuf;
+	pte_t         **pdir;
+	unsigned int	pidx;
+	int		i;
+	int		req;
+
+	pbuf = ptbl_buf_alloc();
+
+	if (pbuf == NULL)
+		panic("%s: couldn't alloc kernel virtual memory", __func__);
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


