Date:      Tue, 6 Mar 2018 14:28:38 +0000 (UTC)
From:      "Jonathan T. Looney" <jtl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r330539 - in head/sys: amd64/amd64 amd64/include arm/include conf gdb i386/include mips/include powerpc/include sparc64/include
Message-ID:  <201803061428.w26EScwJ020926@repo.freebsd.org>

Author: jtl
Date: Tue Mar  6 14:28:37 2018
New Revision: 330539
URL: https://svnweb.freebsd.org/changeset/base/330539

Log:
  amd64: Protect the kernel text, data, and BSS by setting the RW/NX bits
  correctly for the data contained on each memory page.
  
  There are several components to this change:
   * Add a variable to indicate the start of the R/W portion of the
     initial memory.
   * Stop detecting NX bit support for each AP.  Instead, use the value
     from the BSP and, if supported, activate the feature on the other
     APs just before loading the correct page table.  (Functionally, we
     already assume that the BSP and all APs have the same support, or
     lack of support, for the NX bit.)
   * Set the RW and NX bits correctly for the kernel text, data, and
     BSS (subject to some caveats below).
   * Ensure DDB can write to memory when necessary (such as to set a
     breakpoint).
   * Ensure GDB can write to memory when necessary (such as to set a
     breakpoint).  For this purpose, add new MD functions gdb_begin_write()
     and gdb_end_write() which the GDB support code can call before and
     after writing to memory.
  
  This change is not comprehensive:
   * It doesn't do anything to protect modules.
   * It doesn't do anything for kernel memory allocated after the kernel
     starts running.
   * To avoid wasting memory, it may let multiple types of data share a
     2M page; such a page is assigned the union of the permissions that
     the data on it requires.
  
  Reviewed by:	jhb, kib
  Discussed with:	emaste
  MFC after:	2 weeks
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D14282
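
Before the per-file diffs, here is a small self-contained userland model of
the CR0.WP save/clear/restore bracket that the DDB and GDB changes below rely
on.  It is an illustration only: the "cr0" variable and the begin_write()/
end_write() names are stand-ins, while in the kernel the real register is
read and written with rcr0()/load_cr0() and CR0_WP is bit 16 of %cr0.

/*
 * Userland model of the write-protection bracket; nothing here touches
 * real hardware, so it can be compiled and run anywhere.
 */
#include <stdio.h>

#define CR0_WP	0x00010000UL		/* supervisor write-protect enable */

static unsigned long cr0 = 0x80010033UL;	/* arbitrary "current" value */

static unsigned long
rcr0(void)
{
	return (cr0);
}

static void
load_cr0(unsigned long v)
{
	cr0 = v;
}

/* Mirrors the shape of gdb_begin_write()/gdb_end_write() in the diff. */
static unsigned long
begin_write(void)
{
	unsigned long save;

	save = rcr0();
	load_cr0(save & ~CR0_WP);	/* writes to read-only pages now work */
	return (save);
}

static void
end_write(unsigned long save)
{
	load_cr0(save);			/* restore, rather than blindly re-set WP */
}

int
main(void)
{
	unsigned long token;

	token = begin_write();
	printf("during write: WP %s\n", (cr0 & CR0_WP) ? "on" : "off");
	end_write(token);
	printf("after write:  WP %s\n", (cr0 & CR0_WP) ? "on" : "off");
	return (0);
}

Saving and restoring the old %cr0 value, rather than unconditionally setting
WP again afterwards, keeps the bracket correct even if write protection was
already disabled when the debugger was entered.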

Modified:
  head/sys/amd64/amd64/db_interface.c
  head/sys/amd64/amd64/gdb_machdep.c
  head/sys/amd64/amd64/initcpu.c
  head/sys/amd64/amd64/mpboot.S
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/cpu.h
  head/sys/amd64/include/gdb_machdep.h
  head/sys/arm/include/gdb_machdep.h
  head/sys/conf/ldscript.amd64
  head/sys/gdb/gdb_packet.c
  head/sys/i386/include/gdb_machdep.h
  head/sys/mips/include/gdb_machdep.h
  head/sys/powerpc/include/gdb_machdep.h
  head/sys/sparc64/include/gdb_machdep.h

Modified: head/sys/amd64/amd64/db_interface.c
==============================================================================
--- head/sys/amd64/amd64/db_interface.c	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/db_interface.c	Tue Mar  6 14:28:37 2018	(r330539)
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/kdb.h>
 #include <sys/pcpu.h>
 
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+
 #include <ddb/ddb.h>
 
 /*
@@ -62,6 +65,9 @@ db_read_bytes(vm_offset_t addr, size_t size, char *dat
 
 /*
  * Write bytes to kernel address space for debugger.
+ * We need to disable write protection temporarily so we can write
+ * things (such as break points) that might be in write-protected
+ * memory.
  */
 int
 db_write_bytes(vm_offset_t addr, size_t size, char *data)
@@ -69,15 +75,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *da
 	jmp_buf jb;
 	void *prev_jb;
 	char *dst;
+	u_long cr0save;
 	int ret;
 
+	cr0save = rcr0();
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
+		load_cr0(cr0save & ~CR0_WP);
 		dst = (char *)addr;
 		while (size-- > 0)
 			*dst++ = *data++;
 	}
+	load_cr0(cr0save);
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }

Modified: head/sys/amd64/amd64/gdb_machdep.c
==============================================================================
--- head/sys/amd64/amd64/gdb_machdep.c	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/gdb_machdep.c	Tue Mar  6 14:28:37 2018	(r330539)
@@ -36,11 +36,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/signal.h>
 
+#include <machine/cpufunc.h>
 #include <machine/frame.h>
 #include <machine/gdb_machdep.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
+#include <machine/specialreg.h>
 #include <machine/trap.h>
 #include <machine/frame.h>
 #include <machine/endian.h>
@@ -121,3 +123,21 @@ gdb_cpu_signal(int type, int code)
 	}
 	return (SIGEMT);
 }
+
+void *
+gdb_begin_write(void)
+{
+	u_long cr0save;
+
+	cr0save = rcr0();
+	load_cr0(cr0save & ~CR0_WP);
+	return ((void *)cr0save);
+}
+
+void
+gdb_end_write(void *arg)
+{
+
+	load_cr0((u_long)arg);
+}
+

Modified: head/sys/amd64/amd64/initcpu.c
==============================================================================
--- head/sys/amd64/amd64/initcpu.c	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/initcpu.c	Tue Mar  6 14:28:37 2018	(r330539)
@@ -218,7 +218,7 @@ initializecpu(void)
 	if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
 		cr4 |= CR4_SMEP;
 	load_cr4(cr4);
-	if ((amd_feature & AMDID_NX) != 0) {
+	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
 		msr = rdmsr(MSR_EFER) | EFER_NXE;
 		wrmsr(MSR_EFER, msr);
 		pg_nx = PG_NX;

Modified: head/sys/amd64/amd64/mpboot.S
==============================================================================
--- head/sys/amd64/amd64/mpboot.S	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/mpboot.S	Tue Mar  6 14:28:37 2018	(r330539)
@@ -221,15 +221,31 @@ mptramp_end:
 
 	/*
 	 * From here on down is executed in the kernel .text section.
-	 *
-	 * Load a real %cr3 that has all the direct map stuff and switches
-	 * off the 1GB replicated mirror.  Load a stack pointer and jump
-	 * into AP startup code in C.
 	 */
 	.text
 	.code64
 	.p2align 4,0
 entry_64:
+	/*
+	 * If the BSP reported NXE support, enable EFER.NXE for all APs
+	 * prior to loading %cr3. This avoids page faults if the AP
+	 * encounters memory marked with the NX bit prior to detecting and
+	 * enabling NXE support.
+	 */
+	movq	pg_nx, %rbx
+	testq	%rbx, %rbx
+	je	1f
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	orl	$EFER_NXE, %eax
+	wrmsr
+
+1:
+	/*
+	 * Load a real %cr3 that has all the direct map stuff and switches
+	 * off the 1GB replicated mirror.  Load a stack pointer and jump
+	 * into AP startup code in C.
+	 */
 	movq	KPML4phys, %rax
 	movq	%rax, %cr3
 	movq	bootSTK, %rsp
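
For readers who prefer C to assembly, the new entry_64 steps are roughly
equivalent to the sketch below.  This is an illustration, not part of the
commit: it assumes the standard amd64 rdmsr()/wrmsr()/load_cr3() inlines from
<machine/cpufunc.h> and the pg_nx and KPML4phys declarations from
<machine/pmap.h>, and the function name is made up (entry_64 has no C body).

#include <machine/cpufunc.h>	/* rdmsr(), wrmsr(), load_cr3() */
#include <machine/specialreg.h>	/* MSR_EFER, EFER_NXE */
#include <machine/pmap.h>	/* pg_nx, KPML4phys */

static void
ap_entry64_sketch(void)
{
	/*
	 * pg_nx is non-zero only if the BSP detected NX support in
	 * initializecpu().  Enable EFER.NXE before switching to page
	 * tables whose entries may carry PG_NX.
	 */
	if (pg_nx != 0)
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);

	/* Switch off the boot page tables onto the real kernel %cr3. */
	load_cr3(KPML4phys);

	/*
	 * The assembly then loads bootSTK into %rsp and jumps into the
	 * AP startup code in C.
	 */
}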

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/pmap.c	Tue Mar  6 14:28:37 2018	(r330539)
@@ -870,14 +870,64 @@ nkpt_init(vm_paddr_t addr)
 	nkpt = pt_pages;
 }
 
+/*
+ * Returns the proper write/execute permission for a physical page that is
+ * part of the initial boot allocations.
+ *
+ * If the page has kernel text, it is marked as read-only. If the page has
+ * kernel read-only data, it is marked as read-only/not-executable. If the
+ * page has only read-write data, it is marked as read-write/not-executable.
+ * If the page is below/above the kernel range, it is marked as read-write.
+ *
+ * This function operates on 2M pages, since we map the kernel space that
+ * way.
+ *
+ * Note that this doesn't currently provide any protection for modules.
+ */
+static inline pt_entry_t
+bootaddr_rwx(vm_paddr_t pa)
+{
+
+	/*
+	 * Everything in the same 2M page as the start of the kernel
+	 * should be static. On the other hand, things in the same 2M
+	 * page as the end of the kernel could be read-write/executable,
+	 * as the kernel image is not guaranteed to end on a 2M boundary.
+	 */
+	if (pa < trunc_2mpage(btext - KERNBASE) ||
+	   pa >= trunc_2mpage(_end - KERNBASE))
+		return (X86_PG_RW);
+	/*
+	 * The linker should ensure that the read-only and read-write
+	 * portions don't share the same 2M page, so this shouldn't
+	 * impact read-only data. However, in any case, any page with
+	 * read-write data needs to be read-write.
+	 */
+	if (pa >= trunc_2mpage(brwsection - KERNBASE))
+		return (X86_PG_RW | pg_nx);
+	/*
+	 * Mark any 2M page containing kernel text as read-only. Mark
+	 * other pages with read-only data as read-only and not executable.
+	 * (It is likely a small portion of the read-only data section will
+	 * be marked as read-only, but executable. This should be acceptable
+	 * since the read-only protection will keep the data from changing.)
+	 * Note that fixups to the .text section will still work until we
+	 * set CR0.WP.
+	 */
+	if (pa < round_2mpage(etext - KERNBASE))
+		return (0);
+	return (pg_nx);
+}
+
 static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
-	int i, j, ndm1g, nkpdpe;
+	int i, j, ndm1g, nkpdpe, nkdmpde;
 	pt_entry_t *pt_p;
 	pd_entry_t *pd_p;
 	pdp_entry_t *pdp_p;
 	pml4_entry_t *p4_p;
+	uint64_t DMPDkernphys;
 
 	/* Allocate page table pages for the direct map */
 	ndmpdp = howmany(ptoa(Maxmem), NBPDP);
@@ -896,8 +946,20 @@ create_pagetables(vm_paddr_t *firstaddr)
 	}
 	DMPDPphys = allocpages(firstaddr, ndmpdpphys);
 	ndm1g = 0;
-	if ((amd_feature & AMDID_PAGE1GB) != 0)
+	if ((amd_feature & AMDID_PAGE1GB) != 0) {
+		/*
+		 * Calculate the number of 1G pages that will fully fit in
+		 * Maxmem.
+		 */
 		ndm1g = ptoa(Maxmem) >> PDPSHIFT;
+
+		/*
+		 * Allocate 2M pages for the kernel. These will be used in
+		 * place of the first one or more 1G pages from ndm1g.
+		 */
+		nkdmpde = howmany((vm_offset_t)(brwsection - KERNBASE), NBPDP);
+		DMPDkernphys = allocpages(firstaddr, nkdmpde);
+	}
 	if (ndm1g < ndmpdp)
 		DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
 	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
@@ -923,11 +985,10 @@ create_pagetables(vm_paddr_t *firstaddr)
 	KPDphys = allocpages(firstaddr, nkpdpe);
 
 	/* Fill in the underlying page table pages */
-	/* Nominally read-only (but really R/W) from zero to physfree */
 	/* XXX not fully used, underneath 2M pages */
 	pt_p = (pt_entry_t *)KPTphys;
 	for (i = 0; ptoa(i) < *firstaddr; i++)
-		pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | pg_g;
+		pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i));
 
 	/* Now map the page tables at their location within PTmap */
 	pd_p = (pd_entry_t *)KPDphys;
@@ -937,8 +998,8 @@ create_pagetables(vm_paddr_t *firstaddr)
 	/* Map from zero to end of allocations under 2M pages */
 	/* This replaces some of the KPTphys entries above */
 	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
-		pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
-		    pg_g;
+		pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+		    bootaddr_rwx(i << PDRSHIFT);
 
 	/*
 	 * Because we map the physical blocks in 2M pages, adjust firstaddr
@@ -978,6 +1039,22 @@ create_pagetables(vm_paddr_t *firstaddr)
 	for (j = 0; i < ndmpdp; i++, j++) {
 		pdp_p[i] = DMPDphys + ptoa(j);
 		pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
+	}
+
+	/*
+	 * Instead of using a 1G page for the memory containing the kernel,
+	 * use 2M pages with appropriate permissions. (If using 1G pages,
+	 * this will partially overwrite the PDPEs above.)
+	 */
+	if (ndm1g) {
+		pd_p = (pd_entry_t *)DMPDkernphys;
+		for (i = 0; i < (NPDEPG * nkdmpde); i++)
+			pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+			    X86_PG_M | X86_PG_A | pg_nx |
+			    bootaddr_rwx(i << PDRSHIFT);
+		for (i = 0; i < nkdmpde; i++)
+			pdp_p[i] = (DMPDkernphys + ptoa(i)) | X86_PG_RW |
+			    X86_PG_V | PG_U;
 	}
 
 	/* And recursively map PML4 to itself in order to get PTmap */
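
The permission classes chosen by bootaddr_rwx() above are easiest to see with
concrete numbers.  The following stand-alone userland model is not part of
the commit; the symbol offsets are invented stand-ins for btext, etext,
brwsection and _end (each already reduced by KERNBASE), but the decision
chain is the same, and the program prints the class assigned to each 2M page
of the image.

#include <stdio.h>
#include <stdint.h>

#define NBPDR_2M	(1ULL << 21)			/* 2M superpage size */
#define trunc_2mpage(pa)	((pa) & ~(NBPDR_2M - 1))
#define round_2mpage(pa)	(((pa) + NBPDR_2M - 1) & ~(NBPDR_2M - 1))

/* Hypothetical physical offsets (pa = va - KERNBASE) of the kernel symbols. */
static const uint64_t btext_pa      = 0x200000;	/* start of .text */
static const uint64_t etext_pa      = 0x7c3000;	/* end of .text */
static const uint64_t brwsection_pa = 0xa00000;	/* start of read-write segment */
static const uint64_t end_pa        = 0xe4d000;	/* end of the loaded image */

/*
 * Same decision chain as bootaddr_rwx(), returning a description instead
 * of PTE bits.
 */
static const char *
page_class(uint64_t pa)
{
	if (pa < trunc_2mpage(btext_pa) || pa >= trunc_2mpage(end_pa))
		return ("read-write, executable (outside the static image)");
	if (pa >= trunc_2mpage(brwsection_pa))
		return ("read-write, no-execute (data/BSS)");
	if (pa < round_2mpage(etext_pa))
		return ("read-only, executable (text)");
	return ("read-only, no-execute (read-only data)");
}

int
main(void)
{
	uint64_t pa;

	for (pa = 0; pa <= round_2mpage(end_pa); pa += NBPDR_2M)
		printf("2M page at %#9jx: %s\n", (uintmax_t)pa, page_class(pa));
	return (0);
}

With these offsets the text pages stay executable but read-only, the
read-only data page loses execute permission, and the data/BSS pages are
writable but never executable; pages outside the static image, including the
partially used page at its end, fall back to read-write, which is the
granularity trade-off noted in the commit log.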

Modified: head/sys/amd64/include/cpu.h
==============================================================================
--- head/sys/amd64/include/cpu.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/include/cpu.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -68,7 +68,9 @@ struct cpu_ops {
 };
 
 extern struct	cpu_ops cpu_ops;
+extern char	brwsection[];
 extern char	btext[];
+extern char	_end[];
 extern char	etext[];
 
 /* Resume hook for VMM. */

Modified: head/sys/amd64/include/gdb_machdep.h
==============================================================================
--- head/sys/amd64/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/amd64/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -47,8 +47,10 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+void *gdb_begin_write(void);
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);
+void gdb_end_write(void *);
 
 #endif /* !_MACHINE_GDB_MACHDEP_H_ */

Modified: head/sys/arm/include/gdb_machdep.h
==============================================================================
--- head/sys/arm/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/arm/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -47,6 +47,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/conf/ldscript.amd64
==============================================================================
--- head/sys/conf/ldscript.amd64	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/conf/ldscript.amd64	Tue Mar  6 14:28:37 2018	(r330539)
@@ -80,6 +80,7 @@ SECTIONS
   /* Adjust the address for the data segment.  We want to adjust up to
      the same address within the page on the next page up.  */
   . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+  PROVIDE (brwsection = .);
   /* Exception handling  */
   .eh_frame       : ONLY_IF_RW { KEEP (*(.eh_frame)) }
   .gcc_except_table   : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }

Modified: head/sys/gdb/gdb_packet.c
==============================================================================
--- head/sys/gdb/gdb_packet.c	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/gdb/gdb_packet.c	Tue Mar  6 14:28:37 2018	(r330539)
@@ -147,6 +147,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 {
 	unsigned char *p;
 	void *prev;
+	void *wctx;
 	jmp_buf jb;
 	size_t cnt;
 	int ret;
@@ -155,6 +156,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 	if (size * 2 != gdb_rxsz)
 		return (-1);
 
+	wctx = gdb_begin_write();
 	prev = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
@@ -170,6 +172,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 		kdb_cpu_sync_icache(addr, size);
 	}
 	(void)kdb_jmpbuf(prev);
+	gdb_end_write(wctx);
 	return ((ret == 0) ? 1 : 0);
 }
 

Modified: head/sys/i386/include/gdb_machdep.h
==============================================================================
--- head/sys/i386/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/i386/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -47,6 +47,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/mips/include/gdb_machdep.h
==============================================================================
--- head/sys/mips/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/mips/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -51,6 +51,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/powerpc/include/gdb_machdep.h
==============================================================================
--- head/sys/powerpc/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/powerpc/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -76,6 +76,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/sparc64/include/gdb_machdep.h
==============================================================================
--- head/sys/sparc64/include/gdb_machdep.h	Tue Mar  6 14:18:45 2018	(r330538)
+++ head/sys/sparc64/include/gdb_machdep.h	Tue Mar  6 14:28:37 2018	(r330539)
@@ -53,6 +53,19 @@ gdb_cpu_signal(int vector, int _)
 	return (vector);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 


