From owner-svn-src-all@FreeBSD.ORG Fri Mar 21 14:25:10 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id A11D674E; Fri, 21 Mar 2014 14:25:10 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 7E4E486; Fri, 21 Mar 2014 14:25:10 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s2LEPAvx044607; Fri, 21 Mar 2014 14:25:10 GMT (envelope-from kib@svn.freebsd.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s2LEP98p044601; Fri, 21 Mar 2014 14:25:09 GMT (envelope-from kib@svn.freebsd.org) Message-Id: <201403211425.s2LEP98p044601@svn.freebsd.org> From: Konstantin Belousov Date: Fri, 21 Mar 2014 14:25:09 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r263475 - in head/sys: amd64/amd64 kern sys vm X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.17 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 21 Mar 2014 14:25:10 -0000 Author: kib Date: Fri Mar 21 14:25:09 2014 New Revision: 263475 URL: http://svnweb.freebsd.org/changeset/base/263475 Log: Fix two issues with /dev/mem access on amd64, both causing kernel page faults. First, for accesses to direct map region should check for the limit by which direct map is instantiated. Second, for accesses to the kernel map, success returned from the kernacc(9) does not guarantee that consequent attempt to read or write to the checked address succeed, since other thread might invalidate the address meantime. Add a new thread private flag TDP_DEVMEMIO, which instructs vm_fault() to return error when fault happens on the MAP_ENTRY_NOFAULT entry, instead of panicing. The trap handler would then see a page fault from access, and recover in normal way, making /dev/mem access safer. Remove GIANT_REQUIRED from the amd64 memrw(), since it is not needed and having Giant locked does not solve issues for amd64. Note that at least the second issue exists on other architectures, and requires similar patching for md code. Reported and tested by: clusteradm (gjb, sbruno) Sponsored by: The FreeBSD Foundation MFC after: 1 week Modified: head/sys/amd64/amd64/mem.c head/sys/amd64/amd64/trap.c head/sys/kern/subr_trap.c head/sys/sys/proc.h head/sys/vm/vm_fault.c Modified: head/sys/amd64/amd64/mem.c ============================================================================== --- head/sys/amd64/amd64/mem.c Fri Mar 21 14:19:47 2014 (r263474) +++ head/sys/amd64/amd64/mem.c Fri Mar 21 14:25:09 2014 (r263475) @@ -76,14 +76,16 @@ MALLOC_DEFINE(M_MEMDESC, "memdesc", "mem int memrw(struct cdev *dev, struct uio *uio, int flags) { - int o; - u_long c = 0, v; struct iovec *iov; - int error = 0; + u_long c, v; + int error, o, sflags; vm_offset_t addr, eaddr; GIANT_REQUIRED; + error = 0; + c = 0; + sflags = curthread_pflags_set(TDP_DEVMEMIO); while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { @@ -98,7 +100,15 @@ memrw(struct cdev *dev, struct uio *uio, kmemphys: o = v & PAGE_MASK; c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o)); - error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio); + v = PHYS_TO_DMAP(v); + if (v < DMAP_MIN_ADDRESS || + (v > DMAP_MIN_ADDRESS + dmaplimit && + v <= DMAP_MAX_ADDRESS) || + pmap_kextract(v) == 0) { + error = EFAULT; + goto ret; + } + error = uiomove((void *)v, (int)c, uio); continue; } else if (dev2unit(dev) == CDEV_MINOR_KMEM) { @@ -119,22 +129,30 @@ kmemphys: addr = trunc_page(v); eaddr = round_page(v + c); - if (addr < VM_MIN_KERNEL_ADDRESS) - return (EFAULT); - for (; addr < eaddr; addr += PAGE_SIZE) - if (pmap_extract(kernel_pmap, addr) == 0) - return (EFAULT); - + if (addr < VM_MIN_KERNEL_ADDRESS) { + error = EFAULT; + goto ret; + } + for (; addr < eaddr; addr += PAGE_SIZE) { + if (pmap_extract(kernel_pmap, addr) == 0) { + error = EFAULT; + goto ret; + } + } if (!kernacc((caddr_t)(long)v, c, uio->uio_rw == UIO_READ ? - VM_PROT_READ : VM_PROT_WRITE)) - return (EFAULT); + VM_PROT_READ : VM_PROT_WRITE)) { + error = EFAULT; + goto ret; + } error = uiomove((caddr_t)(long)v, (int)c, uio); continue; } /* else panic! */ } +ret: + curthread_pflags_restore(sflags); return (error); } Modified: head/sys/amd64/amd64/trap.c ============================================================================== --- head/sys/amd64/amd64/trap.c Fri Mar 21 14:19:47 2014 (r263474) +++ head/sys/amd64/amd64/trap.c Fri Mar 21 14:25:09 2014 (r263475) @@ -789,6 +789,12 @@ nogo: frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } + if ((td->td_pflags & TDP_DEVMEMIO) != 0) { + KASSERT(curpcb->pcb_onfault != NULL, + ("/dev/mem without pcb_onfault")); + frame->tf_rip = (long)curpcb->pcb_onfault; + return (0); + } trap_fatal(frame, eva); return (-1); } Modified: head/sys/kern/subr_trap.c ============================================================================== --- head/sys/kern/subr_trap.c Fri Mar 21 14:19:47 2014 (r263474) +++ head/sys/kern/subr_trap.c Fri Mar 21 14:25:09 2014 (r263475) @@ -157,6 +157,8 @@ userret(struct thread *td, struct trapfr td->td_rw_rlocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); + KASSERT((td->td_pflags & TDP_DEVMEMIO) == 0, + ("userret: Returning with /dev/mem i/o leaked")); KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, Modified: head/sys/sys/proc.h ============================================================================== --- head/sys/sys/proc.h Fri Mar 21 14:19:47 2014 (r263474) +++ head/sys/sys/proc.h Fri Mar 21 14:25:09 2014 (r263475) @@ -428,6 +428,7 @@ do { \ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ +#define TDP_DEVMEMIO 0x20000000 /* Accessing memory for /dev/mem */ /* * Reasons that the current thread can not be run yet. Modified: head/sys/vm/vm_fault.c ============================================================================== --- head/sys/vm/vm_fault.c Fri Mar 21 14:19:47 2014 (r263474) +++ head/sys/vm/vm_fault.c Fri Mar 21 14:25:09 2014 (r263475) @@ -269,6 +269,10 @@ RetryFault:; map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { + if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) { + vm_map_unlock_read(fs.map); + return (KERN_FAILURE); + } panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); }