Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 27 Jun 2002 10:41:36 -0700 (PDT)
From:      Julian Elischer <julian@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 13486 for review
Message-ID:  <200206271741.g5RHfaQZ086630@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://people.freebsd.org/~peter/p4db/chv.cgi?CH=13486

Change 13486 by julian@julian_ref on 2002/06/27 10:40:35

	MFC to try to catch some bugfixes in pmap.c

Affected files ...

.. //depot/projects/kse/sys/alpha/alpha/pmap.c#29 integrate
.. //depot/projects/kse/sys/compat/linux/linux_ioctl.c#15 integrate
.. //depot/projects/kse/sys/conf/NOTES#9 integrate
.. //depot/projects/kse/sys/conf/files#40 integrate
.. //depot/projects/kse/sys/conf/options#21 integrate
.. //depot/projects/kse/sys/i386/i386/pmap.c#39 integrate
.. //depot/projects/kse/sys/kern/kern_jail.c#15 integrate
.. //depot/projects/kse/sys/kern/kern_module.c#9 integrate
.. //depot/projects/kse/sys/kern/kern_subr.c#14 integrate
.. //depot/projects/kse/sys/kern/uipc_cow.c#1 branch
.. //depot/projects/kse/sys/kern/uipc_jumbo.c#1 branch
.. //depot/projects/kse/sys/kern/uipc_socket.c#19 integrate
.. //depot/projects/kse/sys/kern/uipc_syscalls.c#18 integrate
.. //depot/projects/kse/sys/modules/ti/Makefile#3 integrate
.. //depot/projects/kse/sys/net/if_media.c#3 integrate
.. //depot/projects/kse/sys/netinet/ip_input.c#23 integrate
.. //depot/projects/kse/sys/netinet/ip_mroute.c#11 integrate
.. //depot/projects/kse/sys/netinet/ip_output.c#24 integrate
.. //depot/projects/kse/sys/pci/if_ti.c#10 integrate
.. //depot/projects/kse/sys/pci/if_tireg.h#2 integrate
.. //depot/projects/kse/sys/pci/ti_fw.h#2 integrate
.. //depot/projects/kse/sys/pci/ti_fw2.h#2 integrate
.. //depot/projects/kse/sys/sparc64/conf/GENERIC#15 integrate
.. //depot/projects/kse/sys/sys/jumbo.h#1 branch
.. //depot/projects/kse/sys/sys/mbuf.h#10 integrate
.. //depot/projects/kse/sys/sys/resource.h#6 integrate
.. //depot/projects/kse/sys/sys/socketvar.h#21 integrate
.. //depot/projects/kse/sys/sys/tiio.h#1 branch
.. //depot/projects/kse/sys/sys/uio.h#9 integrate
.. //depot/projects/kse/sys/ufs/ffs/ffs_vfsops.c#22 integrate
.. //depot/projects/kse/sys/ufs/ufs/ufs_readwrite.c#11 integrate
.. //depot/projects/kse/sys/vm/swap_pager.c#11 integrate
.. //depot/projects/kse/sys/vm/vm_fault.c#13 integrate
.. //depot/projects/kse/sys/vm/vm_map.c#23 integrate
.. //depot/projects/kse/sys/vm/vm_mmap.c#18 integrate
.. //depot/projects/kse/sys/vm/vm_object.c#13 integrate
.. //depot/projects/kse/sys/vm/vm_object.h#7 integrate
.. //depot/projects/kse/sys/vm/vm_page.c#13 integrate
.. //depot/projects/kse/sys/vm/vm_page.h#9 integrate
.. //depot/projects/kse/sys/vm/vm_unix.c#7 integrate

Differences ...

==== //depot/projects/kse/sys/alpha/alpha/pmap.c#29 (text+ko) ====

@@ -43,7 +43,7 @@
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
  *		with some ideas from NetBSD's alpha pmap
- * $FreeBSD: src/sys/alpha/alpha/pmap.c,v 1.92 2002/04/29 07:43:08 peter Exp $
+ * $FreeBSD: src/sys/alpha/alpha/pmap.c,v 1.93 2002/06/27 04:08:45 jeff Exp $
  */
 
 /*
@@ -610,7 +610,7 @@
 	if (initial_pvs < MINPV)
 		initial_pvs = MINPV;
 	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
-	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
 	uma_zone_set_allocf(pvzone, pmap_allocf);
 	uma_prealloc(pvzone, initial_pvs);
 	/*

==== //depot/projects/kse/sys/compat/linux/linux_ioctl.c#15 (text+ko) ====

@@ -25,7 +25,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/compat/linux/linux_ioctl.c,v 1.85 2002/06/02 20:05:41 schweikh Exp $
+ * $FreeBSD: src/sys/compat/linux/linux_ioctl.c,v 1.86 2002/06/26 15:53:11 arr Exp $
  */
 
 #include <sys/param.h>
@@ -2027,15 +2027,10 @@
 	ifp = NULL;
 	error = 0;
 	
-	mtx_lock(&Giant);
-	if ((error = fget(td, args->fd, &fp)) != 0) {
-		mtx_unlock(&Giant);
+	if ((error = fget(td, args->fd, &fp)) != 0)
 		return (error);
-	}
 	type = fp->f_type;
 	fdrop(fp, td);
-	mtx_unlock(&Giant);
-
 	if (type != DTYPE_SOCKET) {
 		/* not a socket - probably a tap / vmnet device */
 		switch (args->cmd) {
@@ -2243,14 +2238,10 @@
 	struct file *fp;
 	int error, type;
 
-	mtx_lock(&Giant);
-	if ((error = fget(td, args->fd, &fp)) != 0) {
-		mtx_unlock(&Giant);
+	if ((error = fget(td, args->fd, &fp)) != 0)
 		return (error);
-	}
 	type = fp->f_type;
 	fdrop(fp, td);
-	mtx_unlock(&Giant);
 	if (type == DTYPE_SOCKET)
 		return (linux_ioctl_socket(td, args));
 	return (ENOIOCTL);

==== //depot/projects/kse/sys/conf/NOTES#9 (text+ko) ====

@@ -14,7 +14,7 @@
 # This file contains machine independent kernel configuration notes.  For
 # machine dependent notes, look in /sys/<arch>/conf/NOTES.
 #
-# $FreeBSD: src/sys/conf/NOTES,v 1.1040 2002/06/21 19:53:04 rwatson Exp $
+# $FreeBSD: src/sys/conf/NOTES,v 1.1041 2002/06/26 03:34:43 ken Exp $
 #
 
 #
@@ -533,6 +533,13 @@
 options 	DUMMYNET
 options 	BRIDGE
 
+# Zero copy sockets support.  This enables "zero copy" for sending and
+# receiving data via a socket.  The send side works for any type of NIC,
+# the receive side only works for NICs that support MTUs greater than the
+# page size of your architecture and that support header splitting.  See
+# zero_copy(9) for more details.
+options 	ZERO_COPY_SOCKETS
+
 #
 # ATM (HARP version) options
 #
@@ -1670,6 +1677,13 @@
 device		ti
 device		fpa	1
 
+# Use "private" jumbo buffers allocated exclusively for the ti(4) driver.
+# This option is incompatible with the TI_JUMBO_HDRSPLIT option below.
+#options 	TI_PRIVATE_JUMBOS
+# Turn on the header splitting option for the ti(4) driver firmware.  This
+# only works for Tigon II chips, and has no effect for Tigon I chips.
+options 	TI_JUMBO_HDRSPLIT
+
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
@@ -2255,6 +2269,8 @@
 
 options 	NBUF=512	# Number of buffer headers
 
+options 	MSIZE=256	# mbuf size in bytes
+options 	MCLSHIFT=12	# mbuf cluster shift in bits, 12 == 4KB
 options 	NMBCLUSTERS=1024	# Number of mbuf clusters
 
 options 	SCSI_NCR_DEBUG

==== //depot/projects/kse/sys/conf/files#40 (text+ko) ====

@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/files,v 1.653 2002/06/24 01:53:26 imp Exp $
+# $FreeBSD: src/sys/conf/files,v 1.654 2002/06/26 03:34:43 ken Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -922,7 +922,9 @@
 kern/tty_subr.c		standard
 kern/tty_tty.c		standard
 kern/uipc_accf.c	optional inet
+kern/uipc_cow.c		optional zero_copy_sockets
 kern/uipc_domain.c	standard
+kern/uipc_jumbo.c	standard
 kern/uipc_mbuf.c	standard
 kern/uipc_mbuf2.c	standard
 kern/uipc_proto.c	standard

==== //depot/projects/kse/sys/conf/options#21 (text+ko) ====

@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/options,v 1.326 2002/06/18 21:30:36 n_hibma Exp $
+# $FreeBSD: src/sys/conf/options,v 1.327 2002/06/26 03:34:43 ken Exp $
 #
 #        On the handling of kernel options
 #
@@ -345,6 +345,11 @@
 DRM_LINUX		opt_drm.h
 DRM_DEBUG		opt_drm.h
 
+
+ZERO_COPY_SOCKETS	opt_zero.h
+TI_PRIVATE_JUMBOS	opt_ti.h
+TI_JUMBO_HDRSPLIT	opt_ti.h
+
 # ATM (HARP version)
 ATM_CORE		opt_atm.h
 ATM_IP			opt_atm.h
@@ -405,6 +410,8 @@
 REGRESSION		opt_global.h
 RESTARTABLE_PANICS	opt_global.h
 VFS_BIO_DEBUG		opt_global.h
+MSIZE			opt_global.h
+MCLSHIFT		opt_global.h
 
 # These are VM related options
 VM_KMEM_SIZE		opt_vm.h

==== //depot/projects/kse/sys/i386/i386/pmap.c#39 (text+ko) ====

@@ -39,7 +39,7 @@
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
- * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.324 2002/06/25 22:14:06 iedowse Exp $
+ * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.326 2002/06/27 06:34:03 arr Exp $
  */
 
 /*
@@ -2016,7 +2016,7 @@
 	register pt_entry_t *ptbase;
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
-	vm_pindex_t sindex, eindex;
+	vm_offset_t sindex, eindex;
 	int anychanged;
 
 	if (pmap == NULL)
@@ -2499,7 +2499,7 @@
 		    ((objpgs > 0) && (p != NULL));
 		    p = TAILQ_NEXT(p, listq)) {
 
-			if (p->pindex < pindex || p->pindex - pindex > psize) {
+			if (p->pindex < pindex || p->pindex - pindex >= psize) {
 				continue;
 			}
 			tmpidx = p->pindex - pindex;

==== //depot/projects/kse/sys/kern/kern_jail.c#15 (text+ko) ====

@@ -6,7 +6,7 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  *
- * $FreeBSD: src/sys/kern/kern_jail.c,v 1.23 2002/04/04 21:03:25 jhb Exp $
+ * $FreeBSD: src/sys/kern/kern_jail.c,v 1.24 2002/06/26 00:29:01 arr Exp $
  *
  */
 
@@ -72,7 +72,6 @@
 	if (j.version != 0)
 		return (EINVAL);
 
-	mtx_lock(&Giant);
 	MALLOC(pr, struct prison *, sizeof *pr , M_PRISON, M_WAITOK | M_ZERO);
 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
 	pr->pr_securelevel = securelevel;
@@ -97,14 +96,12 @@
 	pr->pr_ref = 1;
 	PROC_UNLOCK(p);
 	crfree(oldcred);
-	mtx_unlock(&Giant);
 	return (0);
 badcred:
 	PROC_UNLOCK(p);
 	crfree(newcred);
 bail:
 	FREE(pr, M_PRISON);
-	mtx_unlock(&Giant);
 	return (error);
 }
 

==== //depot/projects/kse/sys/kern/kern_module.c#9 (text+ko) ====

@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/kern/kern_module.c,v 1.33 2002/03/18 07:45:27 arr Exp $
+ * $FreeBSD: src/sys/kern/kern_module.c,v 1.35 2002/06/26 03:00:40 arr Exp $
  */
 
 #include <sys/param.h>
@@ -256,9 +256,8 @@
 	module_t mod;
 	int error = 0;
 
-	mtx_lock(&Giant);
+	td->td_retval[0] = -1;
 
-	td->td_retval[0] = -1;
 	MOD_SLOCK;
 	if (SCARG(uap, modid) == 0) {
 		mod = TAILQ_FIRST(&modules);
@@ -279,7 +278,6 @@
 		td->td_retval[0] = 0;
 done2:
 	MOD_SUNLOCK;
-	mtx_unlock(&Giant);
 	return (error);
 }
 
@@ -294,8 +292,6 @@
 
 	td->td_retval[0] = -1;
 
-	mtx_lock(&Giant);
-
 	MOD_SLOCK;
 	mod = module_lookupbyid(SCARG(uap, modid));
 	if (mod == NULL) {
@@ -308,7 +304,6 @@
 			td->td_retval[0] = 0;
 	}
 	MOD_SUNLOCK;
-	mtx_unlock(&Giant);
 	return (error);
 }
 
@@ -332,14 +327,11 @@
 	struct module_stat *stat;
 	char *name;
 
-	mtx_lock(&Giant);
-
 	MOD_SLOCK;
 	mod = module_lookupbyid(SCARG(uap, modid));
 	if (mod == NULL) {
 		MOD_SUNLOCK;
-		error = ENOENT;
-		goto out;
+		return (ENOENT);
 	}
 	id = mod->id;
 	refs = mod->refs;
@@ -352,34 +344,29 @@
 	 * Check the version of the user's structure.
 	 */
 	if ((error = copyin(&stat->version, &version, sizeof(version))) != 0)
-		goto out;
+		return (error);
 	if (version != sizeof(struct module_stat_v1)
-	    && version != sizeof(struct module_stat)) {
-		error = EINVAL;
-		goto out;
-	}
+	    && version != sizeof(struct module_stat))
+		return (EINVAL);
 	namelen = strlen(mod->name) + 1;
 	if (namelen > MAXMODNAME)
 		namelen = MAXMODNAME;
 	if ((error = copyout(name, &stat->name[0], namelen)) != 0)
-		goto out;
+		return (error);
 
 	if ((error = copyout(&refs, &stat->refs, sizeof(int))) != 0)
-		goto out;
+		return (error);
 	if ((error = copyout(&id, &stat->id, sizeof(int))) != 0)
-		goto out;
+		return (error);
 
 	/*
 	 * >v1 stat includes module data.
 	 */
-	if (version == sizeof(struct module_stat)) {
+	if (version == sizeof(struct module_stat))
 		if ((error = copyout(&data, &stat->data, 
 		    sizeof(data))) != 0)
-			goto out;
-	}
+			return (error);
 	td->td_retval[0] = 0;
-out:
-	mtx_unlock(&Giant);
 	return (error);
 }
 
@@ -394,9 +381,8 @@
 	module_t mod;
 
 	if ((error = copyinstr(SCARG(uap, name), name, sizeof name, 0)) != 0)
-		goto out;
+		return (error);
 
-	mtx_lock(&Giant);
 	MOD_SLOCK;
 	mod = module_lookupbyname(name);
 	if (mod == NULL)
@@ -404,7 +390,5 @@
 	else
 		td->td_retval[0] = module_getid(mod);
 	MOD_SUNLOCK;
-	mtx_unlock(&Giant);
-out:
 	return (error);
 }

==== //depot/projects/kse/sys/kern/kern_subr.c#14 (text+ko) ====

@@ -36,9 +36,11 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/kern/kern_subr.c,v 1.53 2002/06/20 07:08:43 peter Exp $
+ * $FreeBSD: src/sys/kern/kern_subr.c,v 1.54 2002/06/26 03:34:48 ken Exp $
  */
 
+#include "opt_zero.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -58,6 +60,82 @@
 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 
 	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
 
+#ifdef ZERO_COPY_SOCKETS
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/swap_pager.h>
+#include <sys/mbuf.h>
+#include <machine/cpu.h>
+
+/* Declared in uipc_socket.c */
+extern int so_zero_copy_receive;
+
+static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr,
+		       vm_offset_t uaddr);
+static int userspaceco(caddr_t cp, u_int cnt, struct uio *uio,
+			    struct vm_object *obj, int disposable);
+
+static int
+vm_pgmoveco(mapa, srcobj,  kaddr, uaddr)
+        vm_map_t mapa;
+	vm_object_t srcobj;
+	vm_offset_t kaddr, uaddr;
+{
+	vm_map_t map = mapa;
+	vm_page_t kern_pg, user_pg;
+	vm_object_t uobject;
+	vm_map_entry_t entry;
+	vm_pindex_t upindex, kpindex;
+	vm_prot_t prot;
+	boolean_t wired;
+
+	/*
+	 * First lookup the kernel page.
+	 */
+	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+
+	if ((vm_map_lookup(&map, uaddr,
+			   VM_PROT_READ, &entry, &uobject,
+			   &upindex, &prot, &wired)) != KERN_SUCCESS) {
+		return(EFAULT);
+	}
+	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
+		vm_page_sleep_busy(user_pg, 1, "vm_pgmoveco");
+		pmap_remove(map->pmap, uaddr, uaddr+PAGE_SIZE);
+		vm_page_busy(user_pg);
+		vm_page_free(user_pg);
+	}
+
+	if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) ||
+	    (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) {
+		printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), "
+		       "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex,
+			kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0,
+			kern_pg->hold_count, (u_long)kern_pg->phys_addr);
+		if ((kern_pg->queue - kern_pg->pc) == PQ_FREE)
+			panic("vm_pgmoveco: renaming free page");
+		else
+			panic("vm_pgmoveco: renaming busy page");
+	}
+	kpindex = kern_pg->pindex;
+	vm_page_busy(kern_pg);
+	vm_page_rename(kern_pg, uobject, upindex);
+	vm_page_flag_clear(kern_pg, PG_BUSY);
+	kern_pg->valid = VM_PAGE_BITS_ALL;
+	
+	vm_map_lookup_done(map, entry);
+	return(KERN_SUCCESS);
+}
+#endif /* ZERO_COPY_SOCKETS */
+
 int
 uiomove(cp, n, uio)
 	register caddr_t cp;
@@ -133,16 +211,100 @@
 	return (error);
 }
 
-#ifdef ENABLE_VFS_IOOPT
+#if defined(ENABLE_VFS_IOOPT) || defined(ZERO_COPY_SOCKETS)
 /*
  * Experimental support for zero-copy I/O
  */
+static int
+userspaceco(cp, cnt, uio, obj, disposable)
+	caddr_t cp;
+	u_int cnt;
+	struct uio *uio;
+	struct vm_object *obj;
+	int disposable;
+{
+	struct iovec *iov;
+	int error;
+
+	iov = uio->uio_iov;
+
+#ifdef ZERO_COPY_SOCKETS
+
+	if (uio->uio_rw == UIO_READ) {
+		if ((so_zero_copy_receive != 0)
+		 && (obj != NULL)
+		 && ((cnt & PAGE_MASK) == 0)
+		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
+		 && ((uio->uio_offset & PAGE_MASK) == 0)
+		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
+		 && (obj->type == OBJT_DEFAULT)
+		 && (disposable != 0)) {
+			/* SOCKET: use page-trading */
+			/*
+			 * We only want to call vm_pgmoveco() on
+			 * disposable pages, since it gives the
+			 * kernel page to the userland process.
+			 */
+			error =	vm_pgmoveco(&curproc->p_vmspace->vm_map,
+					    obj, (vm_offset_t)cp, 
+					    (vm_offset_t)iov->iov_base);
+
+			/*
+			 * If we get an error back, attempt
+			 * to use copyout() instead.  The
+			 * disposable page should be freed
+			 * automatically if we weren't able to move
+			 * it into userland.
+			 */
+			if (error != 0)
+				error = copyout(cp, iov->iov_base, cnt);
+#ifdef ENABLE_VFS_IOOPT
+		} else if ((vfs_ioopt != 0)
+		 && ((cnt & PAGE_MASK) == 0)
+		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
+		 && ((uio->uio_offset & PAGE_MASK) == 0)
+		 && ((((intptr_t) cp) & PAGE_MASK) == 0)) {
+			error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
+					   uio->uio_offset, cnt,
+					   (vm_offset_t) iov->iov_base, NULL);
+#endif /* ENABLE_VFS_IOOPT */
+		} else {
+			error = copyout(cp, iov->iov_base, cnt);
+		}
+	} else {
+		error = copyin(iov->iov_base, cp, cnt);
+	}
+#else /* ZERO_COPY_SOCKETS */
+	if (uio->uio_rw == UIO_READ) {
+#ifdef ENABLE_VFS_IOOPT
+		if ((vfs_ioopt != 0)
+		 && ((cnt & PAGE_MASK) == 0)
+		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
+		 && ((uio->uio_offset & PAGE_MASK) == 0)
+		 && ((((intptr_t) cp) & PAGE_MASK) == 0)) {
+			error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
+					   uio->uio_offset, cnt,
+					   (vm_offset_t) iov->iov_base, NULL);
+		} else
+#endif /* ENABLE_VFS_IOOPT */
+		{
+			error = copyout(cp, iov->iov_base, cnt);
+		}
+	} else {
+		error = copyin(iov->iov_base, cp, cnt);
+	}
+#endif /* ZERO_COPY_SOCKETS */
+
+	return (error);
+}
+
 int
-uiomoveco(cp, n, uio, obj)
+uiomoveco(cp, n, uio, obj, disposable)
 	caddr_t cp;
 	int n;
 	struct uio *uio;
 	struct vm_object *obj;
+	int disposable;
 {
 	struct iovec *iov;
 	u_int cnt;
@@ -169,23 +331,9 @@
 		case UIO_USERSPACE:
 			if (ticks - PCPU_GET(switchticks) >= hogticks)
 				uio_yield();
-			if (uio->uio_rw == UIO_READ) {
-#ifdef ENABLE_VFS_IOOPT
-				if (vfs_ioopt && ((cnt & PAGE_MASK) == 0) &&
-					((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) &&
-					((uio->uio_offset & PAGE_MASK) == 0) &&
-					((((intptr_t) cp) & PAGE_MASK) == 0)) {
-						error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
-								uio->uio_offset, cnt,
-								(vm_offset_t) iov->iov_base, NULL);
-				} else
-#endif
-				{
-					error = copyout(cp, iov->iov_base, cnt);
-				}
-			} else {
-				error = copyin(iov->iov_base, cp, cnt);
-			}
+
+			error = userspaceco(cp, cnt, uio, obj, disposable);
+
 			if (error)
 				return (error);
 			break;
@@ -208,6 +356,9 @@
 	}
 	return (0);
 }
+#endif /* ENABLE_VFS_IOOPT || ZERO_COPY_SOCKETS */
+
+#ifdef ENABLE_VFS_IOOPT
 
 /*
  * Experimental support for zero-copy I/O
@@ -277,7 +428,7 @@
 	}
 	return error;
 }
-#endif
+#endif /* ENABLE_VFS_IOOPT */
 
 /*
  * Give next character to user as result of read.

==== //depot/projects/kse/sys/kern/uipc_socket.c#19 (text+ko) ====

@@ -31,10 +31,11 @@
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
- * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.123 2002/06/20 18:52:54 alfred Exp $
+ * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.124 2002/06/26 03:34:48 ken Exp $
  */
 
 #include "opt_inet.h"
+#include "opt_zero.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -94,6 +95,17 @@
 static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
+#ifdef ZERO_COPY_SOCKETS
+/* These aren't static because they're used in other files. */
+int so_zero_copy_send = 1;
+int so_zero_copy_receive = 1;
+SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
+    "Zero copy controls");
+SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
+    &so_zero_copy_receive, 0, "Enable zero copy receive");
+SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
+    &so_zero_copy_send, 0, "Enable zero copy send");
+#endif /* ZERO_COPY_SOCKETS */
 
 
 /*
@@ -471,6 +483,22 @@
  * must check for short counts if EINTR/ERESTART are returned.
  * Data and control buffers are freed on return.
  */
+
+#ifdef ZERO_COPY_SOCKETS
+struct so_zerocopy_stats{
+	int size_ok;
+	int align_ok;
+	int found_ifp;
+};
+struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
+#include <netinet/in.h>
+#include <net/route.h>
+#include <netinet/in_pcb.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#endif /*ZERO_COPY_SOCKETS*/
+
 int
 sosend(so, addr, uio, top, control, flags, td)
 	register struct socket *so;
@@ -486,6 +514,9 @@
 	register long space, len, resid;
 	int clen = 0, error, s, dontroute, mlen;
 	int atomic = sosendallatonce(so) || top;
+#ifdef ZERO_COPY_SOCKETS
+	int cow_send;
+#endif /* ZERO_COPY_SOCKETS */
 
 	if (uio)
 		resid = uio->uio_resid;
@@ -574,6 +605,9 @@
 			if (flags & MSG_EOR)
 				top->m_flags |= M_EOR;
 		    } else do {
+#ifdef ZERO_COPY_SOCKETS
+			cow_send = 0;
+#endif /* ZERO_COPY_SOCKETS */
 			if (top == 0) {
 				MGETHDR(m, M_TRYWAIT, MT_DATA);
 				if (m == NULL) {
@@ -592,12 +626,32 @@
 				mlen = MLEN;
 			}
 			if (resid >= MINCLSIZE) {
+#ifdef ZERO_COPY_SOCKETS				
+				if (so_zero_copy_send &&
+				    resid>=PAGE_SIZE && 
+				    space>=PAGE_SIZE && 
+				    uio->uio_iov->iov_len>=PAGE_SIZE) {
+					so_zerocp_stats.size_ok++;
+					if (!((vm_offset_t)
+					  uio->uio_iov->iov_base & PAGE_MASK)){
+						so_zerocp_stats.align_ok++;
+						cow_send = socow_setup(m, uio);
+					}
+				} 
+				if (!cow_send){
+#endif /* ZERO_COPY_SOCKETS */
 				MCLGET(m, M_TRYWAIT);
 				if ((m->m_flags & M_EXT) == 0)
 					goto nopages;
 				mlen = MCLBYTES;
 				len = min(min(mlen, resid), space);
 			} else {
+#ifdef ZERO_COPY_SOCKETS
+					len = PAGE_SIZE;
+				}
+					
+			} else {
+#endif /* ZERO_COPY_SOCKETS */
 nopages:
 				len = min(min(mlen, resid), space);
 				/*
@@ -608,6 +662,11 @@
 					MH_ALIGN(m, len);
 			}
 			space -= len;
+#ifdef ZERO_COPY_SOCKETS
+			if (cow_send)
+				error = 0;
+			else
+#endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, caddr_t), (int)len, uio);
 			resid = uio->uio_resid;
 			m->m_len = len;
@@ -719,6 +778,27 @@
 		if (error)
 			goto bad;
 		do {
+#ifdef ZERO_COPY_SOCKETS
+			if (so_zero_copy_receive) {
+				vm_page_t pg;
+				int disposable;
+
+				if ((m->m_flags & M_EXT)
+				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
+					disposable = 1;
+				else
+					disposable = 0;
+
+				pg = PHYS_TO_VM_PAGE(vtophys(mtod(m, caddr_t)));
+				if (uio->uio_offset == -1)
+					uio->uio_offset =IDX_TO_OFF(pg->pindex);
+
+				error = uiomoveco(mtod(m, caddr_t), 
+						  min(uio->uio_resid, m->m_len),
+						  uio, pg->object,
+						  disposable);
+			} else
+#endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, caddr_t),
 			    (int) min(uio->uio_resid, m->m_len), uio);
 			m = m_free(m);
@@ -874,6 +954,28 @@
 		 */
 		if (mp == 0) {
 			splx(s);
+#ifdef ZERO_COPY_SOCKETS
+			if (so_zero_copy_receive) {
+				vm_page_t pg;
+				int disposable;
+
+				if ((m->m_flags & M_EXT)
+				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
+					disposable = 1;
+				else
+					disposable = 0;
+ 
+				pg = PHYS_TO_VM_PAGE(vtophys(mtod(m, caddr_t) +
+					moff));
+
+				if (uio->uio_offset == -1)
+					uio->uio_offset =IDX_TO_OFF(pg->pindex);
+
+				error = uiomoveco(mtod(m, caddr_t) + moff,
+						  (int)len, uio,pg->object,
+						  disposable);
+			} else
+#endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
 			s = splnet();
 			if (error)

==== //depot/projects/kse/sys/kern/uipc_syscalls.c#18 (text+ko) ====

@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
- * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.113 2002/06/20 18:52:54 alfred Exp $
+ * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.114 2002/06/26 03:34:48 ken Exp $
  */
 
 #include "opt_compat.h"
@@ -74,8 +74,8 @@
 
 static void sf_buf_init(void *arg);
 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
-static struct sf_buf *sf_buf_alloc(void);
-static void sf_buf_free(caddr_t addr, void *args);
+struct sf_buf *sf_buf_alloc(void);
+void sf_buf_free(caddr_t addr, void *args);
 
 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
 static int recvit(struct thread *td, int s, struct msghdr *mp,
@@ -96,9 +96,9 @@
 	struct mtx sf_lock;
 } sf_freelist;
 
-static vm_offset_t sf_base;
-static struct sf_buf *sf_bufs;
-static u_int sf_buf_alloc_want;
+vm_offset_t sf_base;
+struct sf_buf *sf_bufs;
+u_int sf_buf_alloc_want;
 
 /*
  * System call interface to the socket abstraction.
@@ -1570,7 +1570,7 @@
 /*
  * Get an sf_buf from the freelist. Will block if none are available.
  */
-static struct sf_buf *
+struct sf_buf *
 sf_buf_alloc()
 {
 	struct sf_buf *sf;
@@ -1600,7 +1600,7 @@
 /*
  * Detatch mapped page and release resources back to the system.
  */
-static void
+void
 sf_buf_free(caddr_t addr, void *args)
 {
 	struct sf_buf *sf;

==== //depot/projects/kse/sys/modules/ti/Makefile#3 (text+ko) ====

@@ -1,8 +1,9 @@
-# $FreeBSD: src/sys/modules/ti/Makefile,v 1.12 2001/09/05 23:47:02 brooks Exp $
+# $FreeBSD: src/sys/modules/ti/Makefile,v 1.13 2002/06/26 03:34:49 ken Exp $
 
 .PATH: ${.CURDIR}/../../pci
 
 KMOD=	if_ti
-SRCS=	if_ti.c opt_bdg.h device_if.h bus_if.h pci_if.h
+SRCS=	if_ti.c opt_bdg.h device_if.h bus_if.h pci_if.h opt_ti.h opt_zero.h \
+	vnode_if.h
 
 .include <bsd.kmod.mk>

==== //depot/projects/kse/sys/net/if_media.c#3 (text+ko) ====

@@ -1,5 +1,5 @@
 /*	$NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $	*/
-/* $FreeBSD: src/sys/net/if_media.c,v 1.16 2002/03/19 21:54:18 alfred Exp $ */
+/* $FreeBSD: src/sys/net/if_media.c,v 1.17 2002/06/26 03:34:50 ken Exp $ */
 
 /*
  * Copyright (c) 1997
@@ -303,8 +303,10 @@
 
 		if (ifmr->ifm_count != 0) {
 			kptr = (int *)malloc(ifmr->ifm_count * sizeof(int),
-			    M_TEMP, M_WAITOK);
+			    M_TEMP, M_NOWAIT);
 
+			if (kptr == NULL)
+				return (ENOMEM);
 			/*
 			 * Get the media words from the interface's list.
 			 */

==== //depot/projects/kse/sys/netinet/ip_input.c#23 (text+ko) ====

@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
- * $FreeBSD: src/sys/netinet/ip_input.c,v 1.203 2002/06/23 20:48:26 luigi Exp $
+ * $FreeBSD: src/sys/netinet/ip_input.c,v 1.204 2002/06/27 11:02:06 mux Exp $
  */
 
 #define	_IP_VHL
@@ -308,7 +308,7 @@
 			break;
 
 		case PACKET_TAG_DIVERT:
-			args.divert_rule = (int)m->m_hdr.mh_data & 0xffff;
+			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
 			break;
 
 		case PACKET_TAG_IPFORWARD:

==== //depot/projects/kse/sys/netinet/ip_mroute.c#11 (text+ko) ====

@@ -9,7 +9,7 @@
  * Modified by Bill Fenner, PARC, April 1995
  *
  * MROUTING Revision: 3.5
- * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.76 2002/05/31 11:52:32 tanimura Exp $
+ * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.77 2002/06/26 21:00:53 luigi Exp $
  */
 
 #include "opt_mrouting.h"
@@ -2058,6 +2058,11 @@
 		printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
 		       viftable[i].v_rsvpd, so);
 
+	/*
+	 * XXX as an additional consistency check, one could make sure
+	 * that viftable[i].v_rsvpd == so, otherwise passing so as
+	 * first parameter is pretty useless.
+	 */
 	viftable[i].v_rsvpd = NULL;
 	/*
 	 * This may seem silly, but we need to be sure we don't over-decrement

==== //depot/projects/kse/sys/netinet/ip_output.c#24 (text+ko) ====

@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/netinet/ip_output.c,v 1.159 2002/06/23 09:15:43 luigi Exp $
+ * $FreeBSD: src/sys/netinet/ip_output.c,v 1.161 2002/06/27 11:02:06 mux Exp $
  */
 
 #define _IP_VHL
@@ -168,7 +168,7 @@
 			break;
 
 		case PACKET_TAG_DIVERT:
-			args.divert_rule = (int)m0->m_data & 0xffff;
+			args.divert_rule = (intptr_t)m0->m_data & 0xffff;
 			break;
 
 		case PACKET_TAG_IPFORWARD:
@@ -917,8 +917,50 @@
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
+	if (len > PAGE_SIZE) {
+		/* 
+		 * Fragment large datagrams such that each segment 
+		 * contains a multiple of PAGE_SIZE amount of data, 
+		 * plus headers. This enables a receiver to perform 
+		 * page-flipping zero-copy optimizations.
+		 */
+
+		int newlen;

>>> TRUNCATED FOR MAIL (1000 lines) <<<

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe p4-projects" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200206271741.g5RHfaQZ086630>