From owner-p4-projects@FreeBSD.ORG Mon May 31 15:28:12 2004 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id EC3E016A4D0; Mon, 31 May 2004 15:28:11 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id ACB7616A4CE for ; Mon, 31 May 2004 15:28:11 -0700 (PDT) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id A400B43D45 for ; Mon, 31 May 2004 15:28:11 -0700 (PDT) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.12.11/8.12.11) with ESMTP id i4VMSBIt059779 for ; Mon, 31 May 2004 15:28:11 -0700 (PDT) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.12.11/8.12.11/Submit) id i4VMSAT7059776 for perforce@freebsd.org; Mon, 31 May 2004 15:28:10 -0700 (PDT) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Date: Mon, 31 May 2004 15:28:10 -0700 (PDT) Message-Id: <200405312228.i4VMSAT7059776@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to bb+lists.freebsd.perforce@cyrus.watson.org using -f From: Robert Watson To: Perforce Change Reviews Subject: PERFORCE change 53904 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 31 May 2004 22:28:12 -0000 http://perforce.freebsd.org/chv.cgi?CH=53904 Change 53904 by rwatson@rwatson_tislabs on 2004/05/31 15:27:44 Integrate HEAD into netperf_socket: - mbuma2 -- mbufs are now allocated by the UMA slab allocator. Sounds good on many fronts. Many things now tunable, etc. - UMA support for mbuma2 -- zone layering, etc. 
- Loop back Giant assertions for vn_start_write(), vn_finished_write(). Affected files ... .. //depot/projects/netperf_socket/sys/conf/files#22 integrate .. //depot/projects/netperf_socket/sys/i386/i386/vm_machdep.c#12 integrate .. //depot/projects/netperf_socket/sys/kern/kern_malloc.c#4 integrate .. //depot/projects/netperf_socket/sys/kern/kern_mbuf.c#1 branch .. //depot/projects/netperf_socket/sys/kern/subr_mbuf.c#3 delete .. //depot/projects/netperf_socket/sys/kern/uipc_mbuf.c#5 integrate .. //depot/projects/netperf_socket/sys/kern/uipc_mbuf2.c#4 integrate .. //depot/projects/netperf_socket/sys/kern/uipc_socket.c#11 integrate .. //depot/projects/netperf_socket/sys/kern/uipc_socket2.c#9 integrate .. //depot/projects/netperf_socket/sys/kern/uipc_syscalls.c#11 integrate .. //depot/projects/netperf_socket/sys/kern/vfs_vnops.c#4 integrate .. //depot/projects/netperf_socket/sys/sparc64/sparc64/vm_machdep.c#8 integrate .. //depot/projects/netperf_socket/sys/sys/mbuf.h#11 integrate .. //depot/projects/netperf_socket/sys/vm/uma.h#2 integrate .. //depot/projects/netperf_socket/sys/vm/uma_core.c#5 integrate .. //depot/projects/netperf_socket/sys/vm/uma_dbg.c#2 integrate .. //depot/projects/netperf_socket/sys/vm/uma_int.h#2 integrate .. //depot/projects/netperf_socket/sys/vm/vm_kern.c#5 integrate Differences ... 
==== //depot/projects/netperf_socket/sys/conf/files#22 (text+ko) ==== @@ -1,4 +1,4 @@ -# $FreeBSD: src/sys/conf/files,v 1.898 2004/05/28 00:22:58 tackerman Exp $ +# $FreeBSD: src/sys/conf/files,v 1.899 2004/05/31 21:46:03 bmilekic Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -1075,6 +1075,7 @@ kern/kern_lockf.c standard kern/kern_mac.c standard kern/kern_malloc.c standard +kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mutex.c standard @@ -1116,7 +1117,6 @@ kern/subr_kobj.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool -kern/subr_mbuf.c standard kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard ==== //depot/projects/netperf_socket/sys/i386/i386/vm_machdep.c#12 (text+ko) ==== @@ -41,7 +41,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.238 2004/05/30 17:57:42 phk Exp $"); +__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.239 2004/05/31 21:46:03 bmilekic Exp $"); #include "opt_isa.h" #include "opt_npx.h" @@ -95,6 +95,10 @@ #include #endif +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); @@ -584,6 +588,9 @@ vm_offset_t sf_base; int i; + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); + sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); TAILQ_INIT(&sf_buf_freelist); sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); ==== //depot/projects/netperf_socket/sys/kern/kern_malloc.c#4 (text+ko) ==== @@ -30,7 +30,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/kern_malloc.c,v 1.132 2004/04/05 21:03:34 imp Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_malloc.c,v 1.133 2004/05/31 21:46:04 bmilekic Exp $"); #include "opt_vm.h" @@ -191,6 +191,7 @@ int indx; caddr_t va; uma_zone_t zone; + 
uma_keg_t keg; #ifdef DIAGNOSTIC unsigned long osize = size; #endif @@ -235,6 +236,7 @@ size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone; + keg = zone->uz_keg; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif @@ -244,10 +246,11 @@ goto out; ksp->ks_size |= 1 << indx; - size = zone->uz_size; + size = keg->uk_size; } else { size = roundup(size, PAGE_SIZE); zone = NULL; + keg = NULL; va = uma_large_malloc(size, flags); mtx_lock(&ksp->ks_mtx); if (va == NULL) @@ -309,7 +312,7 @@ #ifdef INVARIANTS struct malloc_type **mtp = addr; #endif - size = slab->us_zone->uz_size; + size = slab->us_keg->uk_size; #ifdef INVARIANTS /* * Cache a pointer to the malloc_type that most recently freed @@ -325,7 +328,7 @@ sizeof(struct malloc_type *); *mtp = type; #endif - uma_zfree_arg(slab->us_zone, addr, slab); + uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab); } else { size = slab->us_size; uma_large_free(slab); @@ -364,8 +367,8 @@ ("realloc: address %p out of range", (void *)addr)); /* Get the size of the original block */ - if (slab->us_zone) - alloc = slab->us_zone->uz_size; + if (slab->us_keg) + alloc = slab->us_keg->uk_size; else alloc = slab->us_size; @@ -410,7 +413,6 @@ void *dummy; { u_int8_t indx; - u_long npg; u_long mem_size; int i; @@ -428,7 +430,7 @@ * Note that the kmem_map is also used by the zone allocator, * so make sure that there is enough space. */ - vm_kmem_size = VM_KMEM_SIZE; + vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE; mem_size = cnt.v_page_count; #if defined(VM_KMEM_SIZE_SCALE) @@ -462,17 +464,8 @@ */ init_param3(vm_kmem_size / PAGE_SIZE); - /* - * In mbuf_init(), we set up submaps for mbufs and clusters, in which - * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES), - * respectively. 
Mathematically, this means that what we do here may - * amount to slightly more address space than we need for the submaps, - * but it never hurts to have an extra page in kmem_map. - */ - npg = (nmbufs*MSIZE + nmbclusters*MCLBYTES + vm_kmem_size) / PAGE_SIZE; - kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, - (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); + (vm_offset_t *)&kmemlimit, vm_kmem_size); kmem_map->system_map = 1; uma_startup2(); ==== //depot/projects/netperf_socket/sys/kern/uipc_mbuf.c#5 (text+ko) ==== @@ -30,7 +30,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.129 2004/04/18 13:01:28 luigi Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.130 2004/05/31 21:46:04 bmilekic Exp $"); #include "opt_mac.h" #include "opt_param.h" @@ -86,6 +86,161 @@ #endif /* + * Malloc-type for external ext_buf ref counts. + */ +MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts"); + +/* + * Allocate a given length worth of mbufs and/or clusters (whatever fits + * best) and return a pointer to the top of the allocated chain. If an + * existing mbuf chain is provided, then we will append the new chain + * to the existing one but still return the top of the newly allocated + * chain. + */ +struct mbuf * +m_getm(struct mbuf *m, int len, int how, short type) +{ + struct mbuf *mb, *top, *cur, *mtail; + int num, rem; + int i; + + KASSERT(len >= 0, ("m_getm(): len is < 0")); + + /* If m != NULL, we will append to the end of that chain. */ + if (m != NULL) + for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); + else + mtail = NULL; + + /* + * Calculate how many mbufs+clusters ("packets") we need and how much + * leftover there is after that and allocate the first mbuf+cluster + * if required. 
+ */ + num = len / MCLBYTES; + rem = len % MCLBYTES; + top = cur = NULL; + if (num > 0) { + if ((top = cur = m_getcl(how, type, 0)) == NULL) + goto failed; + } + num--; + top->m_len = 0; + + for (i = 0; i < num; i++) { + mb = m_getcl(how, type, 0); + if (mb == NULL) + goto failed; + mb->m_len = 0; + cur = (cur->m_next = mb); + } + if (rem > 0) { + mb = (rem > MINCLSIZE) ? + m_getcl(how, type, 0) : m_get(how, type); + if (mb == NULL) + goto failed; + mb->m_len = 0; + if (cur == NULL) + top = mb; + else + cur->m_next = mb; + } + + if (mtail != NULL) + mtail->m_next = top; + return top; +failed: + if (top != NULL) + m_freem(top); + return NULL; +} + +/* + * Free an entire chain of mbufs and associated external buffers, if + * applicable. + */ +void +m_freem(struct mbuf *mb) +{ + + while (mb != NULL) + mb = m_free(mb); +} + +/*- + * Configure a provided mbuf to refer to the provided external storage + * buffer and setup a reference count for said buffer. If the setting + * up of the reference count fails, the M_EXT bit will not be set. If + * successfull, the M_EXT bit is set in the mbuf's flags. + * + * Arguments: + * mb The existing mbuf to which to attach the provided buffer. + * buf The address of the provided external storage buffer. + * size The size of the provided buffer. + * freef A pointer to a routine that is responsible for freeing the + * provided external storage buffer. + * args A pointer to an argument structure (of any type) to be passed + * to the provided freef routine (may be NULL). + * flags Any other flags to be passed to the provided mbuf. + * type The type that the external storage buffer should be + * labeled with. + * + * Returns: + * Nothing. 
+ */ +void +m_extadd(struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(void *, void *), void *args, int flags, int type) +{ + u_int *ref_cnt = NULL; + + /* XXX Shouldn't be adding EXT_CLUSTER with this API */ + if (type == EXT_CLUSTER) + ref_cnt = (u_int *)uma_find_refcnt(zone_clust, + mb->m_ext.ext_buf); + else if (type == EXT_EXTREF) + ref_cnt = mb->m_ext.ref_cnt; + mb->m_ext.ref_cnt = (ref_cnt == NULL) ? + malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt; + if (mb->m_ext.ref_cnt != NULL) { + *(mb->m_ext.ref_cnt) = 1; + mb->m_flags |= (M_EXT | flags); + mb->m_ext.ext_buf = buf; + mb->m_data = mb->m_ext.ext_buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_args = args; + mb->m_ext.ext_type = type; + } +} + +/* + * Non-directly-exported function to clean up after mbufs with M_EXT + * storage attached to them if the reference count hits 0. + */ +void +mb_free_ext(struct mbuf *m) +{ + + MEXT_REM_REF(m); + if (atomic_cmpset_int(m->m_ext.ref_cnt, 0, 1)) { + if (m->m_ext.ext_type == EXT_PACKET) { + uma_zfree(zone_pack, m); + return; + } else if (m->m_ext.ext_type == EXT_CLUSTER) { + uma_zfree(zone_clust, m->m_ext.ext_buf); + m->m_ext.ext_buf = NULL; + } else { + (*(m->m_ext.ext_free))(m->m_ext.ext_buf, + m->m_ext.ext_args); + if (m->m_ext.ext_type != EXT_EXTREF) + free(m->m_ext.ref_cnt, M_MBUF); + } + } + uma_zfree(zone_mbuf, m); +} + +/* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. 
 */ @@ -364,22 +519,22 @@ struct mbuf *n; /* Get the next new mbuf */ - MGET(n, how, m->m_type); + if (remain >= MINCLSIZE) { + n = m_getcl(how, m->m_type, 0); + nsize = MCLBYTES; + } else { + n = m_get(how, m->m_type); + nsize = MLEN; + } if (n == NULL) goto nospace; - if (top == NULL) { /* first one, must be PKTHDR */ - if (!m_dup_pkthdr(n, m, how)) + + if (top == NULL) { /* First one, must be PKTHDR */ + if (!m_dup_pkthdr(n, m, how)) { + m_free(n); goto nospace; + } nsize = MHLEN; - } else /* not the first one */ - nsize = MLEN; - if (remain >= MINCLSIZE) { - MCLGET(n, how); - if ((n->m_flags & M_EXT) == 0) { - (void)m_free(n); - goto nospace; - } - nsize = MCLBYTES; } n->m_len = 0; @@ -651,41 +806,44 @@ void (*copy)(char *from, caddr_t to, u_int len)) { struct mbuf *m; - struct mbuf *top = 0, **mp = &top; + struct mbuf *top = NULL, **mp = &top; int len; if (off < 0 || off > MHLEN) return (NULL); - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) - return (NULL); - m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.len = totlen; - len = MHLEN; + while (totlen > 0) { + if (top == NULL) { /* First one, must be PKTHDR */ + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + len = MCLBYTES; + } else { + m = m_gethdr(M_DONTWAIT, MT_DATA); + len = MHLEN; - while (totlen > 0) { - if (top) { - MGET(m, M_DONTWAIT, MT_DATA); + /* Place initial small packet/header at end of mbuf */ + if (m && totlen + off + max_linkhdr <= MLEN) { + m->m_data += max_linkhdr; + len -= max_linkhdr; + } + } + if (m == NULL) + return NULL; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; + } else { + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, 0); + len = MCLBYTES; + } else { + m = m_get(M_DONTWAIT, MT_DATA); + len = MLEN; + } if (m == NULL) { m_freem(top); - return (NULL); + return NULL; } - len = MLEN; } - if (totlen + off >= MINCLSIZE) { - MCLGET(m, M_DONTWAIT); - if (m->m_flags & M_EXT) - len = MCLBYTES; - } else { - /* - * Place initial small 
packet/header at end of mbuf. - */ - if (top == NULL && totlen + off + max_linkhdr <= len) { - m->m_data += max_linkhdr; - len -= max_linkhdr; - } - } if (off) { m->m_data += off; len -= off; @@ -722,9 +880,10 @@ off -= mlen; totlen += mlen; if (m->m_next == NULL) { - n = m_get_clrd(M_DONTWAIT, m->m_type); + n = m_get(M_DONTWAIT, m->m_type); if (n == NULL) goto out; + bzero(mtod(n, caddr_t), MLEN); n->m_len = min(MLEN, len + off); m->m_next = n; } ==== //depot/projects/netperf_socket/sys/kern/uipc_mbuf2.c#4 (text+ko) ==== @@ -61,7 +61,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf2.c,v 1.24 2004/05/09 05:57:58 sam Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf2.c,v 1.25 2004/05/31 21:46:04 bmilekic Exp $"); /*#define PULLDOWN_DEBUG*/ @@ -230,14 +230,10 @@ * now, we need to do the hard way. don't m_copy as there's no room * on both end. */ - MGET(o, M_DONTWAIT, m->m_type); - if (o && len > MLEN) { - MCLGET(o, M_DONTWAIT); - if ((o->m_flags & M_EXT) == 0) { - m_free(o); - o = NULL; - } - } + if (len > MLEN) + o = m_getcl(M_DONTWAIT, m->m_type, 0); + else + o = m_get(M_DONTWAIT, m->m_type); if (!o) { m_freem(m); return NULL; /* ENOBUFS */ @@ -274,29 +270,27 @@ m_dup1(struct mbuf *m, int off, int len, int wait) { struct mbuf *n; - int l; int copyhdr; if (len > MCLBYTES) return NULL; - if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { + if (off == 0 && (m->m_flags & M_PKTHDR) != 0) copyhdr = 1; - MGETHDR(n, wait, m->m_type); - l = MHLEN; + else + copyhdr = 0; + if (len >= MINCLSIZE) { + if (copyhdr == 1) + n = m_getcl(wait, m->m_type, M_PKTHDR); + else + n = m_getcl(wait, m->m_type, 0); } else { - copyhdr = 0; - MGET(n, wait, m->m_type); - l = MLEN; - } - if (n && len > l) { - MCLGET(n, wait); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } + if (copyhdr == 1) + n = m_gethdr(wait, m->m_type); + else + n = m_get(wait, m->m_type); } if (!n) - return NULL; + return NULL; /* ENOBUFS */ if (copyhdr && !m_dup_pkthdr(n, m, wait)) { m_free(n); 
==== //depot/projects/netperf_socket/sys/kern/uipc_socket.c#11 (text+ko) ==== @@ -30,7 +30,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.170 2004/04/09 13:23:51 rwatson Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.171 2004/05/31 21:46:04 bmilekic Exp $"); #include "opt_inet.h" #include "opt_mac.h" @@ -533,8 +533,8 @@ { struct mbuf **mp; struct mbuf *m; - long space, len, resid; - int clen = 0, error, dontroute, mlen; + long space, len = 0, resid; + int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; #ifdef ZERO_COPY_SOCKETS int cow_send; @@ -626,27 +626,25 @@ cow_send = 0; #endif /* ZERO_COPY_SOCKETS */ SOCKBUF_UNLOCK(&so->so_snd); - if (top == 0) { - MGETHDR(m, M_TRYWAIT, MT_DATA); - if (m == NULL) { - error = ENOBUFS; - SOCKBUF_LOCK(&so->so_snd); /* XXX */ - goto release; - } - mlen = MHLEN; - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { - MGET(m, M_TRYWAIT, MT_DATA); - if (m == NULL) { - error = ENOBUFS; - SOCKBUF_LOCK(&so->so_snd); /* XXX */ - goto release; - } - mlen = MLEN; - } if (resid >= MINCLSIZE) { #ifdef ZERO_COPY_SOCKETS + if (top == NULL) { + MGETHDR(m, M_TRYWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); + goto release; + } + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_TRYWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); + goto release; + } + } if (so_zero_copy_send && resid>=PAGE_SIZE && space>=PAGE_SIZE && @@ -658,29 +656,49 @@ cow_send = socow_setup(m, uio); } } - if (!cow_send){ + if (!cow_send) { + MCLGET(m, M_TRYWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + m = NULL; + } else { + len = min(min(MCLBYTES, resid), space); + } + } else + len = PAGE_SIZE; +#else /* ZERO_COPY_SOCKETS */ + if (top == NULL) { + m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR); + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else + m = 
m_getcl(M_TRYWAIT, MT_DATA, 0); + len = min(min(MCLBYTES, resid), space); #endif /* ZERO_COPY_SOCKETS */ - MCLGET(m, M_TRYWAIT); - if ((m->m_flags & M_EXT) == 0) - goto nopages; - mlen = MCLBYTES; - len = min(min(mlen, resid), space); } else { -#ifdef ZERO_COPY_SOCKETS - len = PAGE_SIZE; + if (top == NULL) { + m = m_gethdr(M_TRYWAIT, MT_DATA); + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + + len = min(min(MHLEN, resid), space); + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && m && len < MHLEN) + MH_ALIGN(m, len); + } else { + m = m_get(M_TRYWAIT, MT_DATA); + len = min(min(MLEN, resid), space); } + } + if (m == NULL) { + error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); + goto release; + } - } else { -#endif /* ZERO_COPY_SOCKETS */ -nopages: - len = min(min(mlen, resid), space); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. - */ - if (atomic && top == 0 && len < mlen) - MH_ALIGN(m, len); - } space -= len; #ifdef ZERO_COPY_SOCKETS if (cow_send) @@ -739,6 +757,7 @@ } while (resid); release: + SOCKBUF_LOCK_ASSERT(&so->so_snd); sbunlock(&so->so_snd); out: SOCKBUF_UNLOCK(&so->so_snd); ==== //depot/projects/netperf_socket/sys/kern/uipc_socket2.c#9 (text+ko) ==== @@ -30,7 +30,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/uipc_socket2.c,v 1.124 2004/05/19 00:22:10 ps Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_socket2.c,v 1.125 2004/05/31 21:46:04 bmilekic Exp $"); #include "opt_mac.h" #include "opt_param.h" @@ -1203,15 +1203,12 @@ if (CMSG_SPACE((u_int)size) > MCLBYTES) return ((struct mbuf *) NULL); - if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) + if (CMSG_SPACE((u_int)size > MLEN)) + m = m_getcl(M_DONTWAIT, MT_CONTROL, 0); + else + m = m_get(M_DONTWAIT, MT_CONTROL); + if (m == NULL) return ((struct mbuf *) NULL); - if (CMSG_SPACE((u_int)size) > MLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return ((struct mbuf *) 
NULL); - } - } cp = mtod(m, struct cmsghdr *); m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), ==== //depot/projects/netperf_socket/sys/kern/uipc_syscalls.c#11 (text+ko) ==== @@ -33,7 +33,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.183 2004/05/08 02:24:21 rwatson Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.184 2004/05/31 21:46:04 bmilekic Exp $"); #include "opt_compat.h" #include "opt_ktrace.h" @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #ifdef KTRACE @@ -85,6 +86,21 @@ int compat); /* + * NSFBUFS-related variables and associated sysctls + */ +int nsfbufs; +int nsfbufspeak; +int nsfbufsused; + +SYSCTL_DECL(_kern_ipc); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, + "Maximum number of sendfile(2) sf_bufs available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, + "Number of sendfile(2) sf_bufs at peak usage"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, + "Number of sendfile(2) sf_bufs in use"); + +/* * System call interface to the socket abstraction. */ #if defined(COMPAT_43) || defined(COMPAT_SUNOS) ==== //depot/projects/netperf_socket/sys/kern/vfs_vnops.c#4 (text+ko) ==== @@ -35,7 +35,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/vfs_vnops.c,v 1.198 2004/04/05 21:03:37 imp Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/vfs_vnops.c,v 1.199 2004/05/31 20:56:10 rwatson Exp $"); #include "opt_mac.h" @@ -929,6 +929,8 @@ struct mount *mp; int error; + GIANT_REQUIRED; + /* * If a vnode is provided, get and return the mount point that * to which it will write. 
@@ -1007,6 +1009,8 @@ struct mount *mp; { + GIANT_REQUIRED; + if (mp == NULL) return; mp->mnt_writeopcount--; ==== //depot/projects/netperf_socket/sys/sparc64/sparc64/vm_machdep.c#8 (text+ko) ==== @@ -40,7 +40,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12 - * $FreeBSD: src/sys/sparc64/sparc64/vm_machdep.c,v 1.65 2004/05/26 12:09:39 tmm Exp $ + * $FreeBSD: src/sys/sparc64/sparc64/vm_machdep.c,v 1.66 2004/05/31 21:46:04 bmilekic Exp $ */ #include "opt_pmap.h" @@ -86,6 +86,10 @@ #include #include +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) @@ -351,6 +355,9 @@ vm_offset_t sf_base; int i; + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); + mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); SLIST_INIT(&sf_freelist.sf_head); sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); ==== //depot/projects/netperf_socket/sys/sys/mbuf.h#11 (text+ko) ==== @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -27,13 +27,18 @@ * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 - * $FreeBSD: src/sys/sys/mbuf.h,v 1.147 2004/05/29 05:36:43 maxim Exp $ + * $FreeBSD: src/sys/sys/mbuf.h,v 1.148 2004/05/31 21:46:05 bmilekic Exp $ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ +/* XXX: These includes suck. Sorry! 
*/ #include +#ifdef _KERNEL +#include +#include +#endif /* * Mbufs are of a single size, MSIZE (sys/param.h), which @@ -57,6 +62,16 @@ */ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) + +/* + * Argument structure passed to UMA routines during mbuf and packet + * allocations. + */ +struct mb_args { + int flags; /* Flags for mbuf being allocated */ + int how; /* How to allocate: M_WAITOK or M_DONTWAIT */ + short type; /* Type of mbuf being allocated */ +}; #endif /* _KERNEL */ /* @@ -167,6 +182,7 @@ */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_PACKET 3 /* came out of Packet zone */ #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ @@ -223,28 +239,12 @@ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* - * Mbuf and cluster allocation statistics PCPU structure. - */ -struct mbpstat { - u_long mb_mbfree; - u_long mb_mbbucks; - u_long mb_clfree; - u_long mb_clbucks; - long mb_mbtypes[MT_NTYPES]; - short mb_active; -}; - -/* * General mbuf allocator statistics structure. - * XXX: Modifications of these are not protected by any mutex locks nor by - * any atomic() manipulations. As a result, we may occasionally lose - * a count or two. Luckily, not all of these fields are modified at all - * and remain static, and those that are manipulated are only manipulated - * in failure situations, which do not occur (hopefully) very often. 
*/ struct mbstat { - u_long m_drops; /* times failed to allocate */ - u_long m_wait; /* times succesfully returned from wait */ + u_long m_mbufs; /* XXX */ + u_long m_mclusts; /* XXX */ + u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* XXX: times m_copym failed */ u_long m_mpfail; /* XXX: times m_pullup failed */ @@ -253,10 +253,10 @@ u_long m_minclsize; /* min length of data to allocate a cluster */ u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ - u_int m_mbperbuck; /* number of mbufs per "bucket" */ - u_int m_clperbuck; /* number of clusters per "bucket" */ - /* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */ + + /* Number of mbtypes (gives # elems in mbtypes[] array: */ short m_numtypes; + /* XXX: Sendfile stats should eventually move to their own struct */ u_long sf_iocnt; /* times sendfile had to do disk I/O */ u_long sf_allocfail; /* times sfbuf allocation failed */ @@ -265,14 +265,23 @@ /* * Flags specifying how an allocation should be made. - * M_DONTWAIT means "don't block if nothing is available" whereas - * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is - * available." + * + * The flag to use is as follows: + * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation. + * - M_WAIT or M_WAITOK or M_TRYWAIT from wherever it is safe to block. + * + * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly + * and if we cannot allocate immediately we may return NULL, + * whereas M_WAIT/M_WAITOK/M_TRYWAIT means that if we cannot allocate + * resources we will block until they are available, and thus never + * return NULL. + * + * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT. */ -#define M_DONTWAIT 0x4 /* don't conflict with M_NOWAIT */ -#define M_TRYWAIT 0x8 /* or M_WAITOK */ -#define M_WAIT M_TRYWAIT /* XXX: deprecated */ -#define MBTOM(how) ((how) & M_TRYWAIT ? 
M_WAITOK : M_NOWAIT) +#define MBTOM(how) (how) +#define M_DONTWAIT M_NOWAIT +#define M_TRYWAIT M_WAITOK +#define M_WAIT M_WAITOK #ifdef _KERNEL /*- @@ -296,12 +305,114 @@ #define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1) /* + * Network buffer allocation API + * + * The rest of it is defined in kern/subr_mbuf.c + */ + +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_clust; +extern uma_zone_t zone_pack; + +static __inline struct mbuf *m_get(int how, short type); +static __inline struct mbuf *m_gethdr(int how, short type); +static __inline struct mbuf *m_getcl(int how, short type, int flags); +static __inline struct mbuf *m_getclr(int how, short type); /* XXX */ +static __inline struct mbuf *m_free(struct mbuf *m); +static __inline void m_clget(struct mbuf *m, int how); +static __inline void m_chtype(struct mbuf *m, short new_type); +void mb_free_ext(struct mbuf *); + >>> TRUNCATED FOR MAIL (1000 lines) <<<