Date: Sun, 23 Sep 2007 06:25:37 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 126711 for review Message-ID: <200709230625.l8N6PbZX023930@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=126711 Change 126711 by kmacy@kmacy_home:ethng on 2007/09/23 06:24:49 - reinstate lazy refcnt assignment, in the common case of ref_cnt == 1 this saves us both a cache miss in uma_find_refcnt and a cache miss mb_free_ext when decrementing the refcnt - avoid touching the mbuf if MB_NOTAGS is passed - this allows us to avoid unnecessary cache misses when freeing an mbuf that is no longer hot in the cache - add M_NOFREE flag to skip freeing an mbuf - this allows us to embed an mbuf header in the cluster itself, giving all zones the packet zone like benefits without the complexity and restrictions that it adds - remove access to the packet zone from mbuf.h Affected files ... .. //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 edit .. //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 edit .. //depot/projects/ethng/src/sys/sys/mbuf.h#6 edit Differences ... ==== //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 (text+ko) ==== @@ -349,9 +349,10 @@ mb_dtor_mbuf(void *mem, int size, void *arg) { struct mbuf *m; - + unsigned long flags = (unsigned long)arg; + m = (struct mbuf *)mem; - if ((m->m_flags & M_PKTHDR) != 0) + if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); #ifdef INVARIANTS @@ -378,7 +379,6 @@ KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__)); KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); - KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); #ifdef INVARIANTS trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); #endif @@ -404,7 +404,6 @@ mb_ctor_clust(void *mem, int size, void *arg, int how) { struct mbuf *m; - u_int *refcnt; int type; uma_zone_t zone; @@ -435,10 +434,7 @@ break; } - if (arg != NULL) { - refcnt = uma_find_refcnt(zone, mem); - *refcnt = 1; m = (struct mbuf *)arg; m->m_ext.ext_buf = (caddr_t)mem; m->m_data = m->m_ext.ext_buf; @@ -447,7 +443,7 @@ m->m_ext.ext_args = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; - m->m_ext.ref_cnt = refcnt; + m->m_ext.ref_cnt = NULL; /* lazy assignment */ } return (0); ==== //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 (text+ko) ==== @@ -220,18 +220,40 @@ void mb_free_ext(struct mbuf *m) { + int skipmbuf; + int dofree; + u_int cnt; + KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + + /* Account for lazy ref count assign. */ + if (m->m_ext.ref_cnt == NULL) + dofree = 1; + else + dofree = 0; + + /* + * check if the header is embedded in the cluster + */ + skipmbuf = (m->m_flags & M_NOFREE); + + /* + * This is tricky. We need to make sure to decrement the + * refcount in a safe way but to also clean up if we're the + * last reference. This method seems to do it without race. + */ + while (dofree == 0) { + cnt = *(m->m_ext.ref_cnt); + if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) { + if (cnt == 1) + dofree = 1; + break; + } + } /* Free attached storage if this mbuf is the only reference to it. */ - if (*(m->m_ext.ref_cnt) == 1 || - atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) { + if (dofree) { switch (m->m_ext.ext_type) { - case EXT_PACKET: /* The packet zone is special. */ - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); - return; /* Job done. */ case EXT_CLUSTER: uma_zfree(zone_clust, m->m_ext.ext_buf); break; @@ -263,6 +285,10 @@ ("%s: unknown ext_type", __func__)); } } + + if (skipmbuf) + return; + /* * Free this mbuf back to the mbuf zone with all m_ext * information purged. @@ -285,13 +311,9 @@ mb_dupcl(struct mbuf *n, struct mbuf *m) { KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - if (*(m->m_ext.ref_cnt) == 1) - *(m->m_ext.ref_cnt) += 1; - else - atomic_add_int(m->m_ext.ref_cnt, 1); + MEXT_ADD_REF(m); n->m_ext.ext_buf = m->m_ext.ext_buf; n->m_ext.ext_free = m->m_ext.ext_free; n->m_ext.ext_args = m->m_ext.ext_args; ==== //depot/projects/ethng/src/sys/sys/mbuf.h#6 (text+ko) ==== @@ -44,6 +44,48 @@ #endif #endif +/*- + * mbuf external reference count management macros. + * + * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing + * the external buffer ext_buf. + * + * MEXT_REM_REF(m): remove reference to m_ext object. + * + * MEXT_ADD_REF(m): add reference to m_ext object already + * referred to by (m). XXX Note that it is VERY important that you + * always set the second mbuf's m_ext.ref_cnt to point to the first + * one's (i.e., n->m_ext.ref_cnt = m->m_ext.ref_cnt) AFTER you run + * MEXT_ADD_REF(m). This is because m might have a lazy initialized + * ref_cnt (NULL) before this is run and it will only be looked up + * from here. We should make MEXT_ADD_REF() always take two mbufs + * as arguments so that it can take care of this itself. + */ +#define MEXT_IS_REF(m) (((m)->m_ext.ref_cnt != NULL) \ + && (*((m)->m_ext.ref_cnt) > 1)) + +#define MEXT_REM_REF(m) do { \ + KASSERT((m)->m_ext.ref_cnt != NULL, ("m_ext refcnt lazy NULL")); \ + KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \ + atomic_subtract_int((m)->m_ext.ref_cnt, 1); \ +} while(0) + +#define MEXT_ADD_REF(m) do { \ + if ((m)->m_ext.ref_cnt == NULL) { \ + KASSERT((m)->m_ext.ext_type == EXT_CLUSTER || \ + (m)->m_ext.ext_type == EXT_PACKET || \ + (m)->m_ext.ext_type == EXT_JUMBOP || \ + (m)->m_ext.ext_type == EXT_JUMBO9 || \ + (m)->m_ext.ext_type == EXT_JUMBO16, \ + ("Unexpected mbuf type has lazy refcnt %d", \ + (m)->m_ext.ext_type)); \ + (m)->m_ext.ref_cnt = (u_int *)uma_find_refcnt( \ + zone_clust, (m)->m_ext.ext_buf); \ + *((m)->m_ext.ref_cnt) = 2; \ + } else \ + atomic_add_int((m)->m_ext.ref_cnt, 1); \ +} while (0) + /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in @@ -130,13 +172,13 @@ * Description of external storage mapped into mbuf; valid only if M_EXT is * set. */ -struct m_ext { +struct m_ext_ { caddr_t ext_buf; /* start of buffer */ void (*ext_free) /* free routine if not the usual */ (void *, void *); void *ext_args; /* optional argument pointer */ + volatile u_int *ref_cnt; /* pointer to ref count info */ u_int ext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ int ext_type; /* type of external storage */ }; @@ -150,7 +192,7 @@ struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { - struct m_ext MH_ext; /* M_EXT set */ + struct m_ext_ MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; @@ -196,6 +238,7 @@ #define M_LASTFRAG 0x2000 /* packet is last fragment */ #define M_VLANTAG 0x10000 /* ether_vtag is valid */ #define M_PROMISC 0x20000 /* packet was not for us */ +#define M_NOFREE 0x40000 /* do not free the mbuf - its embedded in a cluster */ /* * External buffer types: identify ext_buf type. @@ -207,6 +250,8 @@ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ +#define EXT_IOVEC 8 +#define EXT_CLIOVEC 9 #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ @@ -251,6 +296,8 @@ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_SONAME 8 /* socket name */ +#define MT_IOVEC 9 +#define MT_CLIOVEC 10 #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ @@ -258,6 +305,9 @@ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ +#define MB_NOTAGS 0x1UL + + /* * General mbuf allocator statistics structure. * @@ -339,7 +389,6 @@ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; -extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; @@ -458,11 +507,7 @@ static __inline struct mbuf * m_getcl(int how, short type, int flags) { - struct mb_args args; - - args.flags = flags; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how))); + return ((struct mbuf *)(m_getjcl(how, type, flags, MCLBYTES))); } /* @@ -501,12 +546,18 @@ if (m->m_flags & M_EXT) mb_free_ext(m); - else + else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline void +m_free_fast(struct mbuf *m) +{ + uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); +} + +static __inline void m_clget(struct mbuf *m, int how) { @@ -514,14 +565,6 @@ printf("%s: %p mbuf already has cluster\n", __func__, m); m->m_ext.ext_buf = (char *)NULL; uma_zalloc_arg(zone_clust, m, how); - /* - * On a cluster allocation failure, drain the packet zone and retry, - * we might be able to loosen a few clusters up on the drain. - */ - if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { - zone_drain(zone_pack); - uma_zalloc_arg(zone_clust, m, how); - } } /* @@ -618,7 +661,7 @@ */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ - (*((m)->m_ext.ref_cnt) == 1)) ) \ + !MEXT_IS_REF(m))) \ /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200709230625.l8N6PbZX023930>