Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 23 Sep 2007 06:25:37 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 126711 for review
Message-ID:  <200709230625.l8N6PbZX023930@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=126711

Change 126711 by kmacy@kmacy_home:ethng on 2007/09/23 06:24:49

	- reinstate lazy refcnt assignment, in the common case of ref_cnt == 1 this 
	  saves us both a cache miss in uma_find_refcnt and a cache miss mb_free_ext 
	  when decrementing the refcnt
	- avoid touching the mbuf if MB_NOTAGS is passed - this allows us to avoid 
	  unnecessary cache misses when freeing an mbuf that is no longer hot in the 
	  cache
	- add M_NOFREE flag to skip freeing an mbuf - this allows us to embed an mbuf
	  header in the cluster itself, giving all zones the packet zone like benefits
	  without the complexity and restrictions that it adds
	- remove access to the packet zone from mbuf.h

Affected files ...

.. //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 edit
.. //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 edit
.. //depot/projects/ethng/src/sys/sys/mbuf.h#6 edit

Differences ...

==== //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 (text+ko) ====

@@ -349,9 +349,10 @@
 mb_dtor_mbuf(void *mem, int size, void *arg)
 {
 	struct mbuf *m;
-
+	unsigned long flags = (unsigned long)arg;
+	
 	m = (struct mbuf *)mem;
-	if ((m->m_flags & M_PKTHDR) != 0)
+	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
 		m_tag_delete_chain(m, NULL);
 	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
 #ifdef INVARIANTS
@@ -378,7 +379,6 @@
 	KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__));
 	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
 	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
-	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
 #ifdef INVARIANTS
 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
 #endif
@@ -404,7 +404,6 @@
 mb_ctor_clust(void *mem, int size, void *arg, int how)
 {
 	struct mbuf *m;
-	u_int *refcnt;
 	int type;
 	uma_zone_t zone;
 	
@@ -435,10 +434,7 @@
 		break;
 	}
 
-
 	if (arg != NULL) {
-		refcnt = uma_find_refcnt(zone, mem);
-		*refcnt = 1;			
 		m = (struct mbuf *)arg;
 		m->m_ext.ext_buf = (caddr_t)mem;
 		m->m_data = m->m_ext.ext_buf;
@@ -447,7 +443,7 @@
 		m->m_ext.ext_args = NULL;
 		m->m_ext.ext_size = size;
 		m->m_ext.ext_type = type;
-		m->m_ext.ref_cnt = refcnt;
+		m->m_ext.ref_cnt = NULL; /* lazy assignment */
 	}
 
 	return (0);

==== //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 (text+ko) ====

@@ -220,18 +220,40 @@
 void
 mb_free_ext(struct mbuf *m)
 {
+	int skipmbuf;
+	int dofree;
+	u_int cnt;
+	
 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
+
+	/* Account for lazy ref count assign. */
+	if (m->m_ext.ref_cnt == NULL)
+		dofree = 1;
+	else
+		dofree = 0;	
+	
+	/*
+	 * check if the header is embedded in the cluster
+	 */	
+	skipmbuf = (m->m_flags & M_NOFREE);
+
+		/*
+	 * This is tricky.  We need to make sure to decrement the
+	 * refcount in a safe way but to also clean up if we're the
+	 * last reference.  This method seems to do it without race.
+	 */
+	while (dofree == 0) {
+		cnt = *(m->m_ext.ref_cnt);
+		if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) {
+			if (cnt == 1)
+				dofree = 1;
+			break;
+		}
+	}
 
 	/* Free attached storage if this mbuf is the only reference to it. */
-	if (*(m->m_ext.ref_cnt) == 1 ||
-	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
+	if (dofree) {
 		switch (m->m_ext.ext_type) {
-		case EXT_PACKET:	/* The packet zone is special. */
-			if (*(m->m_ext.ref_cnt) == 0)
-				*(m->m_ext.ref_cnt) = 1;
-			uma_zfree(zone_pack, m);
-			return;		/* Job done. */
 		case EXT_CLUSTER:
 			uma_zfree(zone_clust, m->m_ext.ext_buf);
 			break;
@@ -263,6 +285,10 @@
 				("%s: unknown ext_type", __func__));
 		}
 	}
+	
+	if (skipmbuf)
+		return;
+	
 	/*
 	 * Free this mbuf back to the mbuf zone with all m_ext
 	 * information purged.
@@ -285,13 +311,9 @@
 mb_dupcl(struct mbuf *n, struct mbuf *m)
 {
 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
 	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
 
-	if (*(m->m_ext.ref_cnt) == 1)
-		*(m->m_ext.ref_cnt) += 1;
-	else
-		atomic_add_int(m->m_ext.ref_cnt, 1);
+	MEXT_ADD_REF(m);
 	n->m_ext.ext_buf = m->m_ext.ext_buf;
 	n->m_ext.ext_free = m->m_ext.ext_free;
 	n->m_ext.ext_args = m->m_ext.ext_args;

==== //depot/projects/ethng/src/sys/sys/mbuf.h#6 (text+ko) ====

@@ -44,6 +44,48 @@
 #endif
 #endif
 
+/*-
+ * mbuf external reference count management macros.
+ *
+ * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
+ *     the external buffer ext_buf.
+ *
+ * MEXT_REM_REF(m): remove reference to m_ext object.
+ *
+ * MEXT_ADD_REF(m): add reference to m_ext object already
+ *     referred to by (m).  XXX Note that it is VERY important that you
+ *     always set the second mbuf's m_ext.ref_cnt to point to the first
+ *     one's (i.e., n->m_ext.ref_cnt = m->m_ext.ref_cnt) AFTER you run
+ *     MEXT_ADD_REF(m).  This is because m might have a lazy initialized
+ *     ref_cnt (NULL) before this is run and it will only be looked up
+ *     from here.  We should make MEXT_ADD_REF() always take two mbufs
+ *     as arguments so that it can take care of this itself.
+ */
+#define	MEXT_IS_REF(m)	(((m)->m_ext.ref_cnt != NULL)			\
+    && (*((m)->m_ext.ref_cnt) > 1))
+
+#define	MEXT_REM_REF(m) do {						\
+	KASSERT((m)->m_ext.ref_cnt != NULL, ("m_ext refcnt lazy NULL")); \
+	KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0"));	\
+	atomic_subtract_int((m)->m_ext.ref_cnt, 1);			\
+} while(0)
+
+#define	MEXT_ADD_REF(m)	do {						\
+	if ((m)->m_ext.ref_cnt == NULL) {				\
+		KASSERT((m)->m_ext.ext_type == EXT_CLUSTER ||		\
+		    (m)->m_ext.ext_type == EXT_PACKET ||		\
+		    (m)->m_ext.ext_type == EXT_JUMBOP ||		\
+		    (m)->m_ext.ext_type == EXT_JUMBO9 ||		\
+		    (m)->m_ext.ext_type == EXT_JUMBO16,			\
+		    ("Unexpected mbuf type has lazy refcnt %d",		\
+			(m)->m_ext.ext_type));				\
+		(m)->m_ext.ref_cnt = (u_int *)uma_find_refcnt(		\
+		    zone_clust, (m)->m_ext.ext_buf);			\
+		*((m)->m_ext.ref_cnt) = 2;				\
+	} else								\
+		atomic_add_int((m)->m_ext.ref_cnt, 1);			\
+} while (0)
+
 /*
  * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
  * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
@@ -130,13 +172,13 @@
  * Description of external storage mapped into mbuf; valid only if M_EXT is
  * set.
  */
-struct m_ext {
+struct m_ext_ {
 	caddr_t		 ext_buf;	/* start of buffer */
 	void		(*ext_free)	/* free routine if not the usual */
 			    (void *, void *);
 	void		*ext_args;	/* optional argument pointer */
+	volatile u_int	*ref_cnt;	/* pointer to ref count info */
 	u_int		 ext_size;	/* size of buffer, for ext_free */
-	volatile u_int	*ref_cnt;	/* pointer to ref count info */
 	int		 ext_type;	/* type of external storage */
 };
 
@@ -150,7 +192,7 @@
 		struct {
 			struct pkthdr	MH_pkthdr;	/* M_PKTHDR set */
 			union {
-				struct m_ext	MH_ext;	/* M_EXT set */
+				struct m_ext_	MH_ext;	/* M_EXT set */
 				char		MH_databuf[MHLEN];
 			} MH_dat;
 		} MH;
@@ -196,6 +238,7 @@
 #define	M_LASTFRAG	0x2000	/* packet is last fragment */
 #define	M_VLANTAG	0x10000	/* ether_vtag is valid */
 #define	M_PROMISC	0x20000	/* packet was not for us */
+#define	M_NOFREE	0x40000	/* do not free the mbuf - its embedded in a cluster */
 
 /*
  * External buffer types: identify ext_buf type.
@@ -207,6 +250,8 @@
 #define	EXT_JUMBO16	5	/* jumbo cluster 16184 bytes */
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
+#define EXT_IOVEC       8
+#define EXT_CLIOVEC     9
 #define	EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	200	/* custom module's ext_buf type */
 #define	EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
@@ -251,6 +296,8 @@
 #define	MT_DATA		1	/* dynamic (data) allocation */
 #define	MT_HEADER	MT_DATA	/* packet header, use M_PKTHDR instead */
 #define	MT_SONAME	8	/* socket name */
+#define MT_IOVEC        9
+#define MT_CLIOVEC      10
 #define	MT_CONTROL	14	/* extra-data protocol message */
 #define	MT_OOBDATA	15	/* expedited data  */
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
@@ -258,6 +305,9 @@
 #define	MT_NOINIT	255	/* Not a type but a flag to allocate
 				   a non-initialized mbuf */
 
+#define MB_NOTAGS       0x1UL
+
+
 /*
  * General mbuf allocator statistics structure.
  *
@@ -339,7 +389,6 @@
 
 extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
-extern uma_zone_t	zone_pack;
 extern uma_zone_t	zone_jumbop;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
@@ -458,11 +507,7 @@
 static __inline struct mbuf *
 m_getcl(int how, short type, int flags)
 {
-	struct mb_args args;
-
-	args.flags = flags;
-	args.type = type;
-	return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
+	return ((struct mbuf *)(m_getjcl(how, type, flags, MCLBYTES)));
 }
 
 /*
@@ -501,12 +546,18 @@
 
 	if (m->m_flags & M_EXT)
 		mb_free_ext(m);
-	else
+	else if ((m->m_flags & M_NOFREE) == 0)
 		uma_zfree(zone_mbuf, m);
 	return (n);
 }
 
 static __inline void
+m_free_fast(struct mbuf *m)
+{
+	uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
+}
+
+static __inline void
 m_clget(struct mbuf *m, int how)
 {
 
@@ -514,14 +565,6 @@
 		printf("%s: %p mbuf already has cluster\n", __func__, m);
 	m->m_ext.ext_buf = (char *)NULL;
 	uma_zalloc_arg(zone_clust, m, how);
-	/*
-	 * On a cluster allocation failure, drain the packet zone and retry,
-	 * we might be able to loosen a few clusters up on the drain.
-	 */
-	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
-		zone_drain(zone_pack);
-		uma_zalloc_arg(zone_clust, m, how);
-	}
 }
 
 /*
@@ -618,7 +661,7 @@
  */
 #define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
 			 (!(((m)->m_flags & M_EXT)) ||			\
-			 (*((m)->m_ext.ref_cnt) == 1)) )		\
+			     !MEXT_IS_REF(m)))				\
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200709230625.l8N6PbZX023930>