Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 12 Nov 1999 01:48:34 -0500 (EST)
From:      Bosko Milekic <bmilekic@dsuper.net>
To:        freebsd-hackers@freebsd.org
Subject:   mbuf wait code (revisited) -- review?
Message-ID:  <Pine.OSF.4.05.9911120105460.3935-100000@oracle.dsuper.net>

next in thread | raw e-mail | index | archive | help

Hi,

	Attached are some diffs that provide a couple of wait routines in the
out-of-mbuf and/or out-of-mbuf-cluster case(s). The attached diffs are for -STABLE
and I would be greatful if somebody could review them/give feedback. I have diffs for
-CURRENT but am not posting them because I haven't had too much of a chance to test
the code -- whereas these below have been tested for a while now on several -STABLE
machines.
	Since the problematic situation has been described numerous times before both
on the list and in several PRs, I am not going to go over it again. Instead, a
[fairly] accurate description of the situation in question can be found at:

	http://www.freebsd.org/cgi/query-pr.cgi?pr=14042

	[Note that the patches posted in the original PR should not be considered.]

	There are several other open PRs which refer to a similar problem.
Furthermore, I've also spotted at least one other PR which addresses a potentially
related issue:

	http://www.freebsd.org/cgi/query-pr.cgi?pr=9883

	The above PR mentions MGET turning the provided mbuf pointer to a NULL
pointer even if the call was made with M_WAIT. I don't see how this can be the case,
especially since presently the code is set to panic() in the m_reclaim when out of
mbufs and calling with M_WAIT. In any case, with the code below, MGET will
potentially be capable of setting that NULL pointer, which is something that really
can't be avoided even if the call is made with M_WAIT. The whole idea behind the
provided diffs is to add a 'sleep' time before actually deciding to explicitly
"fail" -- this sleep time can be modified dynamically, the diffs add a sysctl
kern.ipc.mbuf_wait to tune the sleep time in the tsleep().

	Anyway, I would really appreciate feedback and/or suggestions. Furthermore,
if anybody's interested in testing it, I can post the -CURRENT version of the diffs (which are only
slightly different). Finally, if this looks good, the next step would be to search
and dig through all the code that uses the MGET, MGETHDR, MCLGET, MCLALLOC macros and
m_get, m_gethdr, m_clalloc functions in order to make sure that all of that code
checks whether the returned pointer is referencing a NULL (most of the problematic
code resides in sys/nfs, from what I've seen.


--
 Bosko Milekic <bmilekic@technokratis.com>

	"I counted the steps in my walks and calculated the cubic contents of soup
plates, coffee cups, and pieces of food -- otherwise my meal was unenjoyable. All
repeated acts or operations I performed had to be divisible by three and if I missed
I felt impelled to do it all over again, even if it took hours."
                                   -- Nikola Tesla, 1919. 



(Note: If the diffs below generate problems, please let me know and I'll post this
stuff somewhere on the WWW).
--snip snip--

diff -ruN sys.old/conf/param.c sys/conf/param.c
--- sys.old/conf/param.c        Sun Oct 31 23:34:16 1999
+++ sys/conf/param.c    Mon Nov  1 20:07:46 1999
@@ -82,6 +82,7 @@
 int    maxfiles = MAXFILES;                    /* system wide open files limit */
 int    maxfilesperproc = MAXFILES;             /* per-process open files limit */
 int    ncallout = 16 + NPROC + MAXFILES;       /* maximum # of timer events */
+int    mbuf_wait = 32;                         /* mbuf sleep time */

 /* maximum # of mbuf clusters */
 #ifndef NMBCLUSTERS
diff -ruN sys.old/kern/uipc_mbuf.c sys/kern/uipc_mbuf.c
--- sys.old/kern/uipc_mbuf.c    Wed Sep  8 20:45:50 1999
+++ sys/kern/uipc_mbuf.c        Fri Nov  5 21:44:51 1999
@@ -47,6 +47,10 @@
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>

+#ifdef INVARIANTS
+#include <machine/cpu.h>
+#endif
+
 static void mbinit __P((void *));
 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

@@ -60,6 +64,8 @@
 int    max_hdr;
 int    max_datalen;

+static u_int m_mballoc_wid = 0, m_clalloc_wid = 0;
+
 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
           &max_linkhdr, 0, "");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
@@ -67,13 +73,14 @@
 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
           &max_datalen, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
+          &mbuf_wait, 0, "");
 SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
 
 static void    m_reclaim __P((void));
 
 /* "number of clusters of pages" */
 #define NCL_INIT       1
-
  #define NMB_INIT       16
 
 /* ARGSUSED*/
@@ -125,6 +132,9 @@
         * any more (nothing is ever freed back to the map) (XXX which
         * is dumb). (however you are not dead as m_reclaim might
         * still be able to free a substantial amount of space).
+        * XXX Furthermore, we can also work with "recycled" mbufs (when
+        * we're calling with M_WAIT the sleep procedure will be waked
+        * up when an mbuf is freed. See m_mballoc_wait()
         */
        if (mb_map_full)
                return (0);
@@ -153,6 +163,55 @@
        return (1);
 }
 
+/*
+ * The m_mballoc_wait function below is used from the mbuf
+ * allocation macros once the mb_map has been exhausted and if
+ * the caller is calling with M_WAIT in order to wait
+ * for a "recycled" mbuf to be freed.
+ */
+struct mbuf *
+m_mballoc_wait(char caller, u_short type)
+{
+       struct mbuf *p;
+
+       /* Sleep until something's available or until we expire. */
+       m_mballoc_wid++;
+       if ((tsleep(&m_mballoc_wid, PVM, "mballc",
+           mbuf_wait)) == EWOULDBLOCK)
+               m_mballoc_wid--;
+      
+       /*
+        * Now that we (think) that we've got something, we will redo an
+        * MGET, but avoid getting into another instance of m_mballoc_wait()
+        */
+       p = 0;
+#define m_mballoc_wait(caller,type)    (struct mbuf *)0
+       if (caller & MGETHDR_C)
+               MGETHDR(p, M_WAIT, type);
+       else
+               MGETHDR(p, M_WAIT, type);
+#undef m_mballoc_wait
+
+       if (p != 0)     /* We waited and got something... */
+               mbstat.m_wait++;
+
+       return (p);
+}
+
+/*
+ * The m_mballoc_wakeup function below is used in order
+ * to wakeup the instance(s) of m_mballoc_wait() which
+ * are waiting on an mbuf to be freed.
+ */
+void
+m_mballoc_wakeup(void)
+{
+       if (m_mballoc_wid) {
+               m_mballoc_wid = 0;
+               wakeup(&m_mballoc_wid);
+       }
+}
+
 #if MCLBYTES > PAGE_SIZE
 static int i_want_my_mcl;
 
@@ -199,7 +258,8 @@
        /*
         * Once we run out of map space, it will be impossible
         * to get any more (nothing is ever freed back to the
-        * map).
+        * map). From this point on, we solely rely on reclaimed
+        * mbufs.
         */
        if (mb_map_full) {
                mbstat.m_drops++;
@@ -242,6 +302,55 @@
 }
 
 /*
+ * The m_clalloc_wait function below is used from the mbuf cluster
+ * allocation macros once the mb_map has been exhausted and if the
+ * caller is calling with M_WAIT in order to wait for a "recycled"
+ * mbuf cluster to be freed.
+ */
+caddr_t
+m_clalloc_wait(void)
+{
+       caddr_t p;
+
+       /* If in an interrupt context, and ifdef INVARIANTS, die. */
+       KASSERT(intr_nesting_level == 0,("CLALLOC: CANNOT WAIT IN INTERRUPT"));
+
+       /* Sleep until something's available or until we expire. */
+       m_clalloc_wid++;
+       if ((tsleep(&m_clalloc_wid, PVM, "mclalc",
+           mbuf_wait)) == EWOULDBLOCK)
+               m_clalloc_wid--;
+      
+       /*
+        * Now that we (think) that we've got something, we will redo and
+        * MGET, but avoid getting into another instance of m_clalloc_wait()
+        */
+       p = 0;
+#define m_clalloc_wait()       (caddr_t)0
+       MCLALLOC(p,M_WAIT);
+#undef m_clalloc_wait
+
+       if (p != 0)     /* We waited and got something... */
+               mbstat.m_wait++;
+
+       return (p);
+}
+
+/*
+ * The m_clalloc_wakeup function below is used in order
+ * to wakeup the instance(s) of m_clalloc_wait() which
+ * are waiting for an mbuf cluster to be freed.
+ */
+void
+m_clalloc_wakeup(void)
+{
+       if (m_clalloc_wid) {
+               m_clalloc_wid = 0;
+               wakeup(&m_clalloc_wid);
+       }
+}
+
+/*
  * When MGET fails, ask protocols to free space when short of memory,
  * then re-attempt to allocate an mbuf.
  */
@@ -254,19 +363,28 @@
        /*
         * Must only do the reclaim if not in an interrupt context.
         */
-       if (i == M_WAIT)
+       if (i == M_WAIT) {
+               KASSERT(intr_nesting_level == 0,
+                   ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
                m_reclaim();
+       }
+
+       /*
+        * Both m_mballoc_wait and m_retry must be nulled because
+        * when the MGET macro is run from here, we deffinately do _not_
+        * want to enter an instance of m_mballoc_wait() or m_retry() (again!)
+        */
+#define m_mballoc_wait(caller,type)    (struct mbuf *)0
 #define m_retry(i, t)  (struct mbuf *)0
        MGET(m, i, t);
 #undef m_retry
+#undef m_mballoc_wait
+
        if (m != NULL) {
                mbstat.m_wait++;
-       } else {
-               if (i == M_DONTWAIT)
-                       mbstat.m_drops++;
-               else
+               mbstat.m_drops++;
+
        return (m);
 }
 
@@ -282,19 +400,23 @@
        /*
         * Must only do the reclaim if not in an interrupt context.
         */
-       if (i == M_WAIT)
+       if (i == M_WAIT) {
+               KASSERT(intr_nesting_level == 0,
+                   ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
                m_reclaim();
+       }
+
+#define m_mballoc_wait(caller,type) (struct mbuf *)0
 #define m_retryhdr(i, t) (struct mbuf *)0
        MGETHDR(m, i, t);
 #undef m_retryhdr
+#undef m_mballoc_wait
+
        if (m != NULL) {
                mbstat.m_wait++;
-       } else {
-               if (i == M_DONTWAIT)
-                       mbstat.m_drops++;
-               else
-                       panic("Out of mbuf clusters");
-       }
+       } else
+               mbstat.m_drops++;
+
        return (m);
 }

diff -ruN sys.old/kern/uipc_socket.c sys/kern/uipc_socket.c
--- sys.old/kern/uipc_socket.c  Wed Sep  8 20:45:50 1999
+++ sys/kern/uipc_socket.c      Fri Nov  5 21:45:49 1999
@@ -486,15 +486,27 @@
                    } else do {
                        if (top == 0) {
                                MGETHDR(m, M_WAIT, MT_DATA);
+                               if (m == NULL) {
+                                       error = ENOBUFS;
+                                       goto release;
+                               }
                                mlen = MHLEN;
                                m->m_pkthdr.len = 0;
                                m->m_pkthdr.rcvif = (struct ifnet *)0;
                        } else {
                                MGET(m, M_WAIT, MT_DATA);
+                               if (m == NULL) {
+                                       error = ENOBUFS;
+                                       goto release;
+                               }
                                mlen = MLEN;
                        }
                        if (resid >= MINCLSIZE) {
                                MCLGET(m, M_WAIT);
+                               if (m == NULL) {
+                                       error = ENOBUFS;
+                                       goto release;
+                               }
                                if ((m->m_flags & M_EXT) == 0)
                                        goto nopages;
                                mlen = MCLBYTES;
@@ -606,6 +618,10 @@
                flags = 0;
        if (flags & MSG_OOB) {
                m = m_get(M_WAIT, MT_DATA);
+               if (m == NULL) {
+                       error = ENOBUFS;
+                       goto release;
+               }
                error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
                if (error)
                        goto bad;
diff -ruN sys.old/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
--- sys.old/kern/uipc_syscalls.c        Wed Sep  8 20:45:50 1999
+++ sys/kern/uipc_syscalls.c    Fri Nov  5 21:46:27 1999
@@ -1620,6 +1620,10 @@
                 * Get an mbuf header and set it up as having external storage.
                 */
                MGETHDR(m, M_WAIT, MT_DATA);
+               if (m == NULL) {
+                       error = ENOBUFS;
+                       goto done;
+               }
                m->m_ext.ext_free = sf_buf_free;
                m->m_ext.ext_ref = sf_buf_ref;
                m->m_ext.ext_buf = (void *)sf->kva;
diff -ruN sys.old/sys/mbuf.h sys/sys/mbuf.h
--- sys.old/sys/mbuf.h  Sun Oct 31 23:33:54 1999
+++ sys/sys/mbuf.h      Mon Nov  1 20:07:47 1999
@@ -153,6 +153,14 @@
 #define        M_DONTWAIT      1
 #define        M_WAIT          0

+/*
+ * Byte-long flag passed to the m_mballoc_wait function, allowing
+ * us to determine that the call came from an MGETHDR and not an MGET --
+ * this way we are sure to run the MGETHDR macro when the call came from there.
+ */
+#define MGETHDR_C      0x01
+#define MGET_C         0x02
+
 /* Freelists:
  *
  * Normal mbuf clusters are normally treated as character arrays
@@ -187,7 +195,8 @@
  * allocates an mbuf and initializes it to contain a packet header
  * and internal data.
  */
-#define        MGET(m, how, type) { \
+#define        MGET(m, how, type) \
+       do { \
          int _ms = splimp(); \
          if (mmbfree == 0) \
                (void)m_mballoc(1, (how)); \
@@ -201,13 +210,15 @@
                (m)->m_data = (m)->m_dat; \
                (m)->m_flags = 0; \
                splx(_ms); \
-       } else { \
+         } else { \
                splx(_ms); \
-               (m) = m_retry((how), (type)); \
-       } \
-}
+               if (((m)=m_retry((how), (type)))==NULL && (how)==M_WAIT) \
+                       (m) = m_mballoc_wait(MGET_C,(type)); \
+         } \
+       } while(0)

-#define        MGETHDR(m, how, type) { \
+#define        MGETHDR(m, how, type) \
+       do { \
          int _ms = splimp(); \
          if (mmbfree == 0) \
                (void)m_mballoc(1, (how)); \
@@ -221,11 +232,12 @@
                (m)->m_data = (m)->m_pktdat; \
                (m)->m_flags = M_PKTHDR; \
                splx(_ms); \
-       } else { \
+          } else { \
                splx(_ms); \
-               (m) = m_retryhdr((how), (type)); \
-       } \
-}
+               if (((m)=m_retryhdr((how),(type)))==NULL && (how)==M_WAIT) \
+                       (m) = m_mballoc_wait(MGETHDR_C,(type)); \
+          } \
+       } while(0)

 /*
  * Mbuf cluster macros.
@@ -236,15 +248,20 @@
  * freeing the cluster if the reference count has reached 0.
  */
 #define        MCLALLOC(p, how) \
-       MBUFLOCK( \
+       do { \
+         int _ms = splimp(); \
          if (mclfree == 0) \
                (void)m_clalloc(1, (how)); \
          if (((p) = (caddr_t)mclfree) != 0) { \
                ++mclrefcnt[mtocl(p)]; \
                mbstat.m_clfree--; \
                mclfree = ((union mcluster *)(p))->mcl_next; \
+               splx(_ms); \
+         } else if ((how) == M_WAIT) { \
+               splx(_ms); \
+               (p) = m_clalloc_wait(); \
          } \
-       )
+       } while(0)

 #define        MCLGET(m, how) \
        { MCLALLOC((m)->m_ext.ext_buf, (how)); \
@@ -263,6 +280,7 @@
                ((union mcluster *)(p))->mcl_next = mclfree; \
                mclfree = (union mcluster *)(p); \
                mbstat.m_clfree++; \
+               (void)m_clalloc_wakeup(); \
          } \
        )

@@ -284,6 +302,7 @@
                                ((union mcluster *)(p))->mcl_next = mclfree; \
                                mclfree = (union mcluster *)(p); \
                                mbstat.m_clfree++; \
+                               (void)m_clalloc_wakeup(); \
                        } \
                } \
          } \
@@ -292,6 +311,7 @@
          mbstat.m_mtypes[MT_FREE]++; \
          (m)->m_next = mmbfree; \
          mmbfree = (m); \
+         (void)m_mballoc_wakeup(); \
        )

 /*
@@ -397,6 +417,7 @@
 extern int     max_protohdr;           /* largest protocol header */
 extern int     max_hdr;                /* largest link+protocol header */
 extern int     max_datalen;            /* MHLEN - max_hdr */
+extern int     mbuf_wait;              /* mbuf sleep time */

 struct mbuf *m_copym __P((struct mbuf *, int, int, int));
 struct mbuf *m_copypacket __P((struct mbuf *, int));
@@ -408,16 +429,20 @@
 struct mbuf *m_gethdr __P((int, int));
 struct mbuf *m_prepend __P((struct mbuf *,int,int));
 struct mbuf *m_pullup __P((struct mbuf *, int));
+struct mbuf *m_mballoc_wait __P((char,u_short));
 struct mbuf *m_retry __P((int, int));
 struct mbuf *m_retryhdr __P((int, int));
 struct mbuf *m_split __P((struct mbuf *,int,int));
 void   m_adj __P((struct mbuf *, int));
 void   m_cat __P((struct mbuf *,struct mbuf *));
+void   m_mballoc_wakeup __P((void));
+void   m_clalloc_wakeup __P((void));
 int    m_mballoc __P((int, int));
 int    m_clalloc __P((int, int));
 void   m_copyback __P((struct mbuf *, int, int, caddr_t));
 void   m_copydata __P((struct mbuf *,int,int,caddr_t));
 void   m_freem __P((struct mbuf *));
+caddr_t        m_clalloc_wait __P((void));
 #endif /* KERNEL */

 #endif /* !_SYS_MBUF_H_ */

--snip snip--





To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.OSF.4.05.9911120105460.3935-100000>