Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 22 May 2008 12:02:33 GMT
From:      Oleg <Oleg.Dolgov@gmail.com>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   kern/123892: if_tap [bug] No buffer space available
Message-ID:  <200805221202.m4MC2Xcr031047@www.freebsd.org>
Resent-Message-ID: <200805221210.m4MCA134082766@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         123892
>Category:       kern
>Synopsis:       if_tap [bug] No buffer space available
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu May 22 12:10:01 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator:     Oleg
>Release:        FreeBSD 7-RELEASE amd64
>Organization:
Sunbay
>Environment:
>Description:
Hello

First, I meet this bug along time ago
http://lists.freebsd.org/pipermail/freebsd-net/2007-July/014941.html
and write small program, similar to
http://www.freebsd.org/cgi/query-pr.cgi?pr=95665
but, that work with tap device, to reproduce bug.

And no feedback come.

Later, I make small research while debugging kernel 
http://lists.freebsd.org/pipermail/freebsd-net/2007-April/014064.html
and again no help :(

Today I got this bug agan but with another way:

You need to have multicore machine, FreeBSD 7 kernel with 

options SCHED_ULE
options PREEMPTION
options SMP 

>From NOTES

# PREEMPTION allows the threads that are in the kernel to be preempted
#     by higher priority threads.  It helps with interactivity and
#     allows interrupt threads to run sooner rather than waiting.
#     WARNING! Only tested on amd64 and i386.

When preempt is possible tap-device have 2 unsafe functions (if_tap.c):
tapifstart and tapread.

static int
tapread(struct cdev *dev, struct uio *uio, int flag)
{
	//
	// skipped
	// ..

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		// (1)
		if (m == NULL) {
			if (flag & O_NONBLOCK)
				return (EWOULDBLOCK);

			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			// (2)
			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	//
	// skipped
	// ..
}


static void
tapifstart(struct ifnet *ifp)
{
	//
	// skipped
	// ..

	s = splimp();
	ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	if (ifp->if_snd.ifq_len != 0) {
		mtx_lock(&tp->tap_mtx);
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			// (3)
			wakeup(tp);
		}

		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			mtx_unlock(&tp->tap_mtx);
			pgsigio(&tp->tap_sigio, SIGIO, 0);
		} else
			mtx_unlock(&tp->tap_mtx);

		selwakeuppri(&tp->tap_rsel, PZERO+1);
		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
		ifp->if_opackets ++; /* obytes are counted in ether_output */
	}

	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	splx(s);
}

So, when some process make block read on tap-device, tapread called,
it check  if_snd, and if it empty make sleep until we get a packet for
sending (tapifstart called).

Now imagine, that tapread thread see that queue are empty and going to
sleep, but between (1) and (2) interrupted.
While that thread rest interrupted someplace between (1)-(2)
network stack got or generate many packets for tap-device
(50 e.g. >= if_snd.ifq_maxlen), tapifstart called for each packet,
it make wakeup (3) for tapread thread, but they are lost, because no threads
sleeping. Now really if_snd queue are full.

Next, our tapread thread execution returned and go sleep - never wakeup,
because if_snd queue are full, tcp/ip stack will drop new packets for tap-device
and will not call tapifstart.

p.s. tun device have same problem.

Best regards,
   Oleg Dolgov.

>How-To-Repeat:

>Fix:
Here are patch, condition variable with if_snd.ifq_mtx
make necessary synchronization.


Patch attached with submission follows:

Patches for if_tap.c and if_tapvar.h

=========================================================================
if_tap.c.orig:
$FreeBSD: src/sys/net/if_tap.c,v 1.71 2007/03/19 18:17:31 bms Exp $

--- if_tap.c.orig	2008-05-22 13:13:47.000000000 +0300
+++ if_tap.c.patched	2008-05-22 13:13:47.000000000 +0300
@@ -58,6 +58,7 @@
 #include <sys/ttycom.h>
 #include <sys/uio.h>
 #include <sys/queue.h>
+#include <sys/condvar.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
@@ -226,6 +227,7 @@
 	if_free_type(ifp, IFT_ETHER);
 	splx(s);
 
+	cv_destroy(&tp->tap_start_cv);
 	mtx_destroy(&tp->tap_mtx);
 	free(tp, M_TAP);
 }
@@ -413,6 +415,7 @@
 	/* allocate driver storage and create device */
 	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
 	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
+	cv_init(&tp->tap_start_cv, "tap_start_cv");
 	mtx_lock(&tapmtx);
 	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
 	mtx_unlock(&tapmtx);
@@ -674,29 +677,22 @@
 	}
 	mtx_unlock(&tp->tap_mtx);
 
-	s = splimp();
-	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-
-	if (ifp->if_snd.ifq_len != 0) {
-		mtx_lock(&tp->tap_mtx);
-		if (tp->tap_flags & TAP_RWAIT) {
-			tp->tap_flags &= ~TAP_RWAIT;
-			wakeup(tp);
-		}
+	mtx_lock(&tp->tap_mtx);
+	if (tp->tap_flags & TAP_RWAIT) {
+		tp->tap_flags &= ~TAP_RWAIT;
+		cv_broadcast(&tp->tap_start_cv);
+	}
 
-		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
-			mtx_unlock(&tp->tap_mtx);
-			pgsigio(&tp->tap_sigio, SIGIO, 0);
-		} else
-			mtx_unlock(&tp->tap_mtx);
+	if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
+		mtx_unlock(&tp->tap_mtx);
+		pgsigio(&tp->tap_sigio, SIGIO, 0);
+	} else
+		mtx_unlock(&tp->tap_mtx);
 
-		selwakeuppri(&tp->tap_rsel, PZERO+1);
-		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
-		ifp->if_opackets ++; /* obytes are counted in ether_output */
-	}
+	selwakeuppri(&tp->tap_rsel, PZERO+1);
+	KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
+	ifp->if_opackets ++; /* obytes are counted in ether_output */
 
-	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-	splx(s);
 } /* tapifstart */
 
 
@@ -860,23 +856,31 @@
 	mtx_unlock(&tp->tap_mtx);
 
 	/* sleep until we get a packet */
+	s = splimp();
+	IF_LOCK(&ifp->if_snd);
 	do {
-		s = splimp();
-		IF_DEQUEUE(&ifp->if_snd, m);
-		splx(s);
+		_IF_DEQUEUE(&ifp->if_snd, m);
 
 		if (m == NULL) {
-			if (flag & O_NONBLOCK)
+			if (flag & O_NONBLOCK) {
+				IF_UNLOCK(&ifp->if_snd);
+				splx(s);
 				return (EWOULDBLOCK);
+			}
 
 			mtx_lock(&tp->tap_mtx);
 			tp->tap_flags |= TAP_RWAIT;
 			mtx_unlock(&tp->tap_mtx);
-			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
-			if (error)
+			error = cv_wait_sig(&tp->tap_start_cv, &ifp->if_snd.ifq_mtx);
+			if (error) {
+				IF_UNLOCK(&ifp->if_snd);
+				splx(s);
 				return (error);
+			}
 		}
 	} while (m == NULL);
+	IF_UNLOCK(&ifp->if_snd); 
+	splx(s);
 
 	/* feed packet to bpf */
 	BPF_MTAP(ifp, m);

=========================================================================
if_tapvar.h.orig:
$FreeBSD: src/sys/net/if_tapvar.h,v 1.10 2005/06/10 16:49:18 brooks Exp $

--- if_tapvar.h.orig	2008-05-22 13:13:47.000000000 +0300
+++ if_tapvar.h.patched	2008-05-22 13:13:47.000000000 +0300
@@ -64,6 +64,7 @@
 	SLIST_ENTRY(tap_softc)	tap_next;	/* next device in chain      */
 	struct cdev *tap_dev;
 	struct mtx	 tap_mtx;		/* per-softc mutex */
+	struct cv	 tap_start_cv;  /* wake up readers */
 };
 
 #endif /* !_NET_IF_TAPVAR_H_ */



>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200805221202.m4MC2Xcr031047>