Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 11 Mar 2005 14:02:34 +0300
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Luigi Rizzo <rizzo@icir.org>, John Baldwin <jhb@FreeBSD.org>, dima <_pppp@mail.ru>
Cc:        net@FreeBSD.org
Subject:   Re: Giant-free polling [PATCH]
Message-ID:  <20050311110234.GA87255@cell.sick.ru>
In-Reply-To: <20050301162949.A64187@xorpc.icir.org>
References:  <E1D1nPr-000IE5-00._pppp-mail-ru@f37.mail.ru> <20050217161031.A46700@xorpc.icir.org> <200503011642.48813.jhb@FreeBSD.org> <20050301162949.A64187@xorpc.icir.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=koi8-r
Content-Disposition: inline

  Collegues,

On Tue, Mar 01, 2005 at 04:29:49PM -0800, Luigi Rizzo wrote:
L> [cc-ing net@freebsd.org to get more opinions]

this good, that you have CC'ed net@, otherwise we would continue
working in parallel without collaboration.

Here is attached patch made in a collaboration by ru, pjd and me.

We use separate mutex for polling, and we drop it before calling the
poll handler to avoid LOR and contention on this mutex.

We have definite evidence that now idle_poll and ISR poll can run
in parallel: if we remove PRF_RUNNING flag check, we got panics
because poll handlers of many drivers (e.g. if_em) are not reentrable.

I think this patch is a step forward in direction of parallel polling
on SMP, since we now have two polling instances (ISR + idle) working
in parallel.

-- 
Totus tuus, Glebius.
GLEBIUS-RIPN GLEB-RIPE

--X1bOJ3K7DJ5YkBrT
Content-Type: text/plain; charset=koi8-r
Content-Disposition: attachment; filename="kern_poll.LIST.diff"

Index: kern_poll.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_poll.c,v
retrieving revision 1.19
diff -u -r1.19 kern_poll.c
--- kern_poll.c	25 Feb 2005 22:07:50 -0000	1.19
+++ kern_poll.c	11 Mar 2005 10:49:47 -0000
@@ -33,6 +33,7 @@
 #include <sys/kernel.h>
 #include <sys/socket.h>			/* needed by net/if.h		*/
 #include <sys/sysctl.h>
+#include <sys/syslog.h>
 
 #include <net/if.h>			/* for IFF_* flags		*/
 #include <net/netisr.h>			/* for NETISR_POLL		*/
@@ -144,7 +145,7 @@
 SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RW,
 	&residual_burst, 0, "# of residual cycles in burst");
 
-static u_int32_t poll_handlers; /* next free entry in pr[]. */
+static u_int32_t poll_handlers;
 SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
 	&poll_handlers, 0, "Number of registered poll handlers");
 
@@ -169,20 +170,33 @@
 	&idlepoll_sleeping, 0, "idlepoll is sleeping");
 
 
-#define POLL_LIST_LEN  128
 struct pollrec {
 	poll_handler_t	*handler;
 	struct ifnet	*ifp;
+#define	PRF_RUNNING	0x1
+#define	PRF_PINNED	0x2
+	uint32_t	flags;
+	LIST_ENTRY(pollrec) next;
 };
 
-static struct pollrec pr[POLL_LIST_LEN];
+#define	PR_VALID(pr)	((pr)->handler != NULL &&			\
+			!((pr)->flags & PRF_RUNNING) &&			\
+			((pr)->ifp->if_flags & (IFF_UP|IFF_RUNNING)) ==	\
+			    (IFF_UP|IFF_RUNNING))
+
+static LIST_HEAD(, pollrec) poll_list = LIST_HEAD_INITIALIZER(&poll_list);
+
+static struct mtx	poll_mtx;
 
 static void
 init_device_poll(void)
 {
 
-	netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, 0);
-	netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, 0);
+	mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
+	netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL,
+	    NETISR_MPSAFE);
+	netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL,
+	    NETISR_MPSAFE);
 }
 SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL)
 
@@ -244,17 +258,26 @@
 void
 ether_poll(int count)
 {
-	int i;
-
-	mtx_lock(&Giant);
+	struct pollrec *pr, *pr1;
 
 	if (count > poll_each_burst)
 		count = poll_each_burst;
-	for (i = 0 ; i < poll_handlers ; i++)
-		if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
-		    (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
-			pr[i].handler(pr[i].ifp, 0, count); /* quick check */
-	mtx_unlock(&Giant);
+
+	mtx_lock(&poll_mtx);
+	LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) {
+		if (PR_VALID(pr)) {
+			pr->flags |= PRF_RUNNING;
+			if (pr1 != NULL)
+				pr1->flags |= PRF_PINNED;
+			mtx_unlock(&poll_mtx);
+			pr->handler(pr->ifp, 0, count);
+			mtx_lock(&poll_mtx);
+			if (pr1 != NULL)
+				pr1->flags &= ~PRF_PINNED;
+			pr->flags &= ~PRF_RUNNING;
+		}
+	}
+	mtx_unlock(&poll_mtx);
 }
 
 /*
@@ -320,17 +343,17 @@
 
 /*
  * netisr_poll is scheduled by schednetisr when appropriate, typically once
- * per tick. It is called at splnet() so first thing to do is to upgrade to
- * splimp(), and call all registered handlers.
+ * per tick.
  */
 static void
 netisr_poll(void)
 {
+	struct pollrec *pr, *pr1;
 	static int reg_frac_count;
-	int i, cycles;
+	int cycles;
 	enum poll_cmd arg = POLL_ONLY;
-	mtx_lock(&Giant);
 
+	mtx_lock(&poll_mtx);
 	phase = 3;
 	if (residual_burst == 0) { /* first call in this tick */
 		microuptime(&poll_start_t);
@@ -372,25 +395,36 @@
 	residual_burst -= cycles;
 
 	if (polling) {
-		for (i = 0 ; i < poll_handlers ; i++)
-			if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
-			    (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
-				pr[i].handler(pr[i].ifp, arg, cycles);
+		LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) {
+			if (PR_VALID(pr)) {
+				pr->flags |= PRF_RUNNING;
+				mtx_unlock(&poll_mtx);
+				pr->handler(pr->ifp, arg, cycles);
+				mtx_lock(&poll_mtx);
+				pr->flags &= ~PRF_RUNNING;
+			} else if (pr->handler == NULL &&
+			    !(pr->flags & PRF_PINNED)) {
+				LIST_REMOVE(pr, next);
+				free(pr, M_TEMP);
+			}
+		}
 	} else {	/* unregister */
-		for (i = 0 ; i < poll_handlers ; i++) {
-			if (pr[i].handler &&
-			    pr[i].ifp->if_flags & IFF_RUNNING) {
-				pr[i].ifp->if_flags &= ~IFF_POLLING;
-				pr[i].handler(pr[i].ifp, POLL_DEREGISTER, 1);
+		LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) {
+			if (pr->handler != NULL &&
+			    pr->ifp->if_flags & IFF_RUNNING) {
+				pr->ifp->if_flags &= ~IFF_POLLING;
+				mtx_unlock(&poll_mtx);
+				pr->handler(pr->ifp, POLL_DEREGISTER, 1);
+				mtx_lock(&poll_mtx);
 			}
-			pr[i].handler=NULL;
+			pr->handler = NULL;
 		}
 		residual_burst = 0;
 		poll_handlers = 0;
 	}
 	/* on -stable, schednetisr(NETISR_POLLMORE); */
 	phase = 4;
-	mtx_unlock(&Giant);
+	mtx_unlock(&poll_mtx);
 }
 
 /*
@@ -399,12 +433,12 @@
  * A device is not supposed to register itself multiple times.
  *
  * This is called from within the *_intr() functions, so we do not need
- * further locking.
+ * further ifnet locking.
  */
 int
 ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
 {
-	int s;
+	struct pollrec *pr;
 
 	if (polling == 0) /* polling disabled, cannot register */
 		return 0;
@@ -415,30 +449,27 @@
 	if (ifp->if_flags & IFF_POLLING)	/* already polling	*/
 		return 0;
 
-	s = splhigh();
-	if (poll_handlers >= POLL_LIST_LEN) {
-		/*
-		 * List full, cannot register more entries.
-		 * This should never happen; if it does, it is probably a
-		 * broken driver trying to register multiple times. Checking
-		 * this at runtime is expensive, and won't solve the problem
-		 * anyways, so just report a few times and then give up.
-		 */
-		static int verbose = 10 ;
-		splx(s);
-		if (verbose >0) {
-			printf("poll handlers list full, "
-				"maybe a broken driver ?\n");
-			verbose--;
+	mtx_lock(&poll_mtx);
+	LIST_FOREACH(pr, &poll_list, next)
+		if (pr->ifp == ifp && pr->handler != NULL) {
+			mtx_unlock(&poll_mtx);
+			log(LOG_DEBUG, "ether_poll_register: %s: handler"
+			    " already registered\n", ifp->if_xname);
+			return (0);
 		}
-		return 0; /* no polling for you */
+	pr = malloc(sizeof(*pr), M_TEMP, M_NOWAIT);
+	if (pr == NULL) {
+		mtx_unlock(&poll_mtx);
+		log(LOG_DEBUG, "ether_poll_register: malloc() failed\n");
+		return (0);
 	}
 
-	pr[poll_handlers].handler = h;
-	pr[poll_handlers].ifp = ifp;
+	pr->handler = h;
+	pr->ifp = ifp;
+	LIST_INSERT_HEAD(&poll_list, pr, next);
 	poll_handlers++;
 	ifp->if_flags |= IFF_POLLING;
-	splx(s);
+	mtx_unlock(&poll_mtx);
 	if (idlepoll_sleeping)
 		wakeup(&idlepoll_sleeping);
 	return 1; /* polling enabled in next call */
@@ -453,29 +484,24 @@
 int
 ether_poll_deregister(struct ifnet *ifp)
 {
-	int i;
+	struct pollrec *pr;
 
-	mtx_lock(&Giant);
 	if ( !ifp || !(ifp->if_flags & IFF_POLLING) ) {
-		mtx_unlock(&Giant);
 		return 0;
 	}
-	for (i = 0 ; i < poll_handlers ; i++)
-		if (pr[i].ifp == ifp) /* found it */
-			break;
-	ifp->if_flags &= ~IFF_POLLING; /* found or not... */
-	if (i == poll_handlers) {
-		mtx_unlock(&Giant);
-		printf("ether_poll_deregister: ifp not found!!!\n");
-		return 0;
-	}
-	poll_handlers--;
-	if (i < poll_handlers) { /* Last entry replaces this one. */
-		pr[i].handler = pr[poll_handlers].handler;
-		pr[i].ifp = pr[poll_handlers].ifp;
-	}
-	mtx_unlock(&Giant);
-	return 1;
+	mtx_lock(&poll_mtx);
+	LIST_FOREACH(pr, &poll_list, next)
+		if (pr->ifp == ifp && pr->handler != NULL) {
+			pr->handler = NULL;
+			poll_handlers--;
+			mtx_unlock(&poll_mtx);
+			ifp->if_flags &= ~IFF_POLLING;
+			return (1);
+		}
+	mtx_unlock(&poll_mtx);
+	log(LOG_DEBUG, "ether_poll_deregister: %s: not found!!!\n",
+	    ifp->if_xname);
+	return (0);
 }
 
 static void
@@ -495,10 +521,7 @@
 	for (;;) {
 		if (poll_in_idle_loop && poll_handlers > 0) {
 			idlepoll_sleeping = 0;
-			mtx_lock(&Giant);
 			ether_poll(poll_each_burst);
-			mtx_unlock(&Giant);
-			mtx_assert(&Giant, MA_NOTOWNED);
 			mtx_lock_spin(&sched_lock);
 			mi_switch(SW_VOL, NULL);
 			mtx_unlock_spin(&sched_lock);

--X1bOJ3K7DJ5YkBrT--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20050311110234.GA87255>