Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 5 Feb 2013 10:24:24 -0500
From:      Randall Stewart <rrs@lakerest.net>
To:        John Baldwin <jhb@FreeBSD.org>
Cc:        freebsd-net <freebsd-net@FreeBSD.org>, Robert Watson <rwatson@FreeBSD.org>, Kip Macy <kmacy@FreeBSD.org>, Jack Vogel <jfvogel@gmail.com>
Subject:   Re: Driver patch to look at...
Message-ID:  <B02D5035-650B-4958-85E9-B6D984E720D5@lakerest.net>
In-Reply-To: <39571D84-A8C0-46A4-8EFA-CF74D862EAAE@lakerest.net>
References:  <D3AA078A-CD19-4228-A019-BE9C985895E2@lakerest.net> <201302041524.58699.jhb@freebsd.org> <45AD1046-A630-4C96-B4D2-B8A7D6A6DC45@lakerest.net> <39571D84-A8C0-46A4-8EFA-CF74D862EAAE@lakerest.net>

next in thread | previous in thread | raw e-mail | index | archive | help

--Apple-Mail=_3460D60E-1D83-4AEB-A9DF-11C2A6881F5A
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=windows-1252

Here is an updated patch=85 sigh.. I foobar'd the ALTQ stuff.. lots of =
crashes ;-D


R


--Apple-Mail=_3460D60E-1D83-4AEB-A9DF-11C2A6881F5A
Content-Disposition: attachment;
	filename=driver_patch.txt
Content-Type: text/plain;
	x-unix-mode=0644;
	name="driver_patch.txt"
Content-Transfer-Encoding: quoted-printable

Index: dev/e1000/if_em.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/e1000/if_em.c	(revision 246357)
+++ dev/e1000/if_em.c	(working copy)
@@ -894,7 +894,7 @@ static int
 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf =
*m)
 {
 	struct adapter  *adapter =3D txr->adapter;
-        struct mbuf     *next;
+        struct mbuf     *next, *snext;
         int             err =3D 0, enq =3D 0;
=20
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=3D=

@@ -905,22 +905,25 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
 	}
=20
 	enq =3D 0;
-	if (m =3D=3D NULL) {
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else if (drbr_needs_enqueue(ifp, txr->br)) {
-		if ((err =3D drbr_enqueue(ifp, txr->br, m)) !=3D 0)
+	if (m) {
+		err =3D drbr_enqueue(ifp, txr->br, m);
+		if (err) {
 			return (err);
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else
-		next =3D m;
+		}
+	}=20
=20
 	/* Process the queue */
-	while (next !=3D NULL) {
+	while ((next =3D drbr_peek(ifp, txr->br)) !=3D NULL) {
+		snext =3D next;
 		if ((err =3D em_xmit(txr, &next)) !=3D 0) {
-                        if (next !=3D NULL)
-                                err =3D drbr_enqueue(ifp, txr->br, =
next);
-                        break;
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, txr->br);
+			} else {
+				drbr_putback(ifp, txr->br, next, snext);
+			}
+			break;
 		}
+		drbr_advance(ifp, txr->br);
 		enq++;
 		ifp->if_obytes +=3D next->m_pkthdr.len;
 		if (next->m_flags & M_MCAST)
@@ -928,7 +931,6 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) =3D=3D 0)
                         break;
-		next =3D drbr_dequeue(ifp, txr->br);
 	}
=20
 	if (enq > 0) {
Index: dev/e1000/if_igb.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/e1000/if_igb.c	(revision 246357)
+++ dev/e1000/if_igb.c	(working copy)
@@ -981,7 +981,7 @@ static int
 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
 	struct adapter  *adapter =3D txr->adapter;
-        struct mbuf     *next;
+        struct mbuf     *next, *snext;
         int             err =3D 0, enq;
=20
 	IGB_TX_LOCK_ASSERT(txr);
@@ -994,12 +994,23 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_r
 	enq =3D 0;
=20
 	/* Process the queue */
-	while ((next =3D drbr_dequeue(ifp, txr->br)) !=3D NULL) {
+	while ((next =3D drbr_peek(ifp, txr->br)) !=3D NULL) {
+		snext =3D next;
 		if ((err =3D igb_xmit(txr, &next)) !=3D 0) {
-			if (next !=3D NULL)
-				err =3D drbr_enqueue(ifp, txr->br, =
next);
+			if (next =3D=3D NULL) {
+				/* It was freed, move forward */
+				drbr_advance(ifp, txr->br);
+			} else {
+				/*=20
+				 * Still have one left, it may not be
+				 * the same since the transmit function
+				 * may have changed it.
+				 */
+				drbr_putback(ifp, txr->br, next, snext);
+			}
 			break;
 		}
+		drbr_advance(ifp, txr->br);
 		enq++;
 		ifp->if_obytes +=3D next->m_pkthdr.len;
 		if (next->m_flags & M_MCAST)
Index: dev/oce/oce_if.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/oce/oce_if.c	(revision 246357)
+++ dev/oce/oce_if.c	(working copy)
@@ -1154,6 +1154,7 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf
 	POCE_SOFTC sc =3D ifp->if_softc;
 	int status =3D 0, queue_index =3D 0;
 	struct mbuf *next =3D NULL;
+	struct mbuf *snext;
 	struct buf_ring *br =3D NULL;
=20
 	br  =3D wq->br;
@@ -1166,29 +1167,28 @@ oce_multiq_transmit(struct ifnet *ifp, struct =
mbuf
 		return status;
 	}
=20
-	if (m =3D=3D NULL)
-		next =3D drbr_dequeue(ifp, br);	=09
-	else if (drbr_needs_enqueue(ifp, br)) {
+	 if (m) {
 		if ((status =3D drbr_enqueue(ifp, br, m)) !=3D 0)
 			return status;
-		next =3D drbr_dequeue(ifp, br);
-	} else
-		next =3D m;
-
-	while (next !=3D NULL) {
+	}=20
+	while ((next =3D drbr_peek(ifp, br)) !=3D NULL) {
+		snext =3D next;
 		if (oce_tx(sc, &next, queue_index)) {
-			if (next !=3D NULL) {
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, br);
+			} else {
+				drbr_putback(ifp, br, next, snext);
 				wq->tx_stats.tx_stops ++;
 				ifp->if_drv_flags |=3D IFF_DRV_OACTIVE;
 				status =3D drbr_enqueue(ifp, br, next);
 			} =20
 			break;
 		}
+		drbr_advance(ifp, br);
 		ifp->if_obytes +=3D next->m_pkthdr.len;
 		if (next->m_flags & M_MCAST)
 			ifp->if_omcasts++;
 		ETHER_BPF_MTAP(ifp, next);
-		next =3D drbr_dequeue(ifp, br);
 	}
=20
 	return status;
Index: dev/ixgbe/ixgbe.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/ixgbe/ixgbe.c	(revision 246357)
+++ dev/ixgbe/ixgbe.c	(working copy)
@@ -821,7 +821,7 @@ static int
 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct =
mbuf *m)
 {
 	struct adapter  *adapter =3D txr->adapter;
-        struct mbuf     *next;
+        struct mbuf     *next, *snext;
         int             enqueued, err =3D 0;
=20
 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) =3D=3D 0) ||
@@ -832,22 +832,25 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
 	}
=20
 	enqueued =3D 0;
-	if (m =3D=3D NULL) {
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else if (drbr_needs_enqueue(ifp, txr->br)) {
-		if ((err =3D drbr_enqueue(ifp, txr->br, m)) !=3D 0)
+	if (m) {
+		err =3D drbr_enqueue(ifp, txr->br, m);
+		if (err) {
 			return (err);
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else
-		next =3D m;
+		}
+	}
=20
 	/* Process the queue */
-	while (next !=3D NULL) {
+	while ((next =3D drbr_peek(ifp, txr->br)) !=3D NULL) {
+		snext =3D next;
 		if ((err =3D ixgbe_xmit(txr, &next)) !=3D 0) {
-			if (next !=3D NULL)
-				err =3D drbr_enqueue(ifp, txr->br, =
next);
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, txr->br);
+			} else {
+				drbr_putback(ifp, txr->br, next, snext);
+			}
 			break;
 		}
+		drbr_advance(ifp, txr->br);
 		enqueued++;
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, next);
@@ -855,7 +858,6 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
 			break;
 		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
 			ixgbe_txeof(txr);
-		next =3D drbr_dequeue(ifp, txr->br);
 	}
=20
 	if (enqueued > 0) {
Index: dev/ixgbe/ixv.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/ixgbe/ixv.c	(revision 246357)
+++ dev/ixgbe/ixv.c	(working copy)
@@ -605,7 +605,7 @@ static int
 ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf =
*m)
 {
 	struct adapter  *adapter =3D txr->adapter;
-        struct mbuf     *next;
+        struct mbuf     *next, *snext;
         int             enqueued, err =3D 0;
=20
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=3D=

@@ -620,22 +620,24 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
 		ixv_txeof(txr);
=20
 	enqueued =3D 0;
-	if (m =3D=3D NULL) {
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else if (drbr_needs_enqueue(ifp, txr->br)) {
-		if ((err =3D drbr_enqueue(ifp, txr->br, m)) !=3D 0)
+	if (m) {
+		err =3D drbr_enqueue(ifp, txr->br, m);
+		if (err) {
 			return (err);
-		next =3D drbr_dequeue(ifp, txr->br);
-	} else
-		next =3D m;
-
+		}
+	}
 	/* Process the queue */
-	while (next !=3D NULL) {
+	while ((next =3D drbr_peek(ifp, txr->br)) !=3D NULL) {
+		snext =3D next;
 		if ((err =3D ixv_xmit(txr, &next)) !=3D 0) {
-			if (next !=3D NULL)
-				err =3D drbr_enqueue(ifp, txr->br, =
next);
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, txr->br);
+			} else {
+				drbr_putback(ifp, txr->br, next, snext);
+			}
 			break;
 		}
+		drbr_advance(ifp, txr->br);
 		enqueued++;
 		ifp->if_obytes +=3D next->m_pkthdr.len;
 		if (next->m_flags & M_MCAST)
@@ -648,7 +650,6 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
 			ifp->if_drv_flags |=3D IFF_DRV_OACTIVE;
 			break;
 		}
-		next =3D drbr_dequeue(ifp, txr->br);
 	}
=20
 	if (enqueued > 0) {
Index: dev/bxe/if_bxe.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- dev/bxe/if_bxe.c	(revision 246357)
+++ dev/bxe/if_bxe.c	(working copy)
@@ -9491,7 +9491,7 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
     struct bxe_fastpath *fp, struct mbuf *m)
 {
 	struct bxe_softc *sc;
-	struct mbuf *next;
+	struct mbuf *next, *snext;
 	int depth, rc, tx_count;
=20
 	sc =3D fp->sc;
@@ -9506,24 +9506,16 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
=20
 	BXE_FP_LOCK_ASSERT(fp);
=20
-	if (m =3D=3D NULL) {
-		/* No new work, check for pending frames. */
-		next =3D drbr_dequeue(ifp, fp->br);
-	} else if (drbr_needs_enqueue(ifp, fp->br)) {
-		/* Both new and pending work, maintain packet order. */
+	if (m) {
 		rc =3D drbr_enqueue(ifp, fp->br, m);
 		if (rc !=3D 0) {
 			fp->tx_soft_errors++;
 			goto bxe_tx_mq_start_locked_exit;
 		}
-		next =3D drbr_dequeue(ifp, fp->br);
-	} else
-		/* New work only, nothing pending. */
-		next =3D m;
-
+	}
  	/* Keep adding entries while there are frames to send. */
-	while (next !=3D NULL) {
-
+	while ((next =3D drbr_peek(ifp, fp->br)) !=3D NULL) {
+		snext =3D next;
 		/* The transmit mbuf now belongs to us, keep track of =
it. */
 		fp->tx_mbuf_alloc++;
=20
@@ -9537,23 +9529,22 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
 		if (__predict_false(rc !=3D 0)) {
 			fp->tx_encap_failures++;
 			/* Very Bad Frames(tm) may have been dropped. */
-			if (next !=3D NULL) {
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, fp->br);
+			} else {
+				drbr_putback(ifp, fp->br, next, snext);
 				/*
 				 * Mark the TX queue as full and save
 				 * the frame.
 				 */
 				ifp->if_drv_flags |=3D IFF_DRV_OACTIVE;
 				fp->tx_frame_deferred++;
-
-				/* This may reorder frame. */
-				rc =3D drbr_enqueue(ifp, fp->br, next);
 				fp->tx_mbuf_alloc--;
 			}
-
 			/* Stop looking for more work. */
 			break;
 		}
-
+		drbr_advance(ifp, fp->br);
 		/* The transmit frame was enqueued successfully. */
 		tx_count++;
=20
@@ -9574,8 +9565,6 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
 			ifp->if_drv_flags &=3D ~IFF_DRV_OACTIVE;
 			break;
 		}
-
-		next =3D drbr_dequeue(ifp, fp->br);
 	}
=20
 	/* No TX packets were dequeued. */
Index: net/if_var.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- net/if_var.h	(revision 246357)
+++ net/if_var.h	(working copy)
@@ -622,6 +622,46 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *b
 }
=20
 static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new, =
struct mbuf *prev)
+{
+	/*
+	 * The top of the list needs to be swapped=20
+	 * for this one.
+	 */
+#ifdef ALTQ
+	if (ifp !=3D NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+		/*=20
+		 * Peek in altq case dequeued it
+		 * so put it back.
+		 */
+		IFQ_DRV_PREPEND(&ifp->if_snd, new);
+		return;
+	}
+#endif
+	if (new !=3D prev)=20
+		buf_ring_swap(br, new, prev);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+	struct mbuf *m;
+	if (ifp !=3D NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+		/*=20
+		 * Pull it off like a dequeue
+		 * since drbr_advance() does nothing
+		 * for altq and drbr_putback() will
+		 * use the old prepend function.
+		 */
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		return (m);
+	}
+#endif
+	return(buf_ring_peek(br));
+}
+
+static __inline void
 drbr_flush(struct ifnet *ifp, struct buf_ring *br)
 {
 	struct mbuf *m;
@@ -656,6 +696,18 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *b
 	return (buf_ring_dequeue_sc(br));
 }
=20
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+	/* Nothing to do here since peek dequeues in altq case */
+	if (ALTQ_IS_ENABLED(&ifp->if_snd))
+		return;
+#endif
+	return (buf_ring_advance_sc(br));
+}
+
+
 static __inline struct mbuf *
 drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
     int (*func) (struct mbuf *, void *), void *arg)=20
Index: sys/buf_ring.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- sys/buf_ring.h	(revision 246357)
+++ sys/buf_ring.h	(working copy)
@@ -208,6 +208,51 @@ buf_ring_dequeue_sc(struct buf_ring *br)
 }
=20
 /*
+ * single-consumer advance after a peek
+ * use where it is protected by a lock
+ * e.g. a network driver's tx queue lock
+ */
+static __inline void
+buf_ring_advance_sc(struct buf_ring *br)
+{
+	uint32_t cons_head, cons_next;
+	uint32_t prod_tail;
+=09
+	cons_head =3D br->br_cons_head;
+	prod_tail =3D br->br_prod_tail;
+=09
+	cons_next =3D (cons_head + 1) & br->br_cons_mask;
+=09
+	if (cons_head =3D=3D prod_tail)=20
+		return;
+
+	br->br_cons_head =3D cons_next;
+	br->br_cons_tail =3D cons_next;
+}
+
+
+/*
+ * Used to return a differnt mbuf to the
+ * top of the ring. This can happen if
+ * the driver changed the packets (some defragmentation
+ * for example) and then realized the transmit
+ * ring was full. In such a case the old packet
+ * is now freed, but we want the order of the actual
+ * data (being sent in the new packet) to remain
+ * the same.
+ */
+static __inline void
+buf_ring_swap(struct buf_ring *br, void *new, void *old)
+{
+	int ret;
+	if (br->br_cons_head =3D=3D br->br_prod_tail)=20
+		/* Huh? */
+		return;
+	ret =3D atomic_cmpset_long((uint64_t =
*)&br->br_ring[br->br_cons_head], (uint64_t)old, (uint64_t)new);
+	KASSERT(ret, ("Swap out failed old:%p new:%p ret:%d", old, new, =
ret));
+}
+
+/*
  * return a pointer to the first entry in the ring
  * without modifying it, or NULL if the ring is empty
  * race-prone if not protected by a lock
Index: ofed/drivers/net/mlx4/en_tx.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- ofed/drivers/net/mlx4/en_tx.c	(revision 246357)
+++ ofed/drivers/net/mlx4/en_tx.c	(working copy)
@@ -919,7 +919,7 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
 {
 	struct mlx4_en_priv *priv =3D netdev_priv(dev);
 	struct mlx4_en_tx_ring *ring;
-	struct mbuf *next;
+	struct mbuf *next, *snext;
 	int enqueued, err =3D 0;
=20
 	ring =3D &priv->tx_ring[tx_ind];
@@ -931,22 +931,22 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
 	}
=20
 	enqueued =3D 0;
-	if (m =3D=3D NULL) {
-		next =3D drbr_dequeue(dev, ring->br);
-	} else if (drbr_needs_enqueue(dev, ring->br)) {
+	if (m) {
 		if ((err =3D drbr_enqueue(dev, ring->br, m)) !=3D 0)
 			return (err);
-		next =3D drbr_dequeue(dev, ring->br);
-	} else
-		next =3D m;
-
+	}
 	/* Process the queue */
-	while (next !=3D NULL) {
+	while ((next =3D drbr_peek(ifp, txr->br)) !=3D NULL) {
+		snext =3D next;
 		if ((err =3D mlx4_en_xmit(dev, tx_ind, &next)) !=3D 0) {
-			if (next !=3D NULL)
-				err =3D drbr_enqueue(dev, ring->br, =
next);
+			if (next =3D=3D NULL) {
+				drbr_advance(ifp, txr->br);
+			} else {
+				drbr_putback(ifp, txr->br, next, snext);
+			}
 			break;
 		}
+		drbr_advance(ifp, txr->br);
 		enqueued++;
 		dev->if_obytes +=3D next->m_pkthdr.len;
 		if (next->m_flags & M_MCAST)
@@ -955,7 +955,6 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
 		ETHER_BPF_MTAP(dev, next);
 		if ((dev->if_drv_flags & IFF_DRV_RUNNING) =3D=3D 0)
 			break;
-		next =3D drbr_dequeue(dev, ring->br);
 	}
=20
 	if (enqueued > 0)

--Apple-Mail=_3460D60E-1D83-4AEB-A9DF-11C2A6881F5A
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=windows-1252


On Feb 5, 2013, at 6:49 AM, Randall Stewart wrote:

> John:
>=20
> Here is an updated patch, per your suggestions. Note that I also
> expanded and the only driver that uses these methods I did not touch
> is the cxgb, but thats because I am not really sure it has the =
problem=85 it
> does not quite enqueue the same way (it appears) that the other =
drivers do ;-)
>=20
> R
>=20
> <driver_patch.txt>
> On Feb 5, 2013, at 5:49 AM, Randy Stewart wrote:
>=20
>>=20
>> On Feb 4, 2013, at 3:24 PM, John Baldwin wrote:
>>=20
>>> On Monday, February 04, 2013 12:22:49 pm Randy Stewart wrote:
>>>> All:
>>>>=20
>>>> I have been working with TCP in gigabit networks (igb driver =
actually) and have
>>>> found a very nasty problem with the way the driver is doing its put =
back when
>>>> it fills the out-bound transmit queue.
>>>>=20
>>>> Basically it has taken a packet from the head of the ring buffer, =
and then=20
>>>> realizes it can't fit it into the transmit queue. So it just =
re-enqueue's it
>>>> into the ring buffer. Whats wrong with that? Well most of the time =
there
>>>> are anywhere from 10-50 packets (maybe more) in that ring buffer =
when you are
>>>> operating at full speed (or trying to). This means you will see 10 =
duplicate
>>>> ACKs, do a fast retransmit and cut your cwnd in half.. not very =
nice actually.
>>>>=20
>>>> The patch I have attached makes it so that
>>>>=20
>>>> 1) There are ways to swap back.
>>>> 2) Use the peek in the ring buffer and only
>>>> dequeue the packet if we put it into the transmit ring
>>>> 3) If something goes wrong and the transmit frees the packet we =
dequeue it.
>>>> 4) If the transmit changed it (defrag etc) then swap out the new =
mbuf that
>>>> has taken its place.
>>>>=20
>>>> I have fixed the four intel drivers that had this systemic issue, =
but there
>>>> are still more to fix.
>>>>=20
>>>> Comments/review .. rotten egg's etc.. would be most welcome before
>>>> I commit this..
>>>=20
>>> Does this only happen in drivers that use buffering?
>>=20
>> Yep, there are a lot of drivers that *do not* use the drbr_xxxx() =
functions and
>> for those they do the IFQ_DRV_PREPEND().. its only the newer drivers =
like the
>> intel 1Gig and 10Gig ones that seem effected
>>=20
>> Also effected are :
>>=20
>> bxe
>> cxgb
>> oce
>> en
>>=20
>> I have not fixed those yet.
>>=20
>>> I seem to recall that
>>> drivers using IFQ would just stuff the packet at the head of the IFQ =
via
>>> IFQ_DRV_PREPEND() in this case so it is still the next packet to =
transmit.
>>> See, for example, this bit in dc_start_locked():
>>>=20
>>> 	for (queued =3D 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd); ) {
>>> 		/*
>>> 		 * If there's no way we can send any packets, return =
now.
>>> 		 */
>>> 		if (sc->dc_cdata.dc_tx_cnt > DC_TX_LIST_CNT - =
DC_TX_LIST_RSVD) {
>>> 			ifp->if_drv_flags |=3D IFF_DRV_OACTIVE;
>>> 			break;
>>> 		}
>>> 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
>>> 		if (m_head =3D=3D NULL)
>>> 			break;
>>>=20
>>> 		if (dc_encap(sc, &m_head)) {
>>> 			if (m_head =3D=3D NULL)
>>> 				break;
>>> 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
>>> 			ifp->if_drv_flags |=3D IFF_DRV_OACTIVE;
>>> 			break;
>>> 		}
>>>=20
>>> It sounds like drbr/buf_ring just don't handle this case correctly?  =
It
>>> seems a shame to have to duplicate so much code in the various =
drivers to
>>> fix this, but that seems to be par for the course when using =
buf_ring. :(
>>> (buggy in edge cases and lots of duplicated code that is).
>>=20
>>> Also, doing the drbr_swap() just so that drbr_dequeue() returns what =
you
>>> just swapped in seems... odd.  It seems that it would be nicer =
instead
>>> to have some sort of drbr_peek() / drbr_advance() where the latter =
just
>>> skips over whatever the current head is?  Then you could have =
something
>>> like:
>>>=20
>>> 	while ((next =3D drbr_peek()) !=3D NULL) {
>>> 		if (!foo_encap(&next)) {
>>> 			if (next =3D=3D NULL)
>>> 				drbr_advance();
>>> 			break;
>>> 		}
>>> 		drbr_advance();
>>> 	}
>>>=20
>>=20
>> That was what I originally did (without the rename), but there is a =
sure crash waiting in that.
>> The only difference from what I originally had was just =
drbr_dequeue().. but
>> I was being a bit lazy and not wanting to add yet another function to =
the=20
>> drbr_xxxx code since essential it would be a clone of drbr_dequeue() =
without
>> returning the mbuf.
>>=20
>> The crash potential here is in that foo_encap(&next) often times will =
return
>> a different mbuf (at least in the igb driver it does). I believe its =
due
>> to either the m_pullup() which could change the lead mbuf you want
>> in the drbr_ring, or the m_defrag all within igb_xmit. Thus you have
>> to track what comes back from the !foo_encap() call and compare it to=20=

>> see if you must swap it out.=20
>>=20
>>=20
>>> I guess the sticky widget is the case of ATLQ as you need to dequeue =
the
>>> packet always in the ALTQ case and put it back if the encap fails.
>>=20
>> Yeah ALTQ is ugly and IMO we need to re-write it anyway.. maybe =
re-think
>> this whole layer ;-0
>>=20
>>> For
>>> your patch it's not clear to me how that works.  It seems that if =
the
>>> encap routine frees the mbuf you try to dereference a freed pointer =
when
>>> you call drbr_dequeue().
>>=20
>> Hmm you are right.. I forgot how we keep those using the mbuf =
itself...
>>=20
>>> I really think you will instead need some sort
>>> of 'drbr_putback()' and have 'drbr_peek()' dequeue in the ALTQ case =
and
>>> use 'drbr_putback()' to put it back (PREPEND) in the ALTQ case.
>>=20
>> We could do that but drbr_putback() would probably need both the old
>> and new pointers and then we could make it do the ring_swap() to put
>> the right mbuf in place..
>>=20
>> Let me go explore that and come up with a better patch ;-)
>>=20
>> R
>>=20
>>=20
>>>=20
>>> --=20
>>> John Baldwin
>>> _______________________________________________
>>> freebsd-net@freebsd.org mailing list
>>> http://lists.freebsd.org/mailman/listinfo/freebsd-net
>>> To unsubscribe, send any mail to =
"freebsd-net-unsubscribe@freebsd.org"
>>>=20
>>=20
>> -----
>> Randall Stewart
>> randall@lakerest.net
>>=20
>>=20
>>=20
>>=20
>> _______________________________________________
>> freebsd-net@freebsd.org mailing list
>> http://lists.freebsd.org/mailman/listinfo/freebsd-net
>> To unsubscribe, send any mail to =
"freebsd-net-unsubscribe@freebsd.org"
>>=20
>=20
> ------------------------------
> Randall Stewart
> 803-317-4952 (cell)
>=20
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"

------------------------------
Randall Stewart
803-317-4952 (cell)


--Apple-Mail=_3460D60E-1D83-4AEB-A9DF-11C2A6881F5A--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?B02D5035-650B-4958-85E9-B6D984E720D5>