Date:      Sun, 21 Aug 2011 16:27:59 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r225061 - user/adrian/if_ath_tx/sys/dev/ath
Message-ID:  <201108211627.p7LGRx2K011017@svn.freebsd.org>

Author: adrian
Date: Sun Aug 21 16:27:59 2011
New Revision: 225061
URL: http://svn.freebsd.org/changeset/base/225061

Log:
  Begin adding some support framework needed to try and get a handle on
  aggregation throughput.
  
  The main problem at the moment is that - at least for userspace
  applications (e.g. iperf) generating traffic - the scheduler is called
  too often (and likely calling taskqueue_enqueue on each queued frame
  is ... bad; I'll deal with that mistake of mine later) and thus the
  software queue never gets "large" enough for aggregates to be formed.
  This is noticeable for UDP, where over 80% of the frames are sent
  as single frames rather than being aggregated, limiting performance
  on my MIPS boards to < 30 Mbit/s.
  
  The eventual aim is to only queue enough frames to the hardware TX
  queue to keep the MAC busy sending said frames, rather than filling
  its TXQ with as many frames as possible. As long as packets are
  queued faster than individual frames are scheduled, the software
  queue will back up and give the aggregation logic something to work
  with.
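  
  As a concrete illustration of that aim, the following is a minimal
  userspace C sketch - not driver code, and every name in it (swq_len,
  hwq_depth, HWQ_TARGET) is invented - showing how gating handoff on the
  in-flight buffer count lets the software queue back up until there is
  something worth aggregating:
  
  #include <stdio.h>
  
  #define HWQ_TARGET 2            /* just enough in flight to busy the MAC */
  
  static int swq_len;             /* frames waiting in the software queue */
  static int hwq_depth;           /* buffers in flight; an aggregate is one */
  
  /* Scheduler: aggregate whatever has backed up, until the target depth. */
  static void
  sched(void)
  {
          while (swq_len > 0 && hwq_depth < HWQ_TARGET) {
                  int agg = swq_len;      /* aggregate everything queued */
                  swq_len = 0;
                  hwq_depth++;            /* one aggregate == one buffer */
                  printf("handoff: %d frame(s) in one buffer, depth=%d\n",
                      agg, hwq_depth);
          }
  }
  
  int
  main(void)
  {
          for (int i = 0; i < 10; i++) {
                  swq_len++;              /* a frame arrives from the stack */
                  sched();                /* called per frame, as today */
                  if (i % 3 == 2 && hwq_depth > 0) {
                          hwq_depth--;    /* a TX completion drains a buffer */
                          sched();        /* resched from the completion path */
                  }
          }
          return (0);
  }
  
  Once hwq_depth pins at HWQ_TARGET, arriving frames accumulate in
  swq_len, so later handoffs carry multi-frame aggregates instead of
  singletons.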
  
  * Add a couple of schedule calls which kick along the TID scheduling
    after a completion handler has run. These aren't strictly needed
    here (as the call is only needed when frames are added onto the TID
    queue) but they'll be needed later on when the TID scheduling is
    delayed to "encourage" aggregates.
  
  * Add hwq_depth to the TID state, which tracks how many buffers are
    queued to the hardware. Aggregate buffers are (for now) counted as
    a single buffer. Since completion functions can (unfortunately)
    be called both from the ath task and when flushing/draining a queue
    (i.e., from various other contexts), lock access to it.
  
  * Modify the aggregate queue function to limit scheduling of aggregate
    frames based on how many hardware-queued frames from this TID are
    outstanding. This may flip back to being keyed off the hardware queue
    depth after some more testing. (See the sketch below.)
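  
  The hwq_depth accounting in the last two items can be sketched in
  isolation. Here is a small, compilable userspace model - a pthread
  mutex stands in for ATH_TXQ_LOCK, tid_sched() is a hypothetical
  stand-in for ath_tx_tid_sched(), and the watermark mirrors
  ATH_AGGR_SCHED_LOW from if_ath_tx.h (ATH_AGGR_SCHED_HIGH is defined
  in this commit but not yet used) - of the locked increment on
  handoff, the underflow check on completion, and the reschedule kick:
  
  #include <pthread.h>
  #include <stdio.h>
  
  #define AGGR_SCHED_LOW 2        /* stop handing off at this depth */
  
  static pthread_mutex_t tid_lock = PTHREAD_MUTEX_INITIALIZER;
  static int hwq_depth;           /* buffers in flight; an aggregate is one */
  
  /* Hypothetical stand-in for ath_tx_tid_sched(). */
  static void
  tid_sched(void)
  {
          printf("TID rescheduled, hwq_depth=%d\n", hwq_depth);
  }
  
  /* Queue side: count the handoff; tell the caller to stop at the mark. */
  static int
  handoff_one(void)
  {
          int more;
  
          pthread_mutex_lock(&tid_lock);
          hwq_depth++;
          more = (hwq_depth < AGGR_SCHED_LOW);
          pthread_mutex_unlock(&tid_lock);
          return (more);          /* caller breaks out of its loop if 0 */
  }
  
  /* Completion side: drop the count, warn on underflow, kick the TID. */
  static void
  complete_one(void)
  {
          pthread_mutex_lock(&tid_lock);
          hwq_depth--;
          if (hwq_depth < 0)
                  printf("hwq_depth < 0: %d\n", hwq_depth);
          pthread_mutex_unlock(&tid_lock);
          tid_sched();
  }
  
  int
  main(void)
  {
          while (handoff_one())
                  ;               /* hand off until the watermark is hit */
          complete_one();
          complete_one();
          return (0);
  }
  
  Because completions can arrive from several contexts, both the
  decrement and the underflow check sit inside the lock, matching what
  the diff below does with ATH_TXQ_LOCK(atid).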

Modified:
  user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.c
  user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.h
  user/adrian/if_ath_tx/sys/dev/ath/if_athvar.h

Modified: user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.c
==============================================================================
--- user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.c	Sun Aug 21 16:11:57 2011	(r225060)
+++ user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.c	Sun Aug 21 16:27:59 2011	(r225061)
@@ -431,8 +431,6 @@ ath_tx_setds_11n(struct ath_softc *sc, s
 	    bf_first->bf_state.bfs_ctsrate,
 	    bf_first->bf_state.bfs_ctsduration);
 
-
-
 	/*
 	 * Setup the last descriptor in the list.
 	 * bf_prev points to the last; bf is NULL here.
@@ -1957,7 +1955,6 @@ ath_tx_swq(struct ath_softc *sc, struct 
 	ATH_TXQ_INSERT_TAIL(atid, bf, bf_list);
 	ATH_TXQ_UNLOCK(atid);
 
-	/* Mark the given tid as having packets to dequeue */
 	ATH_TXQ_LOCK(txq);
 	ath_tx_tid_sched(sc, an, tid);
 	ATH_TXQ_UNLOCK(txq);
@@ -2001,6 +1998,7 @@ ath_tx_tid_init(struct ath_softc *sc, st
 		atid->baw_head = atid->baw_tail = 0;
 		atid->paused = 0;
 		atid->sched = 0;
+		atid->hwq_depth = 0;
 		atid->cleanup_inprogress = 0;
 		if (i == IEEE80211_NONQOS_TID)
 			atid->ac = WME_AC_BE;
@@ -2180,8 +2178,21 @@ ath_tx_tid_cleanup(struct ath_softc *sc,
 void
 ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
 {
-	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d\n", __func__,
-	    bf, fail);
+	struct ieee80211_node *ni = bf->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	int tid = bf->bf_state.bfs_tid;
+	struct ath_tid *atid = &an->an_tid[tid];
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d, hwq_depth now %d\n",
+	    __func__, bf, fail, atid->hwq_depth - 1);
+
+	ATH_TXQ_LOCK(atid);
+	atid->hwq_depth--;
+	if (atid->hwq_depth < 0)
+		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
+		    __func__, atid->hwq_depth);
+	ATH_TXQ_UNLOCK(atid);
+
 	ath_tx_default_comp(sc, bf, fail);
 }
 
@@ -2384,8 +2395,13 @@ ath_tx_aggr_retry_unaggr(struct ath_soft
 		}
 #endif
 
+		ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+		ath_tx_tid_sched(sc, an, atid->tid);
+		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+
 		/* Free buffer, bf is free after this call */
 		ath_tx_default_comp(sc, bf, 0);
+
 		return;
 	}
 
@@ -2542,9 +2558,6 @@ ath_tx_comp_aggr_error(struct ath_softc 
 	}
 	ATH_TXQ_UNLOCK(tid);
 
-	/*
-	 * Kick the queue
-	 */
 	ATH_TXQ_LOCK(txq);
 	ath_tx_tid_sched(sc, an, tid->tid);
 	ATH_TXQ_UNLOCK(txq);
@@ -2611,7 +2624,15 @@ ath_tx_aggr_comp_aggr(struct ath_softc *
 	struct ath_txq *txq = sc->sc_ac2q[atid->ac];
 	int np = 0;
 
-	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called\n", __func__);
+	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called; hwq_depth=%d\n",
+	    __func__, atid->hwq_depth);
+
+	ATH_TXQ_LOCK(atid);
+	atid->hwq_depth--;
+	if (atid->hwq_depth < 0)
+		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
+		    __func__, atid->hwq_depth);
+	ATH_TXQ_UNLOCK(atid);
 
 	/*
 	 * Punt cleanup to the relevant function, not our problem now
@@ -2728,10 +2749,6 @@ ath_tx_aggr_comp_aggr(struct ath_softc *
 	}
 	ATH_TXQ_UNLOCK(atid);
 
-	/*
-	 * Kick the queue if it needs it
-	 * XXX too aggressive?
-	 */
 	ATH_TXQ_LOCK(txq);
 	ath_tx_tid_sched(sc, an, atid->tid);
 	ATH_TXQ_UNLOCK(txq);
@@ -2758,8 +2775,20 @@ ath_tx_aggr_comp_unaggr(struct ath_softc
 
 	if (tid == IEEE80211_NONQOS_TID)
 		device_printf(sc->sc_dev, "%s: TID=16!\n", __func__);
-	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: tid=%d\n",
-	    __func__, bf, bf->bf_state.bfs_tid);
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: tid=%d, hwq_depth=%d\n",
+	    __func__, bf, bf->bf_state.bfs_tid, atid->hwq_depth);
+
+	ATH_TXQ_LOCK(atid);
+	atid->hwq_depth--;
+	if (atid->hwq_depth < 0)
+		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
+		    __func__, atid->hwq_depth);
+	ATH_TXQ_UNLOCK(atid);
+
+	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+	ath_tx_tid_sched(sc, an, atid->tid);
+	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
 
 	/*
 	 * If a cleanup is in progress, punt to comp_cleanup;
@@ -2957,19 +2986,22 @@ ath_tx_tid_hw_queue_aggr(struct ath_soft
 		/* Punt to txq */
 		ATH_TXQ_LOCK(txq);
 		ath_tx_handoff(sc, txq, bf);
-		if (txq->axq_depth > ATH_AGGR_MIN_QDEPTH) {
-			ATH_TXQ_UNLOCK(txq);
+		ATH_TXQ_UNLOCK(txq);
+
+		/* Track outstanding buffer count to hardware */
+		/* aggregates are "one" buffer */
+		ATH_TXQ_LOCK(atid);
+		atid->hwq_depth++;
+		if (atid->hwq_depth >= ATH_AGGR_SCHED_LOW) {
+			ATH_TXQ_UNLOCK(atid);
 			break;
 		}
-		ATH_TXQ_UNLOCK(txq);
+		ATH_TXQ_UNLOCK(atid);
 
 		/*
 		 * Break out if ath_tx_form_aggr() indicated
 		 * there can't be any further progress (eg BAW is full.)
 		 * Checking for an empty txq is done above.
-		 *
-		 * Later on, enforce ATH_AGGR_MIN_QDEPTH to try and
-		 * keep aggregation flowing.
 		 */
 		if (status == ATH_AGGR_BAW_CLOSED)
 			break;
@@ -3033,6 +3065,12 @@ ath_tx_tid_hw_queue_norm(struct ath_soft
 		ath_tx_chaindesclist(sc, bf);
 		ath_tx_set_ratectrl(sc, ni, bf);
 
+		/* Track outstanding buffer count to hardware */
+		/* aggregates are "one" buffer */
+		ATH_TXQ_LOCK(atid);
+		atid->hwq_depth++;
+		ATH_TXQ_UNLOCK(atid);
+
 		/* Punt to hardware or software txq */
 		ATH_TXQ_LOCK(txq);
 		ath_tx_handoff(sc, txq, bf);
@@ -3061,7 +3099,7 @@ ath_txq_sched(struct ath_softc *sc, stru
 	 * Don't schedule if the hardware queue is busy.
 	 * This (hopefully) gives some more time to aggregate
 	 * some packets in the aggregation queue.
-	 * XXX TODO: only do this based on AMPDU buffersj, not
+	 * XXX TODO: only do this based on AMPDU buffers, not
 	 * XXX normal ones.
 	 */
 	ATH_TXQ_LOCK(txq);
@@ -3105,7 +3143,9 @@ ath_txq_sched(struct ath_softc *sc, stru
 		ATH_TXQ_LOCK(txq);
 		if (atid->axq_depth == 0)
 			ath_tx_tid_unsched(sc, atid->an, atid->tid);
-		if (txq->axq_depth > ATH_AGGR_MIN_QDEPTH) {
+
+		/* Give the software queue time to aggregate more packets */
+		if (txq->axq_depth >= ATH_AGGR_MIN_QDEPTH) {
 			//ATH_TXQ_UNLOCK(txq);
 			break;
 		}

Modified: user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.h
==============================================================================
--- user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.h	Sun Aug 21 16:11:57 2011	(r225060)
+++ user/adrian/if_ath_tx/sys/dev/ath/if_ath_tx.h	Sun Aug 21 16:27:59 2011	(r225061)
@@ -49,6 +49,19 @@
 #define	ATH_AGGR_MIN_QDEPTH		2
 
 /*
+ * Watermark for scheduling TIDs in order to maximise aggregation.
+ *
+ * If hwq_depth is greater than this, don't schedule the TID
+ * for packet scheduling - the hardware is already busy servicing
+ * this TID.
+ *
+ * If hwq_depth is less than this, schedule the TID for packet
+ * scheduling in the completion handler.
+ */
+#define	ATH_AGGR_SCHED_HIGH		4
+#define	ATH_AGGR_SCHED_LOW		2
+
+/*
  * return whether a bit at index _n in bitmap _bm is set
  * _sz is the size of the bitmap
  */

Modified: user/adrian/if_ath_tx/sys/dev/ath/if_athvar.h
==============================================================================
--- user/adrian/if_ath_tx/sys/dev/ath/if_athvar.h	Sun Aug 21 16:11:57 2011	(r225060)
+++ user/adrian/if_ath_tx/sys/dev/ath/if_athvar.h	Sun Aug 21 16:27:59 2011	(r225061)
@@ -98,6 +98,7 @@ struct ath_tid {
 	struct ath_node		*an;		/* pointer to parent */
 	int			tid;		/* tid */
 	int			ac;		/* which AC gets this trafic */
+	int			hwq_depth;	/* how many buffers are on HW */
 
 	/*
 	 * Entry on the ath_txq; when there's traffic


