Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 18 Oct 2015 00:59:28 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r289476 - head/sys/mips/atheros
Message-ID:  <201510180059.t9I0xSrJ053943@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: adrian
Date: Sun Oct 18 00:59:28 2015
New Revision: 289476
URL: https://svnweb.freebsd.org/changeset/base/289476

Log:
  if_arge: fix up TX workaround; add TX/RX requirements for busdma; add stats
  
  The early ethernet MACs (I think AR71xx and AR913x) require 4-byte
  alignment for all packets on both TX and RX.
  
  The later MACs have started relaxing the requirements.
  
  For now, the 1-byte TX and 1-byte RX alignment requirements are only for
  the QCA955x SoCs.  I'll add in the relaxed requirements as I review the
  datasheets and do testing.
  
  * Add a hardware flags field and 1-byte / 4-byte TX/RX alignment.
  * .. defaulting to 4-byte TX and 4-byte RX alignment.
  * Only enforce the TX alignment fixup if the hardware requires a 4-byte
    TX alignment.  This avoids a call to m_defrag().
  * Add counters for various situations for further debugging.
  * Set the 1-byte and 4-byte busdma alignment requirement when
    the tag is created.
  
  This improves the straight bridging performance from 130mbit/sec
  to 180mbit/sec, purely by removing the need for TX path bounce buffers.
  
  The main performance issue is the RX alignment requirement and any RX
  bounce buffering that's occurring.  (In a local test, removing the RX
  fixup path and just aligning buffers raises the performance to above
  400mbit/sec.)
  
  In theory it's a no-op for SoCs before the QCA955x.
  
  Tested:
  
  * QCA9558 SoC in AP135 board, using software bridging between arge0/arge1.

Modified:
  head/sys/mips/atheros/if_arge.c
  head/sys/mips/atheros/if_argevar.h

Modified: head/sys/mips/atheros/if_arge.c
==============================================================================
--- head/sys/mips/atheros/if_arge.c	Sat Oct 17 22:41:30 2015	(r289475)
+++ head/sys/mips/atheros/if_arge.c	Sun Oct 18 00:59:28 2015	(r289476)
@@ -298,6 +298,29 @@ arge_attach_sysctl(device_t dev)
 		"tx_pkts_unaligned", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned,
 		0, "number of TX unaligned packets");
 
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"tx_pkts_unaligned_start", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_start,
+		0, "number of TX unaligned packets (start)");
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"tx_pkts_unaligned_len", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_len,
+		0, "number of TX unaligned packets (len)");
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs,
+		0, "number of TX packets fail with no ring slots avail");
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"intr_stray_filter", CTLFLAG_RW, &sc->stats.intr_stray,
+		0, "number of stray interrupts (filter)");
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"intr_stray_intr", CTLFLAG_RW, &sc->stats.intr_stray2,
+		0, "number of stray interrupts (intr)");
+
+	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		"intr_ok", CTLFLAG_RW, &sc->stats.intr_ok,
+		0, "number of OK interrupts");
 #ifdef	ARGE_DEBUG
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod",
 	    CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, "");
@@ -627,6 +650,22 @@ arge_attach(device_t dev)
 	}
 
 	/*
+	 * Hardware workarounds.
+	 */
+	switch (ar71xx_soc) {
+	case AR71XX_SOC_QCA9556:
+	case AR71XX_SOC_QCA9558:
+		/* Arbitrary alignment */
+		sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE;
+		sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE;
+		break;
+	default:
+		sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE;
+		sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE;
+		break;
+	}
+
+	/*
 	 * Some units (eg the TP-Link WR-1043ND) do not have a convenient
 	 * EEPROM location to read the ethernet MAC address from.
 	 * OpenWRT simply snaffles it from a fixed location.
@@ -825,6 +864,9 @@ arge_attach(device_t dev)
 	ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG0,
 	    FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT);
 
+	/*
+	 * SoC specific bits.
+	 */
 	switch (ar71xx_soc) {
 		case AR71XX_SOC_AR7240:
 		case AR71XX_SOC_AR7241:
@@ -1351,24 +1393,35 @@ arge_init_locked(struct arge_softc *sc)
  * Return whether the mbuf chain is correctly aligned
  * for the arge TX engine.
  *
- * The TX engine requires each fragment to be aligned to a
- * 4 byte boundary and the size of each fragment except
- * the last to be a multiple of 4 bytes.
+ * All the MACs have a length requirement: any non-final
+ * fragment (ie, descriptor with MORE bit set) needs to have
+ * a length divisible by 4.
  *
- * XXX TODO: I believe this is only a bug on the AR71xx and
- * AR913x MACs. The later MACs (AR724x and later) does not
- * need this workaround.
+ * The AR71xx, AR913x require the start address also be
+ * DWORD aligned.  The later MACs don't.
  */
 static int
-arge_mbuf_chain_is_tx_aligned(struct mbuf *m0)
+arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0)
 {
 	struct mbuf *m;
 
 	for (m = m0; m != NULL; m = m->m_next) {
-		if((mtod(m, intptr_t) & 3) != 0)
+		/*
+		 * Only do this for chips that require it.
+		 */
+		if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+		    (mtod(m, intptr_t) & 3) != 0) {
+			sc->stats.tx_pkts_unaligned_start++;
 			return 0;
-		if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0))
+		}
+
+		/*
+		 * All chips have this requirement for length.
+		 */
+		if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0)) {
+			sc->stats.tx_pkts_unaligned_len++;
 			return 0;
+		}
 	}
 	return 1;
 }
@@ -1389,15 +1442,10 @@ arge_encap(struct arge_softc *sc, struct
 	ARGE_LOCK_ASSERT(sc);
 
 	/*
-	 * Fix mbuf chain, all fragments should be 4 bytes aligned and
-	 * even 4 bytes
-	 *
-	 * XXX TODO: I believe this is only a bug on the AR71xx and
-	 * AR913x MACs. The later MACs (AR724x and later) does not
-	 * need this workaround.
+	 * Fix mbuf chain based on hardware alignment constraints.
 	 */
 	m = *m_head;
-	if (! arge_mbuf_chain_is_tx_aligned(m)) {
+	if (! arge_mbuf_chain_is_tx_aligned(sc, m)) {
 		sc->stats.tx_pkts_unaligned++;
 		m = m_defrag(*m_head, M_NOWAIT);
 		if (m == NULL) {
@@ -1427,6 +1475,7 @@ arge_encap(struct arge_softc *sc, struct
 	/* Check number of available descriptors. */
 	if (sc->arge_cdata.arge_tx_cnt + nsegs >= (ARGE_TX_RING_COUNT - 1)) {
 		bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap);
+		sc->stats.tx_pkts_nosegs++;
 		return (ENOBUFS);
 	}
 
@@ -1444,7 +1493,9 @@ arge_encap(struct arge_softc *sc, struct
 		desc = &sc->arge_rdata.arge_tx_ring[prod];
 		desc->packet_ctrl = ARGE_DMASIZE(txsegs[i].ds_len);
 
-		if (txsegs[i].ds_addr & 3)
+		/* XXX Note: only relevant for older MACs; but check length! */
+		if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
+		    (txsegs[i].ds_addr & 3))
 			panic("TX packet address unaligned\n");
 
 		desc->packet_addr = txsegs[i].ds_addr;
@@ -1715,6 +1766,16 @@ arge_dma_alloc(struct arge_softc *sc)
 	struct arge_txdesc	*txd;
 	struct arge_rxdesc	*rxd;
 	int			error, i;
+	int			arge_tx_align, arge_rx_align;
+
+	/* Assume 4 byte alignment by default */
+	arge_tx_align = 4;
+	arge_rx_align = 4;
+
+	if (sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE)
+		arge_tx_align = 1;
+	if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE)
+		arge_rx_align = 1;
 
 	/* Create parent DMA tag. */
 	error = bus_dma_tag_create(
@@ -1775,7 +1836,7 @@ arge_dma_alloc(struct arge_softc *sc)
 	/* Create tag for Tx buffers. */
 	error = bus_dma_tag_create(
 	    sc->arge_cdata.arge_parent_tag,	/* parent */
-	    sizeof(uint32_t), 0,	/* alignment, boundary */
+	    arge_tx_align, 0,		/* alignment, boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
@@ -1793,7 +1854,7 @@ arge_dma_alloc(struct arge_softc *sc)
 	/* Create tag for Rx buffers. */
 	error = bus_dma_tag_create(
 	    sc->arge_cdata.arge_parent_tag,	/* parent */
-	    ARGE_RX_ALIGN, 0,		/* alignment, boundary */
+	    arge_rx_align, 0,		/* alignment, boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
@@ -2108,6 +2169,11 @@ arge_newbuf(struct arge_softc *sc, int i
 	if (m == NULL)
 		return (ENOBUFS);
 	m->m_len = m->m_pkthdr.len = MCLBYTES;
+
+	/*
+	 * Add extra space to "adjust" (copy) the packet back to be aligned
+	 * for purposes of IPv4/IPv6 header contents.
+	 */
 	m_adj(m, sizeof(uint64_t));
 
 	if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag,
@@ -2126,7 +2192,8 @@ arge_newbuf(struct arge_softc *sc, int i
 	sc->arge_cdata.arge_rx_sparemap = map;
 	rxd->rx_m = m;
 	desc = rxd->desc;
-	if (segs[0].ds_addr & 3)
+	if ((sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) &&
+	    segs[0].ds_addr & 3)
 		panic("RX packet address unaligned");
 	desc->packet_addr = segs[0].ds_addr;
 	desc->packet_ctrl = ARGE_DESC_EMPTY | ARGE_DMASIZE(segs[0].ds_len);
@@ -2331,10 +2398,12 @@ arge_intr_filter(void *arg)
 	if (status & DMA_INTR_ALL) {
 		sc->arge_intr_status |= status;
 		ARGE_WRITE(sc, AR71XX_DMA_INTR, 0);
+		sc->stats.intr_ok++;
 		return (FILTER_SCHEDULE_THREAD);
 	}
 
 	sc->arge_intr_status = 0;
+	sc->stats.intr_stray++;
 	return (FILTER_STRAY);
 }
 
@@ -2355,8 +2424,10 @@ arge_intr(void *arg)
 	/*
 	 * Is it our interrupt at all?
 	 */
-	if (status == 0)
+	if (status == 0) {
+		sc->stats.intr_stray2++;
 		return;
+	}
 
 	if (status & DMA_INTR_RX_BUS_ERROR) {
 		ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR);

Modified: head/sys/mips/atheros/if_argevar.h
==============================================================================
--- head/sys/mips/atheros/if_argevar.h	Sat Oct 17 22:41:30 2015	(r289475)
+++ head/sys/mips/atheros/if_argevar.h	Sun Oct 18 00:59:28 2015	(r289476)
@@ -37,7 +37,10 @@
 #define	ARGE_TX_DMA_SIZE	ARGE_TX_RING_COUNT * sizeof(struct arge_desc)
 #define	ARGE_MAXFRAGS		8
 #define ARGE_RING_ALIGN		sizeof(struct arge_desc)
-#define ARGE_RX_ALIGN		sizeof(uint32_t)
+#define ARGE_RX_ALIGN_4BYTE	sizeof(uint32_t)
+#define ARGE_RX_ALIGN_1BYTE	sizeof(char)
+#define ARGE_TX_ALIGN_4BYTE	sizeof(uint32_t)
+#define ARGE_TX_ALIGN_1BYTE	sizeof(char)
 #define ARGE_MAXFRAGS		8
 #define	ARGE_TX_RING_ADDR(sc, i)	\
     ((sc)->arge_rdata.arge_tx_ring_paddr + sizeof(struct arge_desc) * (i))
@@ -149,6 +152,22 @@ struct arge_pll_data {
 	uint32_t pll_1000;
 };
 
+/*
+ * Hardware specific behaviours.
+ */
+
+/*
+ * Older chips support 4 byte only transmit and receive
+ * addresses.
+ *
+ * Later chips support arbitrary TX and later later,
+ * arbitrary RX addresses.
+ */
+#define	ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE	0x00000001
+#define	ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE	0x00000002
+#define	ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE	0x00000004
+#define	ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE	0x00000008
+
 struct arge_softc {
 	struct ifnet		*arge_ifp;	/* interface info */
 	device_t		arge_dev;
@@ -180,13 +199,20 @@ struct arge_softc {
 	uint32_t		arge_intr_status;
 	int			arge_mac_unit;
 	int			arge_if_flags;
+	uint32_t		arge_hw_flags;
 	uint32_t		arge_debug;
 	uint32_t		arge_mdiofreq;
 	struct {
 		uint32_t	tx_pkts_unaligned;
+		uint32_t	tx_pkts_unaligned_start;
+		uint32_t	tx_pkts_unaligned_len;
+		uint32_t	tx_pkts_nosegs;
 		uint32_t	tx_pkts_aligned;
 		uint32_t	rx_overflow;
 		uint32_t	tx_underflow;
+		uint32_t	intr_stray;
+		uint32_t	intr_stray2;
+		uint32_t	intr_ok;
 	} stats;
 };
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201510180059.t9I0xSrJ053943>