Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 6 Dec 2014 00:13:56 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r275539 - head/sys/dev/cxgbe
Message-ID:  <201412060013.sB60Dujd029084@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Sat Dec  6 00:13:56 2014
New Revision: 275539
URL: https://svnweb.freebsd.org/changeset/base/275539

Log:
  cxgbe(4): Allow for different pad and pack boundaries for different
  adapters.  Set the pack boundary for T5 cards to be the same as the
  PCIe max payload size.  The chip likes it this way.
  
  In this revision the driver allocates rx buffers that align on both
  boundaries.  This is not a strict requirement and a follow-up commit
  will switch the driver to a more relaxed allocation strategy.
  
  MFC after:	2 weeks

Modified:
  head/sys/dev/cxgbe/adapter.h
  head/sys/dev/cxgbe/t4_sge.c

Modified: head/sys/dev/cxgbe/adapter.h
==============================================================================
--- head/sys/dev/cxgbe/adapter.h	Sat Dec  6 00:12:58 2014	(r275538)
+++ head/sys/dev/cxgbe/adapter.h	Sat Dec  6 00:13:56 2014	(r275539)
@@ -148,7 +148,7 @@ enum {
 #else
 	SW_ZONE_SIZES = 3,	/* cluster, jumbo9k, jumbo16k */
 #endif
-	CL_METADATA_SIZE = CACHE_LINE_SIZE,
+	CL_METADATA_SIZE = 256,	/* same as MSIZE for now */
 
 	SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
 	TX_SGL_SEGS = 36,
@@ -695,6 +695,7 @@ struct sge {
 	struct sge_iq **iqmap;	/* iq->cntxt_id to iq mapping */
 	struct sge_eq **eqmap;	/* eq->cntxt_id to eq mapping */
 
+	int pad_boundary;
 	int pack_boundary;
 	int8_t safe_hwidx1;	/* may not have room for metadata */
 	int8_t safe_hwidx2;	/* with room for metadata and maybe more */

Modified: head/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- head/sys/dev/cxgbe/t4_sge.c	Sat Dec  6 00:12:58 2014	(r275538)
+++ head/sys/dev/cxgbe/t4_sge.c	Sat Dec  6 00:13:56 2014	(r275539)
@@ -120,19 +120,10 @@ TUNABLE_INT("hw.cxgbe.buffer_packing", &
 /*
  * Start next frame in a packed buffer at this boundary.
  * -1: driver should figure out a good value.
- * T4:
- * ---
- * if fl_pad != 0
- * 	value specified here will be overridden by fl_pad.
- * else
- * 	power of 2 from 32 to 4096 (both inclusive) is a valid value here.
- * T5:
- * ---
- * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
+ * T4: driver will ignore this and use the same value as fl_pad above.
+ * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
  */
 static int fl_pack = -1;
-static int t4_fl_pack;
-static int t5_fl_pack;
 TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
 
 /*
@@ -175,8 +166,7 @@ static int service_iq(struct sge_iq *, i
 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
-static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
-    char *);
+static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
 static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
     char *);
 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
@@ -264,15 +254,6 @@ static counter_u64_t extfree_rels;
 void
 t4_sge_modload(void)
 {
-	int pad;
-
-	/* set pad to a reasonable powerof2 between 16 and 4096 (inclusive) */
-#if defined(__i386__) || defined(__amd64__)
-	pad = max(cpu_clflush_line_size, 16);
-#else
-	pad = max(CACHE_LINE_SIZE, 16);
-#endif
-	pad = min(pad, 4096);
 
 	if (fl_pktshift < 0 || fl_pktshift > 7) {
 		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
@@ -280,35 +261,6 @@ t4_sge_modload(void)
 		fl_pktshift = 2;
 	}
 
-	if (fl_pad != 0 &&
-	    (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {
-
-		if (fl_pad != -1) {
-			printf("Invalid hw.cxgbe.fl_pad value (%d),"
-			    " using %d instead.\n", fl_pad, max(pad, 32));
-		}
-		fl_pad = max(pad, 32);
-	}
-
-	/*
-	 * T4 has the same pad and pack boundary.  If a pad boundary is set,
-	 * pack boundary must be set to the same value.  Otherwise take the
-	 * specified value or auto-calculate something reasonable.
-	 */
-	if (fl_pad)
-		t4_fl_pack = fl_pad;
-	else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
-		t4_fl_pack = max(pad, 32);
-	else
-		t4_fl_pack = fl_pack;
-
-	/* T5's pack boundary is independent of the pad boundary. */
-	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
-	    !powerof2(fl_pack))
-	       t5_fl_pack = max(pad, CACHE_LINE_SIZE);
-	else
-	       t5_fl_pack = fl_pack;
-
 	if (spg_len != 64 && spg_len != 128) {
 		int len;
 
@@ -366,6 +318,71 @@ t4_init_sge_cpl_handlers(struct adapter 
 	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
 }
 
+static inline void
+setup_pad_and_pack_boundaries(struct adapter *sc)
+{
+	uint32_t v, m;
+	int pad, pack;
+
+	pad = fl_pad;
+	if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
+		/*
+		 * If there is any chance that we might use buffer packing and
+		 * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
+		 * it to 32 in all other cases.
+		 */
+		pad = is_t4(sc) && buffer_packing ? 64 : 32;
+
+		/*
+		 * For fl_pad = 0 we'll still write a reasonable value to the
+		 * register but all the freelists will opt out of padding.
+		 * We'll complain here only if the user tried to set it to a
+		 * value greater than 0 that was invalid.
+		 */
+		if (fl_pad > 0) {
+			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
+			    " (%d), using %d instead.\n", fl_pad, pad);
+		}
+	}
+	m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+	v = V_INGPADBOUNDARY(ilog2(pad) - 5);
+	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
+
+	if (is_t4(sc)) {
+		if (fl_pack != -1 && fl_pack != pad) {
+			/* Complain but carry on. */
+			device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
+			    " using %d instead.\n", fl_pack, pad);
+		}
+		return;
+	}
+
+	pack = fl_pack;
+	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
+	    !powerof2(fl_pack)) {
+		pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
+		MPASS(powerof2(pack));
+		if (pack < 16)
+			pack = 16;
+		if (pack == 32)
+			pack = 64;
+		if (pack > 4096)
+			pack = 4096;
+		if (fl_pack != -1) {
+			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
+			    " (%d), using %d instead.\n", fl_pack, pack);
+		}
+	}
+	m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
+	if (pack == 16)
+		v = V_INGPACKBOUNDARY(0);
+	else
+		v = V_INGPACKBOUNDARY(ilog2(pack) - 5);
+
+	MPASS(!is_t4(sc));	/* T4 doesn't have SGE_CONTROL2 */
+	t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
+}
+
 /*
  * adap->params.vpd.cclk must be set up before this is called.
  */
@@ -398,24 +415,9 @@ t4_tweak_chip_settings(struct adapter *s
 	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
 	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
 	    V_EGRSTATUSPAGESIZE(spg_len == 128);
-	if (is_t4(sc) && (fl_pad || buffer_packing)) {
-		/* t4_fl_pack has the correct value even when fl_pad = 0 */
-		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
-	} else if (is_t5(sc) && fl_pad) {
-		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
-	}
 	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
 
-	if (is_t5(sc) && buffer_packing) {
-		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
-		if (t5_fl_pack == 16)
-			v = V_INGPACKBOUNDARY(0);
-		else
-			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
-		t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
-	}
+	setup_pad_and_pack_boundaries(sc);
 
 	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
 	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -486,15 +488,25 @@ t4_tweak_chip_settings(struct adapter *s
 }
 
 /*
- * SGE wants the buffer to be at least 64B and then a multiple of the pad
- * boundary or 16, whichever is greater.
+ * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
+ * padding and packing are enabled, the buffer's start and end need to be
+ * correctly aligned as well.  We'll just make sure that the size is a multiple
+ * of the alignment, it is up to other parts of the driver to align the start.
  */
 static inline int
-hwsz_ok(int hwsz)
+hwsz_ok(struct adapter *sc, int hwsz)
 {
-	int mask = max(fl_pad, 16) - 1;
+	int align = 16;
+
+	if (fl_pad) {
+		MPASS(sc->sge.pad_boundary > align);
+		align = sc->sge.pad_boundary;
+	}
+	if (buffer_packing && sc->sge.pack_boundary > align)
+		align = sc->sge.pack_boundary;
+	align--;	/* now a mask */
+	return (hwsz >= 64 && (hwsz & align) == 0);
 
-	return (hwsz >= 64 && (hwsz & mask) == 0);
 }
 
 /*
@@ -521,33 +533,22 @@ t4_read_chip_settings(struct adapter *sc
 	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
 	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
 	    V_EGRSTATUSPAGESIZE(spg_len == 128);
-	if (is_t4(sc) && (fl_pad || buffer_packing)) {
-		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
-	} else if (is_t5(sc) && fl_pad) {
-		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
-	}
 	r = t4_read_reg(sc, A_SGE_CONTROL);
 	if ((r & m) != v) {
 		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
 		rc = EINVAL;
 	}
+	s->pad_boundary = 1 << (G_INGPADBOUNDARY(r) + 5);
 
-	if (is_t5(sc) && buffer_packing) {
-		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
-		if (t5_fl_pack == 16)
-			v = V_INGPACKBOUNDARY(0);
-		else
-			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
+	if (is_t4(sc))
+		s->pack_boundary = s->pad_boundary;
+	else {
 		r = t4_read_reg(sc, A_SGE_CONTROL2);
-		if ((r & m) != v) {
-			device_printf(sc->dev,
-			    "invalid SGE_CONTROL2(0x%x)\n", r);
-			rc = EINVAL;
-		}
+		if (G_INGPACKBOUNDARY(r) == 0)
+			s->pack_boundary = 16;
+		else
+			s->pack_boundary = 1 << (G_INGPACKBOUNDARY(r) + 5);
 	}
-	s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;
 
 	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
 	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -568,13 +569,22 @@ t4_read_chip_settings(struct adapter *sc
 	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
 		r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
 		hwb->size = r;
-		hwb->zidx = hwsz_ok(r) ? -1 : -2;
+		hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
 		hwb->next = -1;
 	}
 
 	/*
 	 * Create a sorted list in decreasing order of hw buffer sizes (and so
 	 * increasing order of spare area) for each software zone.
+	 *
+	 * If padding is enabled then the start and end of the buffer must align
+	 * to the pad boundary; if packing is enabled then they must align with
+	 * the pack boundary as well.  Allocations from the cluster zones are
+	 * aligned to min(size, 4K), so the buffer starts at that alignment and
+	 * ends at hwb->size alignment.  If mbuf inlining is allowed the
+	 * starting alignment will be reduced to MSIZE and the driver will
+	 * exercise appropriate caution when deciding on the best buffer layout
+	 * to use.
 	 */
 	n = 0;	/* no usable buffer size to begin with */
 	swz = &s->sw_zone_info[0];
@@ -586,6 +596,15 @@ t4_read_chip_settings(struct adapter *sc
 		swz->zone = m_getzone(swz->size);
 		swz->type = m_gettype(swz->size);
 
+		if (swz->size < PAGE_SIZE) {
+			MPASS(powerof2(swz->size));
+			if (fl_pad && (swz->size % sc->sge.pad_boundary != 0))
+				continue;
+			if (buffer_packing &&
+			    (swz->size % sc->sge.pack_boundary != 0))
+				continue;
+		}
+
 		if (swz->size == safest_rx_cluster)
 			safe_swz = swz;
 
@@ -593,6 +612,12 @@ t4_read_chip_settings(struct adapter *sc
 		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
 			if (hwb->zidx != -1 || hwb->size > swz->size)
 				continue;
+#ifdef INVARIANTS
+			if (fl_pad)
+				MPASS(hwb->size % sc->sge.pad_boundary == 0);
+			if (buffer_packing)
+				MPASS(hwb->size % sc->sge.pack_boundary == 0);
+#endif
 			hwb->zidx = i;
 			if (head == -1)
 				head = tail = j;
@@ -640,14 +665,17 @@ t4_read_chip_settings(struct adapter *sc
 			int spare;
 
 			hwb = &s->hw_buf_info[i];
+#ifdef INVARIANTS
+			if (fl_pad)
+				MPASS(hwb->size % sc->sge.pad_boundary == 0);
+			if (buffer_packing)
+				MPASS(hwb->size % sc->sge.pack_boundary == 0);
+#endif
 			spare = safe_swz->size - hwb->size;
-			if (spare < CL_METADATA_SIZE)
-				continue;
-			if (s->safe_hwidx2 == -1 ||
-			    spare == CL_METADATA_SIZE + MSIZE)
+			if (spare >= CL_METADATA_SIZE) {
 				s->safe_hwidx2 = i;
-			if (spare >= CL_METADATA_SIZE + MSIZE)
 				break;
+			}
 		}
 	}
 
@@ -745,17 +773,6 @@ t4_create_dma_tag(struct adapter *sc)
 	return (rc);
 }
 
-static inline int
-enable_buffer_packing(struct adapter *sc)
-{
-
-	if (sc->flags & BUF_PACKING_OK &&
-	    ((is_t5(sc) && buffer_packing) ||	/* 1 or -1 both ok for T5 */
-	    (is_t4(sc) && buffer_packing == 1)))
-		return (1);
-	return (0);
-}
-
 void
 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *children)
@@ -769,7 +786,7 @@ t4_sge_sysctls(struct adapter *sc, struc
 	    NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
-	    NULL, fl_pad, "payload pad boundary (bytes)");
+	    NULL, sc->sge.pad_boundary, "payload pad boundary (bytes)");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
 	    NULL, spg_len, "status page size (bytes)");
@@ -777,10 +794,6 @@ t4_sge_sysctls(struct adapter *sc, struc
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
 	    NULL, cong_drop, "congestion drop setting");
 
-	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
-	    NULL, enable_buffer_packing(sc),
-	    "pack multiple frames in one fl buffer");
-
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
 	    NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
 }
@@ -958,7 +971,6 @@ mtu_to_max_payload(struct adapter *sc, i
 #ifdef TCP_OFFLOAD
 	}
 #endif
-	payload = roundup2(payload, fl_pad);
 
 	return (payload);
 }
@@ -983,7 +995,7 @@ t4_setup_port_queues(struct port_info *p
 	struct ifnet *ifp = pi->ifp;
 	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
-	int maxp, pack, mtu = ifp->if_mtu;
+	int maxp, mtu = ifp->if_mtu;
 
 	/* Interrupt vector to start from (when using multiple vectors) */
 	intr_idx = first_vector(pi);
@@ -994,7 +1006,6 @@ t4_setup_port_queues(struct port_info *p
 	 * b) allocate queue iff it will take direct interrupts.
 	 */
 	maxp = mtu_to_max_payload(sc, mtu, 0);
-	pack = enable_buffer_packing(sc);
 	if (pi->flags & INTR_RXQ) {
 		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq",
 		    CTLFLAG_RD, NULL, "rx queues");
@@ -1005,7 +1016,7 @@ t4_setup_port_queues(struct port_info *p
 
 		snprintf(name, sizeof(name), "%s rxq%d-fl",
 		    device_get_nameunit(pi->dev), i);
-		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
 		if (pi->flags & INTR_RXQ) {
 			rxq->iq.flags |= IQ_INTR;
@@ -1029,7 +1040,7 @@ t4_setup_port_queues(struct port_info *p
 
 		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
 		    device_get_nameunit(pi->dev), i);
-		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
 		if (pi->flags & INTR_OFLD_RXQ) {
 			ofld_rxq->iq.flags |= IQ_INTR;
@@ -1572,8 +1583,14 @@ get_scatter_segment(struct adapter *sc, 
 	caddr_t payload;
 
 	len = min(total, hwb->size - fl->rx_offset);
-	padded_len = roundup2(len, fl->buf_boundary);
 	payload = sd->cl + cll->region1 + fl->rx_offset;
+	if (fl->flags & FL_BUF_PACKING) {
+		padded_len = roundup2(len, fl->buf_boundary);
+		MPASS(fl->rx_offset + padded_len <= hwb->size);
+	} else {
+		padded_len = hwb->size;
+		MPASS(fl->rx_offset == 0);	/* not packing */
+	}
 
 	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
 
@@ -2121,14 +2138,15 @@ init_iq(struct sge_iq *iq, struct adapte
 }
 
 static inline void
-init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
-    char *name)
+init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
 {
 
 	fl->qsize = qsize;
 	fl->sidx = qsize - spg_len / EQ_ESIZE;
 	strlcpy(fl->lockname, name, sizeof(fl->lockname));
-	if (pack)
+	if (sc->flags & BUF_PACKING_OK &&
+	    ((!is_t4(sc) && buffer_packing) ||	/* T5+: enabled unless 0 */
+	    (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */
 		fl->flags |= FL_BUF_PACKING;
 	find_best_refill_source(sc, fl, maxp);
 	find_safe_refill_source(sc, fl);
@@ -2277,11 +2295,13 @@ alloc_iq_fl(struct port_info *pi, struct
 
 		if (fl->flags & FL_BUF_PACKING) {
 			fl->lowat = roundup2(sc->sge.fl_starve_threshold2, 8);
-			fl->buf_boundary = max(fl_pad, sc->sge.pack_boundary);
+			fl->buf_boundary = sc->sge.pack_boundary;
 		} else {
 			fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
-			fl->buf_boundary = fl_pad;
+			fl->buf_boundary = 16;
 		}
+		if (fl_pad && fl->buf_boundary < sc->sge.pad_boundary)
+			fl->buf_boundary = sc->sge.pad_boundary;
 
 		c.iqns_to_fl0congen |=
 		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
@@ -2452,6 +2472,10 @@ add_fl_sysctls(struct sysctl_ctx_list *c
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
 	    CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
 	    "SGE context id of the freelist");
+	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
+	    fl_pad ? 1 : 0, "padding enabled");
+	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
+	    fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
 	    0, "consumer index");
 	if (fl->flags & FL_BUF_PACKING) {
@@ -4367,6 +4391,17 @@ done:
 
 		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
 			break;
+
+		/*
+		 * Do not inline mbufs if doing so would violate the pad/pack
+		 * boundary alignment requirement.
+		 */
+		if (fl_pad && (MSIZE % sc->sge.pad_boundary) != 0)
+			continue;
+		if (fl->flags & FL_BUF_PACKING &&
+		    (MSIZE % sc->sge.pack_boundary) != 0)
+			continue;
+
 		if (spare < CL_METADATA_SIZE + MSIZE)
 			continue;
 		n = (spare - CL_METADATA_SIZE) / MSIZE;
@@ -4449,7 +4484,9 @@ find_safe_refill_source(struct adapter *
 	spare = swz->size - hwb->size;
 	fl->cll_alt.hwidx = hwidx;
 	fl->cll_alt.zidx = hwb->zidx;
-	if (allow_mbufs_in_cluster)
+	if (allow_mbufs_in_cluster &&
+	    (fl_pad == 0 || (MSIZE % sc->sge.pad_boundary) == 0) &&
+	    (!(fl->flags & FL_BUF_PACKING) || (MSIZE % sc->sge.pack_boundary) == 0))
 		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
 	else
 		fl->cll_alt.region1 = 0;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201412060013.sB60Dujd029084>