Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 29 Apr 2019 03:10:58 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r346871 - in stable/11/sys/dev/cxgbe: . common tom
Message-ID:  <201904290310.x3T3Awwl059372@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Mon Apr 29 03:10:58 2019
New Revision: 346871
URL: https://svnweb.freebsd.org/changeset/base/346871

Log:
  MFC r336718, r336720, r336734-r336735, r337398, r337439, and r337540.
  These are all related to tx rate limiting in cxgbe.
  
  r336718:
  cxgbe(4): Validate only those parameters that are relevant to the
  type of rate limiter being programmed.  Skip the ones that are not
  applicable.
  
  Sponsored by:	Chelsio Communications
  
  r336720:
  cxgbe(4): Remove useless code that crept in with r336718.
  
  X-MFC With:	336718
  
  r336734:
  cxgbe(4): Better defaults for all cl-rl rate limiters.
  
  Start in "class" instead of "flow" mode.  This eliminates the need to
  specify an MTU, which is not available that early anyway.  It also
  allows the user to manually configure ch-rl rate limiting after attach.
  This used to fail because ch-rl isn't supported if cl-rl "flow" mode is
  configured.
  
  Set all traffic classes to 1Gbps during initialization.  The goal is to
  start off with _any_ valid configuration and 1Gbps works even for
  gigabit cards.
  
  Sponsored by:	Chelsio Communications
  
  r336735:
  cxgbe(4): Consider rateunit before ratemode when displaying information
  about a traffic class.  This matches the order in which the firmware
  evaluates unit and mode internally.
  
  Sponsored by:	Chelsio Communications
  
  r337398:
  cxgbe(4): Allow user-configured and driver-configured traffic classes to
  be used simultaneously.  Move sysctl_tc and sysctl_tc_params to
  t4_sched.c while here.
  
  Sponsored by:	Chelsio Communications
  
  r337439:
  cxgbe(4): Allow the driver to specify a burst size when configuring a
  traffic class for rate limiting.
  
  Add experimental knobs that allow the user to specify a default pktsize
  and burstsize for traffic classes associated with a port:
  
  dev.<ifname>.<instance>.tc.pktsize
  dev.<ifname>.<instance>.tc.burstsize
  
  Sponsored by:	Chelsio Communications
  
  r337540:
  cxgbe(4): Display pkt-size and burst-size in traffic class parameters.

Modified:
  stable/11/sys/dev/cxgbe/adapter.h
  stable/11/sys/dev/cxgbe/common/common.h
  stable/11/sys/dev/cxgbe/common/t4_hw.c
  stable/11/sys/dev/cxgbe/t4_main.c
  stable/11/sys/dev/cxgbe/t4_sched.c
  stable/11/sys/dev/cxgbe/t4_sge.c
  stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
  stable/11/sys/dev/cxgbe/tom/t4_tom.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/11/sys/dev/cxgbe/adapter.h	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/adapter.h	Mon Apr 29 03:10:58 2019	(r346871)
@@ -236,18 +236,21 @@ struct tx_ch_rl_params {
 };
 
 enum {
-	TX_CLRL_REFRESH	= (1 << 0),	/* Need to update hardware state. */
-	TX_CLRL_ERROR	= (1 << 1),	/* Error, hardware state unknown. */
+	CLRL_USER	= (1 << 0),	/* allocated manually. */
+	CLRL_SYNC	= (1 << 1),	/* sync hw update in progress. */
+	CLRL_ASYNC	= (1 << 2),	/* async hw update requested. */
+	CLRL_ERR	= (1 << 3),	/* last hw setup ended in error. */
 };
 
 struct tx_cl_rl_params {
 	int refcount;
-	u_int flags;
+	uint8_t flags;
 	enum fw_sched_params_rate ratemode;	/* %port REL or ABS value */
 	enum fw_sched_params_unit rateunit;	/* kbps or pps (when ABS) */
 	enum fw_sched_params_mode mode;		/* aggr or per-flow */
 	uint32_t maxrate;
 	uint16_t pktsize;
+	uint16_t burstsize;
 };
 
 /* Tx scheduler parameters for a channel/port */
@@ -258,7 +261,9 @@ struct tx_sched_params {
 	/* Class WRR */
 	/* XXX */
 
-	/* Class Rate Limiter */
+	/* Class Rate Limiter (including the default pktsize and burstsize). */
+	int pktsize;
+	int burstsize;
 	struct tx_cl_rl_params cl_rl[];
 };
 
@@ -1186,7 +1191,9 @@ int t4_init_tx_sched(struct adapter *);
 int t4_free_tx_sched(struct adapter *);
 void t4_update_tx_sched(struct adapter *);
 int t4_reserve_cl_rl_kbps(struct adapter *, int, u_int, int *);
-void t4_release_cl_rl_kbps(struct adapter *, int, int);
+void t4_release_cl_rl(struct adapter *, int, int);
+int sysctl_tc(SYSCTL_HANDLER_ARGS);
+int sysctl_tc_params(SYSCTL_HANDLER_ARGS);
 
 /* t4_filter.c */
 int get_filter_mode(struct adapter *, uint32_t *);

Modified: stable/11/sys/dev/cxgbe/common/common.h
==============================================================================
--- stable/11/sys/dev/cxgbe/common/common.h	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/common/common.h	Mon Apr 29 03:10:58 2019	(r346871)
@@ -819,7 +819,7 @@ int t4_sched_config(struct adapter *adapter, int type,
 int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 		    int rateunit, int ratemode, int channel, int cl,
 		    int minrate, int maxrate, int weight, int pktsize,
-		    int sleep_ok);
+		    int burstsize, int sleep_ok);
 int t4_sched_params_ch_rl(struct adapter *adapter, int channel, int ratemode,
 			  unsigned int maxrate, int sleep_ok);
 int t4_sched_params_cl_wrr(struct adapter *adapter, int channel, int cl,

Modified: stable/11/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- stable/11/sys/dev/cxgbe/common/t4_hw.c	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/common/t4_hw.c	Mon Apr 29 03:10:58 2019	(r346871)
@@ -10034,7 +10034,7 @@ int t4_sched_config(struct adapter *adapter, int type,
 int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 		    int rateunit, int ratemode, int channel, int cl,
 		    int minrate, int maxrate, int weight, int pktsize,
-		    int sleep_ok)
+		    int burstsize, int sleep_ok)
 {
 	struct fw_sched_cmd cmd;
 
@@ -10056,6 +10056,7 @@ int t4_sched_params(struct adapter *adapter, int type,
 	cmd.u.params.max = cpu_to_be32(maxrate);
 	cmd.u.params.weight = cpu_to_be16(weight);
 	cmd.u.params.pktsize = cpu_to_be16(pktsize);
+	cmd.u.params.burstsize = cpu_to_be16(burstsize);
 
 	return t4_wr_mbox_meat(adapter,adapter->mbox, &cmd, sizeof(cmd),
 			       NULL, sleep_ok);

Modified: stable/11/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/11/sys/dev/cxgbe/t4_main.c	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/t4_main.c	Mon Apr 29 03:10:58 2019	(r346871)
@@ -662,7 +662,6 @@ static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
-static int sysctl_tc_params(SYSCTL_HANDLER_ARGS);
 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
 #ifdef TCP_OFFLOAD
 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
@@ -6138,6 +6137,7 @@ cxgbe_sysctls(struct port_info *pi)
 	struct adapter *sc = pi->adapter;
 	int i;
 	char name[16];
+	static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"};
 
 	ctx = device_get_sysctl_ctx(pi->dev);
 
@@ -6183,6 +6183,13 @@ cxgbe_sysctls(struct port_info *pi)
 	 */
 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
 	    "Tx scheduler traffic classes (cl_rl)");
+	children2 = SYSCTL_CHILDREN(oid);
+	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
+	    CTLFLAG_RW, &pi->sched_params->pktsize, 0,
+	    "pktsize for per-flow cl-rl (0 means up to the driver )");
+	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
+	    CTLFLAG_RW, &pi->sched_params->burstsize, 0,
+	    "burstsize for per-flow cl-rl (0 means up to the driver)");
 	for (i = 0; i < sc->chip_params->nsched_cls; i++) {
 		struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
 
@@ -6190,8 +6197,9 @@ cxgbe_sysctls(struct port_info *pi)
 		children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
 		    SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
 		    "traffic class"));
-		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD,
-		    &tc->flags, 0, "flags");
+		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
+		    CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags,
+		    sysctl_bitfield_8b, "A", "flags");
 		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
 		    CTLFLAG_RD, &tc->refcount, 0, "references to this class");
 		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
@@ -8805,82 +8813,6 @@ sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
 		}
 	}
 	rc = sbuf_finish(sb);
-	sbuf_delete(sb);
-
-	return (rc);
-}
-
-static int
-sysctl_tc_params(SYSCTL_HANDLER_ARGS)
-{
-	struct adapter *sc = arg1;
-	struct tx_cl_rl_params tc;
-	struct sbuf *sb;
-	int i, rc, port_id, mbps, gbps;
-
-	rc = sysctl_wire_old_buffer(req, 0);
-	if (rc != 0)
-		return (rc);
-
-	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
-	if (sb == NULL)
-		return (ENOMEM);
-
-	port_id = arg2 >> 16;
-	MPASS(port_id < sc->params.nports);
-	MPASS(sc->port[port_id] != NULL);
-	i = arg2 & 0xffff;
-	MPASS(i < sc->chip_params->nsched_cls);
-
-	mtx_lock(&sc->tc_lock);
-	tc = sc->port[port_id]->sched_params->cl_rl[i];
-	mtx_unlock(&sc->tc_lock);
-
-	if (tc.flags & TX_CLRL_ERROR) {
-		sbuf_printf(sb, "error");
-		goto done;
-	}
-
-	if (tc.ratemode == SCHED_CLASS_RATEMODE_REL) {
-		/* XXX: top speed or actual link speed? */
-		gbps = port_top_speed(sc->port[port_id]);
-		sbuf_printf(sb, " %u%% of %uGbps", tc.maxrate, gbps);
-	} else if (tc.ratemode == SCHED_CLASS_RATEMODE_ABS) {
-		switch (tc.rateunit) {
-		case SCHED_CLASS_RATEUNIT_BITS:
-			mbps = tc.maxrate / 1000;
-			gbps = tc.maxrate / 1000000;
-			if (tc.maxrate == gbps * 1000000)
-				sbuf_printf(sb, " %uGbps", gbps);
-			else if (tc.maxrate == mbps * 1000)
-				sbuf_printf(sb, " %uMbps", mbps);
-			else
-				sbuf_printf(sb, " %uKbps", tc.maxrate);
-			break;
-		case SCHED_CLASS_RATEUNIT_PKTS:
-			sbuf_printf(sb, " %upps", tc.maxrate);
-			break;
-		default:
-			rc = ENXIO;
-			goto done;
-		}
-	}
-
-	switch (tc.mode) {
-	case SCHED_CLASS_MODE_CLASS:
-		sbuf_printf(sb, " aggregate");
-		break;
-	case SCHED_CLASS_MODE_FLOW:
-		sbuf_printf(sb, " per-flow");
-		break;
-	default:
-		rc = ENXIO;
-		goto done;
-	}
-
-done:
-	if (rc == 0)
-		rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);

Modified: stable/11/sys/dev/cxgbe/t4_sched.c
==============================================================================
--- stable/11/sys/dev/cxgbe/t4_sched.c	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/t4_sched.c	Mon Apr 29 03:10:58 2019	(r346871)
@@ -74,7 +74,8 @@ set_sched_class_params(struct adapter *sc, struct t4_s
 {
 	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
 	struct port_info *pi;
-	struct tx_cl_rl_params *tc;
+	struct tx_cl_rl_params *tc, old;
+	bool check_pktsize = false;
 
 	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
 		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
@@ -85,28 +86,20 @@ set_sched_class_params(struct adapter *sc, struct t4_s
 	else
 		return (EINVAL);
 
-	if (p->mode == SCHED_CLASS_MODE_CLASS)
-		fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
-	else if (p->mode == SCHED_CLASS_MODE_FLOW)
-		fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
-	else
-		return (EINVAL);
+	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
+		if (p->mode == SCHED_CLASS_MODE_CLASS)
+			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
+		else if (p->mode == SCHED_CLASS_MODE_FLOW) {
+			check_pktsize = true;
+			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
+		} else
+			return (EINVAL);
+	} else
+		fw_mode = 0;
 
-	if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS)
-		fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
-	else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS)
-		fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
-	else
+	/* Valid channel must always be provided. */
+	if (p->channel < 0)
 		return (EINVAL);
-
-	if (p->ratemode == SCHED_CLASS_RATEMODE_REL)
-		fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
-	else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS)
-		fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
-	else
-		return (EINVAL);
-
-	/* Vet our parameters ... */
 	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
 		return (ERANGE);
 
@@ -116,80 +109,131 @@ set_sched_class_params(struct adapter *sc, struct t4_s
 	MPASS(pi->tx_chan == p->channel);
 	top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */
 
-	if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) ||
-	    !in_range(p->minrate, 0, top_speed) ||
-	    !in_range(p->maxrate, 0, top_speed) ||
-	    !in_range(p->weight, 0, 100))
-		return (ERANGE);
+	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
+	    p->level == SCHED_CLASS_LEVEL_CH_RL) {
+		/*
+		 * Valid rate (mode, unit and values) must be provided.
+		 */
 
-	/*
-	 * Translate any unset parameters into the firmware's
-	 * nomenclature and/or fail the call if the parameters
-	 * are required ...
-	 */
-	if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0)
-		return (EINVAL);
+		if (p->minrate < 0)
+			p->minrate = 0;
+		if (p->maxrate < 0)
+			return (EINVAL);
 
-	if (p->minrate < 0)
-		p->minrate = 0;
-	if (p->maxrate < 0) {
-		if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
-		    p->level == SCHED_CLASS_LEVEL_CH_RL)
+		if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
+			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
+			/* ratemode could be relative (%) or absolute. */
+			if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
+				fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
+				/* maxrate is % of port bandwidth. */
+				if (!in_range(p->minrate, 0, 100) ||
+				    !in_range(p->maxrate, 0, 100)) {
+					return (ERANGE);
+				}
+			} else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
+				fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
+				/* maxrate is absolute value in kbps. */
+				if (!in_range(p->minrate, 0, top_speed) ||
+				    !in_range(p->maxrate, 0, top_speed)) {
+					return (ERANGE);
+				}
+			} else
+				return (EINVAL);
+		} else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
+			/* maxrate is the absolute value in pps. */
+			check_pktsize = true;
+			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
+		} else
 			return (EINVAL);
-		else
-			p->maxrate = 0;
+	} else {
+		MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);
+
+		/*
+		 * Valid weight must be provided.
+		 */
+		if (p->weight < 0)
+		       return (EINVAL);
+		if (!in_range(p->weight, 1, 99))
+			return (ERANGE);
+
+		fw_rateunit = 0;
+		fw_ratemode = 0;
 	}
-	if (p->weight < 0) {
-		if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
+
+	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
+	    p->level == SCHED_CLASS_LEVEL_CL_WRR) {
+		/*
+		 * Valid scheduling class must be provided.
+		 */
+		if (p->cl < 0)
 			return (EINVAL);
-		else
-			p->weight = 0;
+		if (!in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
+			return (ERANGE);
 	}
-	if (p->pktsize < 0) {
-		if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
-		    p->level == SCHED_CLASS_LEVEL_CH_RL)
+
+	if (check_pktsize) {
+		if (p->pktsize < 0)
 			return (EINVAL);
-		else
-			p->pktsize = 0;
+		if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
+			return (ERANGE);
 	}
 
-	rc = begin_synchronized_op(sc, NULL,
-	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
-	if (rc)
-		return (rc);
 	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
 		tc = &pi->sched_params->cl_rl[p->cl];
-		if (tc->refcount > 0) {
+		mtx_lock(&sc->tc_lock);
+		if (tc->refcount > 0 || tc->flags & (CLRL_SYNC | CLRL_ASYNC))
 			rc = EBUSY;
-			goto done;
-		} else {
+		else {
+			tc->flags |= CLRL_SYNC | CLRL_USER;
 			tc->ratemode = fw_ratemode;
 			tc->rateunit = fw_rateunit;
 			tc->mode = fw_mode;
 			tc->maxrate = p->maxrate;
 			tc->pktsize = p->pktsize;
+			rc = 0;
+			old= *tc;
 		}
+		mtx_unlock(&sc->tc_lock);
+		if (rc != 0)
+			return (rc);
 	}
+
+	rc = begin_synchronized_op(sc, NULL,
+	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
+	if (rc != 0) {
+		if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
+			mtx_lock(&sc->tc_lock);
+			*tc = old;
+			mtx_unlock(&sc->tc_lock);
+		}
+		return (rc);
+	}
 	rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
 	    fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
-	    p->weight, p->pktsize, sleep_ok);
-	if (p->level == SCHED_CLASS_LEVEL_CL_RL && rc != 0) {
-		/*
-		 * Unknown state at this point, see parameters in tc for what
-		 * was attempted.
-		 */
-		tc->flags |= TX_CLRL_ERROR;
-	}
-done:
+	    p->weight, p->pktsize, 0, sleep_ok);
 	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
 
+	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
+		mtx_lock(&sc->tc_lock);
+		MPASS(tc->flags & CLRL_SYNC);
+		MPASS(tc->flags & CLRL_USER);
+		MPASS(tc->refcount == 0);
+
+		tc->flags &= ~CLRL_SYNC;
+		if (rc == 0)
+			tc->flags &= ~CLRL_ERR;
+		else
+			tc->flags |= CLRL_ERR;
+		mtx_unlock(&sc->tc_lock);
+	}
+
 	return (rc);
 }
 
 static void
 update_tx_sched(void *context, int pending)
 {
-	int i, j, mode, rateunit, ratemode, maxrate, pktsize, rc;
+	int i, j, rc;
 	struct port_info *pi;
 	struct tx_cl_rl_params *tc;
 	struct adapter *sc = context;
@@ -201,14 +245,8 @@ update_tx_sched(void *context, int pending)
 		tc = &pi->sched_params->cl_rl[0];
 		for (j = 0; j < n; j++, tc++) {
 			MPASS(mtx_owned(&sc->tc_lock));
-			if ((tc->flags & TX_CLRL_REFRESH) == 0)
+			if ((tc->flags & CLRL_ASYNC) == 0)
 				continue;
-
-			mode = tc->mode;
-			rateunit = tc->rateunit;
-			ratemode = tc->ratemode;
-			maxrate = tc->maxrate;
-			pktsize = tc->pktsize;
 			mtx_unlock(&sc->tc_lock);
 
 			if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
@@ -216,21 +254,19 @@ update_tx_sched(void *context, int pending)
 				mtx_lock(&sc->tc_lock);
 				continue;
 			}
-			rc = t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
-			    FW_SCHED_PARAMS_LEVEL_CL_RL, mode, rateunit,
-			    ratemode, pi->tx_chan, j, 0, maxrate, 0, pktsize,
-			    1);
+			rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
+			    FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
+			    tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
+			    tc->pktsize, tc->burstsize, 1);
 			end_synchronized_op(sc, 0);
 
 			mtx_lock(&sc->tc_lock);
-			if (rc != 0) {
-				tc->flags |= TX_CLRL_ERROR;
-			} else if (tc->mode == mode &&
-			    tc->rateunit == rateunit &&
-			    tc->maxrate == maxrate &&
-			    tc->pktsize == tc->pktsize) {
-				tc->flags &= ~(TX_CLRL_REFRESH | TX_CLRL_ERROR);
-			}
+			MPASS(tc->flags & CLRL_ASYNC);
+			tc->flags &= ~CLRL_ASYNC;
+			if (rc == 0)
+				tc->flags &= ~CLRL_ERR;
+			else
+				tc->flags |= CLRL_ERR;
 		}
 	}
 	mtx_unlock(&sc->tc_lock);
@@ -252,74 +288,135 @@ t4_set_sched_class(struct adapter *sc, struct t4_sched
 	return (EINVAL);
 }
 
-int
-t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
+static int
+bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
 {
-	struct port_info *pi = NULL;
-	struct vi_info *vi;
-	struct sge_txq *txq;
-	uint32_t fw_mnem, fw_queue, fw_class;
-	int i, rc;
+	struct tx_cl_rl_params *tc0, *tc;
+	int rc, old_idx;
+	uint32_t fw_mnem, fw_class;
 
-	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
-	if (rc)
-		return (rc);
+	if (!(txq->eq.flags & EQ_ALLOCATED))
+		return (EAGAIN);
 
-	if (p->port >= sc->params.nports) {
-		rc = EINVAL;
+	mtx_lock(&sc->tc_lock);
+	if (txq->tc_idx == -2) {
+		rc = EBUSY;	/* Another bind/unbind in progress already. */
 		goto done;
 	}
-
-	/* XXX: Only supported for the main VI. */
-	pi = sc->port[p->port];
-	vi = &pi->vi[0];
-	if (!(vi->flags & VI_INIT_DONE)) {
-		/* tx queues not set up yet */
-		rc = EAGAIN;
+	if (idx == txq->tc_idx) {
+		rc = 0;		/* No change, nothing to do. */
 		goto done;
 	}
 
-	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
-	    !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) {
-		rc = EINVAL;
-		goto done;
+	tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
+	if (idx != -1) {
+		/*
+		 * Bind to a different class at index idx.
+		 */
+		tc = &tc0[idx];
+		if (tc->flags & CLRL_ERR) {
+			rc = ENXIO;
+			goto done;
+		} else {
+			/*
+			 * Ok to proceed.  Place a reference on the new class
+			 * while still holding on to the reference on the
+			 * previous class, if any.
+			 */
+			tc->refcount++;
+		}
 	}
+	/* Mark as busy before letting go of the lock. */
+	old_idx = txq->tc_idx;
+	txq->tc_idx = -2;
+	mtx_unlock(&sc->tc_lock);
 
-	/*
-	 * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
-	 * Scheduling Class in this case).
-	 */
+	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
+	if (rc != 0)
+		return (rc);
 	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
-	fw_class = p->cl < 0 ? 0xffffffff : p->cl;
+	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
+	    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
+	fw_class = idx < 0 ? 0xffffffff : idx;
+	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem, &fw_class);
+	end_synchronized_op(sc, 0);
 
-	/*
-	 * If op.queue is non-negative, then we're only changing the scheduling
-	 * on a single specified TX queue.
-	 */
-	if (p->queue >= 0) {
-		txq = &sc->sge.txq[vi->first_txq + p->queue];
-		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
-		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
-		    &fw_class);
-		goto done;
+	mtx_lock(&sc->tc_lock);
+	MPASS(txq->tc_idx == -2);
+	if (rc == 0) {
+		/*
+		 * Unbind, bind, or bind to a different class succeeded.  Remove
+		 * the reference on the old traffic class, if any.
+		 */
+		if (old_idx != -1) {
+			tc = &tc0[old_idx];
+			MPASS(tc->refcount > 0);
+			tc->refcount--;
+		}
+		txq->tc_idx = idx;
+	} else {
+		/*
+		 * Unbind, bind, or bind to a different class failed.  Remove
+		 * the anticipatory reference on the new traffic class, if any.
+		 */
+		if (idx != -1) {
+			tc = &tc0[idx];
+			MPASS(tc->refcount > 0);
+			tc->refcount--;
+		}
+		txq->tc_idx = old_idx;
 	}
+done:
+	MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->chip_params->nsched_cls);
+	mtx_unlock(&sc->tc_lock);
+	return (rc);
+}
 
+int
+t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
+{
+	struct port_info *pi = NULL;
+	struct vi_info *vi;
+	struct sge_txq *txq;
+	int i, rc;
+
+	if (p->port >= sc->params.nports)
+		return (EINVAL);
+
 	/*
-	 * Change the scheduling on all the TX queues for the
-	 * interface.
+	 * XXX: cxgbetool allows the user to specify the physical port only.  So
+	 * we always operate on the main VI.
 	 */
-	for_each_txq(vi, i, txq) {
-		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
-		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
-		    &fw_class);
-		if (rc)
-			goto done;
+	pi = sc->port[p->port];
+	vi = &pi->vi[0];
+
+	/* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
+	if (!(vi->flags & VI_INIT_DONE))
+		return (EAGAIN);
+
+	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
+	    !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
+		return (EINVAL);
+
+	if (p->queue < 0) {
+		/*
+		 * Change the scheduling on all the TX queues for the
+		 * interface.
+		 */
+		for_each_txq(vi, i, txq) {
+			rc = bind_txq_to_traffic_class(sc, txq, p->cl);
+			if (rc != 0)
+				break;
+		}
+	} else {
+		/*
+		 * If op.queue is non-negative, then we're only changing the
+		 * scheduling on a single specified TX queue.
+		 */
+		txq = &sc->sge.txq[vi->first_txq + p->queue];
+		rc = bind_txq_to_traffic_class(sc, txq, p->cl);
 	}
 
-	rc = 0;
-done:
-	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
@@ -330,24 +427,6 @@ t4_init_tx_sched(struct adapter *sc)
 	const int n = sc->chip_params->nsched_cls;
 	struct port_info *pi;
 	struct tx_cl_rl_params *tc;
-	static const uint32_t init_kbps[] = {
-		100 * 1000,
-		200 * 1000,
-		400 * 1000,
-		500 * 1000,
-		800 * 1000,
-		1000 * 1000,
-		1200 * 1000,
-		1500 * 1000,
-		1800 * 1000,
-		2000 * 1000,
-		2500 * 1000,
-		3000 * 1000,
-		3500 * 1000,
-		4000 * 1000,
-		5000 * 1000,
-		10000 * 1000
-	};
 
 	mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
 	TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
@@ -360,15 +439,12 @@ t4_init_tx_sched(struct adapter *sc)
 			tc->refcount = 0;
 			tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
 			tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
-			tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
-			tc->maxrate = init_kbps[min(j, nitems(init_kbps) - 1)];
-			tc->pktsize = ETHERMTU;	/* XXX */
+			tc->mode = FW_SCHED_PARAMS_MODE_CLASS;
+			tc->maxrate = 1000 * 1000;	/* 1 Gbps.  Arbitrary */
 
 			if (t4_sched_params_cl_rl_kbps(sc, pi->tx_chan, j,
-			    tc->mode, tc->maxrate, tc->pktsize, 1) == 0)
-				tc->flags = 0;
-			else
-				tc->flags = TX_CLRL_ERROR;
+			    tc->mode, tc->maxrate, tc->pktsize, 1) != 0)
+				tc->flags = CLRL_ERR;
 		}
 	}
 
@@ -404,50 +480,72 @@ int
 t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
     int *tc_idx)
 {
-	int rc = 0, fa = -1, i;
+	int rc = 0, fa = -1, i, pktsize, burstsize;
+	bool update;
 	struct tx_cl_rl_params *tc;
+	struct port_info *pi;
 
 	MPASS(port_id >= 0 && port_id < sc->params.nports);
 
-	tc = &sc->port[port_id]->sched_params->cl_rl[0];
+	pi = sc->port[port_id];
+	if (pi->sched_params->pktsize > 0)
+		pktsize = pi->sched_params->pktsize;
+	else
+		pktsize = pi->vi[0].ifp->if_mtu;
+	if (pi->sched_params->burstsize > 0)
+		burstsize = pi->sched_params->burstsize;
+	else
+		burstsize = pktsize * 4;
+	tc = &pi->sched_params->cl_rl[0];
+
+	update = false;
 	mtx_lock(&sc->tc_lock);
 	for (i = 0; i < sc->chip_params->nsched_cls; i++, tc++) {
-		if (fa < 0 && tc->refcount == 0)
-			fa = i;
+		if (fa < 0 && tc->refcount == 0 && !(tc->flags & CLRL_USER))
+			fa = i;		/* first available */
 
 		if (tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
 		    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
 		    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
-		    tc->maxrate == maxrate) {
+		    tc->maxrate == maxrate && tc->pktsize == pktsize &&
+		    tc->burstsize == burstsize) {
 			tc->refcount++;
 			*tc_idx = i;
+			if ((tc->flags & (CLRL_ERR | CLRL_ASYNC | CLRL_SYNC)) ==
+			    CLRL_ERR) {
+				update = true;
+			}
 			goto done;
 		}
 	}
 	/* Not found */
 	MPASS(i == sc->chip_params->nsched_cls);
 	if (fa != -1) {
-		tc = &sc->port[port_id]->sched_params->cl_rl[fa];
-		tc->flags = TX_CLRL_REFRESH;
+		tc = &pi->sched_params->cl_rl[fa];
 		tc->refcount = 1;
 		tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
 		tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
 		tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
 		tc->maxrate = maxrate;
-		tc->pktsize = ETHERMTU;	/* XXX */
+		tc->pktsize = pktsize;
+		tc->burstsize = burstsize;
 		*tc_idx = fa;
-		t4_update_tx_sched(sc);
+		update = true;
 	} else {
 		*tc_idx = -1;
 		rc = ENOSPC;
 	}
 done:
 	mtx_unlock(&sc->tc_lock);
+	if (update) {
+		tc->flags |= CLRL_ASYNC;
+		t4_update_tx_sched(sc);
+	}
 	return (rc);
 }
 
 void
-t4_release_cl_rl_kbps(struct adapter *sc, int port_id, int tc_idx)
+t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
 {
 	struct tx_cl_rl_params *tc;
 
@@ -457,9 +555,114 @@ t4_release_cl_rl_kbps(struct adapter *sc, int port_id,
 	mtx_lock(&sc->tc_lock);
 	tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
 	MPASS(tc->refcount > 0);
-	MPASS(tc->ratemode == FW_SCHED_PARAMS_RATE_ABS);
-	MPASS(tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE);
-	MPASS(tc->mode == FW_SCHED_PARAMS_MODE_FLOW);
 	tc->refcount--;
 	mtx_unlock(&sc->tc_lock);
+}
+
+int
+sysctl_tc(SYSCTL_HANDLER_ARGS)
+{
+	struct vi_info *vi = arg1;
+	struct port_info *pi;
+	struct adapter *sc;
+	struct sge_txq *txq;
+	int qidx = arg2, rc, tc_idx;
+
+	MPASS(qidx >= 0 && qidx < vi->ntxq);
+	pi = vi->pi;
+	sc = pi->adapter;
+	txq = &sc->sge.txq[vi->first_txq + qidx];
+
+	tc_idx = txq->tc_idx;
+	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
+	if (rc != 0 || req->newptr == NULL)
+		return (rc);
+
+	if (sc->flags & IS_VF)
+		return (EPERM);
+	if (!in_range(tc_idx, 0, sc->chip_params->nsched_cls - 1))
+		return (EINVAL);
+
+	return (bind_txq_to_traffic_class(sc, txq, tc_idx));
+}
+
+int
+sysctl_tc_params(SYSCTL_HANDLER_ARGS)
+{
+	struct adapter *sc = arg1;
+	struct tx_cl_rl_params tc;
+	struct sbuf *sb;
+	int i, rc, port_id, mbps, gbps;
+
+	rc = sysctl_wire_old_buffer(req, 0);
+	if (rc != 0)
+		return (rc);
+
+	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
+	if (sb == NULL)
+		return (ENOMEM);
+
+	port_id = arg2 >> 16;
+	MPASS(port_id < sc->params.nports);
+	MPASS(sc->port[port_id] != NULL);
+	i = arg2 & 0xffff;
+	MPASS(i < sc->chip_params->nsched_cls);
+
+	mtx_lock(&sc->tc_lock);
+	tc = sc->port[port_id]->sched_params->cl_rl[i];
+	mtx_unlock(&sc->tc_lock);
+
+	switch (tc.rateunit) {
+	case SCHED_CLASS_RATEUNIT_BITS:
+		switch (tc.ratemode) {
+		case SCHED_CLASS_RATEMODE_REL:
+			/* XXX: top speed or actual link speed? */
+			gbps = port_top_speed(sc->port[port_id]);
+			sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
+			break;
+		case SCHED_CLASS_RATEMODE_ABS:
+			mbps = tc.maxrate / 1000;
+			gbps = tc.maxrate / 1000000;
+			if (tc.maxrate == gbps * 1000000)
+				sbuf_printf(sb, "%uGbps", gbps);
+			else if (tc.maxrate == mbps * 1000)
+				sbuf_printf(sb, "%uMbps", mbps);
+			else
+				sbuf_printf(sb, "%uKbps", tc.maxrate);
+			break;
+		default:
+			rc = ENXIO;
+			goto done;
+		}
+		break;
+	case SCHED_CLASS_RATEUNIT_PKTS:
+		sbuf_printf(sb, "%upps", tc.maxrate);
+		break;
+	default:
+		rc = ENXIO;
+		goto done;
+	}
+
+	switch (tc.mode) {
+	case SCHED_CLASS_MODE_CLASS:
+		sbuf_printf(sb, " aggregate");
+		break;
+	case SCHED_CLASS_MODE_FLOW:
+		sbuf_printf(sb, " per-flow");
+		if (tc.pktsize > 0)
+			sbuf_printf(sb, " pkt-size %u", tc.pktsize);
+		if (tc.burstsize > 0)
+			sbuf_printf(sb, " burst-size %u", tc.burstsize);
+		break;
+	default:
+		rc = ENXIO;
+		goto done;
+	}
+
+done:
+	if (rc == 0)
+		rc = sbuf_finish(sb);
+	sbuf_delete(sb);
+
+	return (rc);
 }

Modified: stable/11/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/11/sys/dev/cxgbe/t4_sge.c	Mon Apr 29 02:38:51 2019	(r346870)
+++ stable/11/sys/dev/cxgbe/t4_sge.c	Mon Apr 29 03:10:58 2019	(r346871)
@@ -288,7 +288,6 @@ static void drain_wrq_wr_list(struct adapter *, struct
 
 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
-static int sysctl_tc(SYSCTL_HANDLER_ARGS);
 
 static counter_u64_t extfree_refs;
 static counter_u64_t extfree_rels;
@@ -5197,90 +5196,5 @@ sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
 	sbuf_finish(&sb);
 	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
 	sbuf_delete(&sb);
-	return (rc);
-}
-
-static int
-sysctl_tc(SYSCTL_HANDLER_ARGS)
-{
-	struct vi_info *vi = arg1;
-	struct port_info *pi;
-	struct adapter *sc;
-	struct sge_txq *txq;
-	struct tx_cl_rl_params *tc;
-	int qidx = arg2, rc, tc_idx;
-	uint32_t fw_queue, fw_class;
-
-	MPASS(qidx >= 0 && qidx < vi->ntxq);
-	pi = vi->pi;
-	sc = pi->adapter;
-	txq = &sc->sge.txq[vi->first_txq + qidx];
-
-	tc_idx = txq->tc_idx;
-	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
-	if (rc != 0 || req->newptr == NULL)
-		return (rc);
-
-	if (sc->flags & IS_VF)
-		return (EPERM);
-
-	/* Note that -1 is legitimate input (it means unbind). */
-	if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls)
-		return (EINVAL);
-
-	mtx_lock(&sc->tc_lock);
-	if (tc_idx == txq->tc_idx) {
-		rc = 0;		/* No change, nothing to do. */
-		goto done;
-	}
-
-	fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
-	    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id);
-
-	if (tc_idx == -1)
-		fw_class = 0xffffffff;	/* Unbind. */
-	else {
-		/*
-		 * Bind to a different class.
-		 */
-		tc = &pi->sched_params->cl_rl[tc_idx];
-		if (tc->flags & TX_CLRL_ERROR) {
-			/* Previous attempt to set the cl-rl params failed. */
-			rc = EIO;
-			goto done;
-		} else {
-			/*
-			 * Ok to proceed.  Place a reference on the new class
-			 * while still holding on to the reference on the
-			 * previous class, if any.
-			 */
-			fw_class = tc_idx;
-			tc->refcount++;
-		}
-	}
-	mtx_unlock(&sc->tc_lock);
-
-	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc");
-	if (rc)
-		return (rc);
-	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class);
-	end_synchronized_op(sc, 0);
-

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201904290310.x3T3Awwl059372>