Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 3 Dec 2015 00:02:01 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r291665 - in head: share/man/man4 sys/dev/cxgbe sys/dev/cxgbe/common sys/dev/cxgbe/iw_cxgbe sys/dev/cxgbe/tom
Message-ID:  <201512030002.tB3021H0074684@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Thu Dec  3 00:02:01 2015
New Revision: 291665
URL: https://svnweb.freebsd.org/changeset/base/291665

Log:
  Add support for configuring additional virtual interfaces (VIs) on a port.
  
  Each virtual interface has its own MAC address, queues, and statistics.
  The dedicated netmap interfaces (ncxgbeX / ncxlX) were already implemented
  as additional VIs on each port.  This change allows additional non-netmap
  interfaces to be configured on each port.  Additional virtual interfaces
  use the naming scheme vcxgbeX or vcxlX.
  
  Additional VIs are enabled by setting the hw.cxgbe.num_vis tunable to a
  value greater than 1 before loading the cxgbe(4) or cxl(4) driver.
  NB: The first VI on each port is the "main" interface (cxgbeX or cxlX).
  
  T4/T5 NICs provide a limited number of MAC addresses for each physical port.
  As a result, a maximum of six VIs can be configured on each port (including
  the "main" interface and the netmap interface when netmap is enabled).
  
  One user-visible result is that when netmap is enabled, packets received
  or transmitted via the netmap interface are no longer counted in the stats
  for the "main" interface, but are now accounted to the netmap interface.
  
  The netmap interfaces now also have a new-bus device and export various
  information sysctl nodes via dev.n(cxgbe|cxl).X.
  
  The cxgbetool 'clearstats' command clears the stats for all VIs on the
  specified port along with the port's stats.  There is currently no way to
  clear the stats of an individual VI.
  
  Reviewed by:	np
  MFC after:	1 month
  Sponsored by:	Chelsio

Modified:
  head/share/man/man4/cxgbe.4
  head/sys/dev/cxgbe/adapter.h
  head/sys/dev/cxgbe/common/t4_hw.c
  head/sys/dev/cxgbe/iw_cxgbe/provider.c
  head/sys/dev/cxgbe/t4_main.c
  head/sys/dev/cxgbe/t4_netmap.c
  head/sys/dev/cxgbe/t4_sge.c
  head/sys/dev/cxgbe/tom/t4_connect.c
  head/sys/dev/cxgbe/tom/t4_cpl_io.c
  head/sys/dev/cxgbe/tom/t4_listen.c
  head/sys/dev/cxgbe/tom/t4_tom.c
  head/sys/dev/cxgbe/tom/t4_tom.h

Modified: head/share/man/man4/cxgbe.4
==============================================================================
--- head/share/man/man4/cxgbe.4	Wed Dec  2 23:54:59 2015	(r291664)
+++ head/share/man/man4/cxgbe.4	Thu Dec  3 00:02:01 2015	(r291665)
@@ -31,7 +31,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 20, 2014
+.Dd December 2, 2015
 .Dt CXGBE 4
 .Os
 .Sh NAME
@@ -170,6 +170,16 @@ number of CPU cores in the system, which
 .It Va hw.cxgbe.nofldrxq1g
 The number of TOE rx queues to use for a 1Gb port.
 The default is 1.
+.It Va hw.cxgbe.num_vis
+The number of virtual interfaces (VIs) created for each port.
+Each virtual interface creates a separate network interface.
+The first virtual interface on each port is required and represents
+the primary network interface on the port.
+Additional virtual interfaces on a port are named vcxgbe (T4) or
+vcxl (T5) and only use a single rx and tx queue.
+Additional virtual interfaces use a single pair of queues
+for rx and tx as well as an additional pair of queues for TOE rx and tx.
+The default is 1.
 .It Va hw.cxgbe.holdoff_timer_idx_10G
 .It Va hw.cxgbe.holdoff_timer_idx_1G
 The timer index value to use to delay interrupts.

Modified: head/sys/dev/cxgbe/adapter.h
==============================================================================
--- head/sys/dev/cxgbe/adapter.h	Wed Dec  2 23:54:59 2015	(r291664)
+++ head/sys/dev/cxgbe/adapter.h	Thu Dec  3 00:02:01 2015	(r291665)
@@ -198,34 +198,34 @@ enum {
 	CXGBE_BUSY	= (1 << 9),
 
 	/* port flags */
-	DOOMED		= (1 << 0),
-	PORT_INIT_DONE	= (1 << 1),
-	PORT_SYSCTL_CTX	= (1 << 2),
 	HAS_TRACEQ	= (1 << 3),
+
+	/* VI flags */
+	DOOMED		= (1 << 0),
+	VI_INIT_DONE	= (1 << 1),
+	VI_SYSCTL_CTX	= (1 << 2),
 	INTR_RXQ	= (1 << 4),	/* All NIC rxq's take interrupts */
 	INTR_OFLD_RXQ	= (1 << 5),	/* All TOE rxq's take interrupts */
-	INTR_NM_RXQ	= (1 << 6),	/* All netmap rxq's take interrupts */
-	INTR_ALL	= (INTR_RXQ | INTR_OFLD_RXQ | INTR_NM_RXQ),
+	INTR_ALL	= (INTR_RXQ | INTR_OFLD_RXQ),
+	VI_NETMAP	= (1 << 6),
 
 	/* adapter debug_flags */
 	DF_DUMP_MBOX	= (1 << 0),
 };
 
-#define IS_DOOMED(pi)	((pi)->flags & DOOMED)
-#define SET_DOOMED(pi)	do {(pi)->flags |= DOOMED;} while (0)
+#define IS_DOOMED(vi)	((vi)->flags & DOOMED)
+#define SET_DOOMED(vi)	do {(vi)->flags |= DOOMED;} while (0)
 #define IS_BUSY(sc)	((sc)->flags & CXGBE_BUSY)
 #define SET_BUSY(sc)	do {(sc)->flags |= CXGBE_BUSY;} while (0)
 #define CLR_BUSY(sc)	do {(sc)->flags &= ~CXGBE_BUSY;} while (0)
 
-struct port_info {
+struct vi_info {
 	device_t dev;
-	struct adapter *adapter;
+	struct port_info *pi;
 
 	struct ifnet *ifp;
 	struct ifmedia media;
 
-	struct mtx pi_lock;
-	char lockname[16];
 	unsigned long flags;
 	int if_flags;
 
@@ -234,13 +234,11 @@ struct port_info {
 	int16_t  xact_addr_filt;/* index of exact MAC address filter */
 	uint16_t rss_size;	/* size of VI's RSS table slice */
 	uint16_t rss_base;	/* start of VI's RSS table slice */
-	uint8_t  lport;		/* associated offload logical port */
-	int8_t   mdio_addr;
-	uint8_t  port_type;
-	uint8_t  mod_type;
-	uint8_t  port_id;
-	uint8_t  tx_chan;
-	uint8_t  rx_chan_map;	/* rx MPS channel bitmap */
+
+	eventhandler_tag vlan_c;
+
+	int nintr;
+	int first_intr;
 
 	/* These need to be int as they are used in sysctl */
 	int ntxq;	/* # of tx queues */
@@ -254,24 +252,41 @@ struct port_info {
 	int nofldrxq;		/* # of offload rx queues */
 	int first_ofld_rxq;	/* index of first offload rx queue */
 #endif
-#ifdef DEV_NETMAP
-	int nnmtxq;		/* # of netmap tx queues */
-	int first_nm_txq;	/* index of first netmap tx queue */
-	int nnmrxq;		/* # of netmap rx queues */
-	int first_nm_rxq;	/* index of first netmap rx queue */
-
-	struct ifnet *nm_ifp;
-	struct ifmedia nm_media;
-	int nmif_flags;
-	uint16_t nm_viid;
-	int16_t nm_xact_addr_filt;
-	uint16_t nm_rss_size;	/* size of netmap VI's RSS table slice */
-#endif
 	int tmr_idx;
 	int pktc_idx;
 	int qsize_rxq;
 	int qsize_txq;
 
+	struct timeval last_refreshed;
+	struct fw_vi_stats_vf stats;
+
+	struct callout tick;
+	struct sysctl_ctx_list ctx;	/* from ifconfig up to driver detach */
+
+	uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */
+};
+
+struct port_info {
+	device_t dev;
+	struct adapter *adapter;
+
+	struct vi_info *vi;
+	int nvi;
+	int up_vis;
+	int uld_vis;
+
+	struct mtx pi_lock;
+	char lockname[16];
+	unsigned long flags;
+
+	uint8_t  lport;		/* associated offload logical port */
+	int8_t   mdio_addr;
+	uint8_t  port_type;
+	uint8_t  mod_type;
+	uint8_t  port_id;
+	uint8_t  tx_chan;
+	uint8_t  rx_chan_map;	/* rx MPS channel bitmap */
+
 	int linkdnrc;
 	struct link_config link_cfg;
 
@@ -280,14 +295,11 @@ struct port_info {
 	u_int tnl_cong_drops;
 	u_int tx_parse_error;
 
-	eventhandler_tag vlan_c;
-
 	struct callout tick;
-	struct sysctl_ctx_list ctx;	/* from ifconfig up to driver detach */
-
-	uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */
 };
 
+#define	IS_MAIN_VI(vi)		((vi) == &((vi)->pi->vi[0]))
+
 /* Where the cluster came from, how it has been carved up. */
 struct cluster_layout {
 	int8_t zidx;
@@ -603,7 +615,7 @@ struct sge_wrq {
 
 #ifdef DEV_NETMAP
 struct sge_nm_rxq {
-	struct port_info *pi;
+	struct vi_info *vi;
 
 	struct iq_desc *iq_desc;
 	uint16_t iq_abs_id;
@@ -760,7 +772,6 @@ struct adapter {
 	struct tid_info tids;
 
 	uint16_t doorbells;
-	int open_device_map;
 #ifdef TCP_OFFLOAD
 	int offload_map;	/* ports with IFCAP_TOE enabled */
 	int active_ulds;	/* ULDs activated on this adapter */
@@ -870,24 +881,27 @@ struct adapter {
 		} \
 	} while (0)
 
-#define for_each_txq(pi, iter, q) \
-	for (q = &pi->adapter->sge.txq[pi->first_txq], iter = 0; \
-	    iter < pi->ntxq; ++iter, ++q)
-#define for_each_rxq(pi, iter, q) \
-	for (q = &pi->adapter->sge.rxq[pi->first_rxq], iter = 0; \
-	    iter < pi->nrxq; ++iter, ++q)
-#define for_each_ofld_txq(pi, iter, q) \
-	for (q = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq], iter = 0; \
-	    iter < pi->nofldtxq; ++iter, ++q)
-#define for_each_ofld_rxq(pi, iter, q) \
-	for (q = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq], iter = 0; \
-	    iter < pi->nofldrxq; ++iter, ++q)
-#define for_each_nm_txq(pi, iter, q) \
-	for (q = &pi->adapter->sge.nm_txq[pi->first_nm_txq], iter = 0; \
-	    iter < pi->nnmtxq; ++iter, ++q)
-#define for_each_nm_rxq(pi, iter, q) \
-	for (q = &pi->adapter->sge.nm_rxq[pi->first_nm_rxq], iter = 0; \
-	    iter < pi->nnmrxq; ++iter, ++q)
+#define for_each_txq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \
+	    iter < vi->ntxq; ++iter, ++q)
+#define for_each_rxq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.rxq[vi->first_rxq], iter = 0; \
+	    iter < vi->nrxq; ++iter, ++q)
+#define for_each_ofld_txq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.ofld_txq[vi->first_ofld_txq], iter = 0; \
+	    iter < vi->nofldtxq; ++iter, ++q)
+#define for_each_ofld_rxq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.ofld_rxq[vi->first_ofld_rxq], iter = 0; \
+	    iter < vi->nofldrxq; ++iter, ++q)
+#define for_each_nm_txq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.nm_txq[vi->first_txq], iter = 0; \
+	    iter < vi->ntxq; ++iter, ++q)
+#define for_each_nm_rxq(vi, iter, q) \
+	for (q = &vi->pi->adapter->sge.nm_rxq[vi->first_rxq], iter = 0; \
+	    iter < vi->nrxq; ++iter, ++q)
+#define for_each_vi(_pi, _iter, _vi) \
+	for ((_vi) = (_pi)->vi, (_iter) = 0; (_iter) < (_pi)->nvi; \
+	     ++(_iter), ++(_vi))
 
 #define IDXINCR(idx, incr, wrap) do { \
 	idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \
@@ -979,7 +993,7 @@ static inline void
 t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[])
 {
 
-	bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN);
+	bcopy(hw_addr, sc->port[idx]->vi[0].hw_addr, ETHER_ADDR_LEN);
 }
 
 static inline bool
@@ -1015,13 +1029,17 @@ int t4_register_cpl_handler(struct adapt
 int t4_register_an_handler(struct adapter *, an_handler_t);
 int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t);
 int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
-int begin_synchronized_op(struct adapter *, struct port_info *, int, char *);
+int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *);
+void doom_vi(struct adapter *, struct vi_info *);
 void end_synchronized_op(struct adapter *, int);
 int update_mac_settings(struct ifnet *, int);
 int adapter_full_init(struct adapter *);
 int adapter_full_uninit(struct adapter *);
-int port_full_init(struct port_info *);
-int port_full_uninit(struct port_info *);
+uint64_t cxgbe_get_counter(struct ifnet *, ift_counter);
+int vi_full_init(struct vi_info *);
+int vi_full_uninit(struct vi_info *);
+void vi_sysctls(struct vi_info *);
+void vi_tick(void *);
 
 #ifdef DEV_NETMAP
 /* t4_netmap.c */
@@ -1043,8 +1061,8 @@ void t4_sge_sysctls(struct adapter *, st
 int t4_destroy_dma_tag(struct adapter *);
 int t4_setup_adapter_queues(struct adapter *);
 int t4_teardown_adapter_queues(struct adapter *);
-int t4_setup_port_queues(struct port_info *);
-int t4_teardown_port_queues(struct port_info *);
+int t4_setup_vi_queues(struct vi_info *);
+int t4_teardown_vi_queues(struct vi_info *);
 void t4_intr_all(void *);
 void t4_intr(void *);
 void t4_intr_err(void *);

Modified: head/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- head/sys/dev/cxgbe/common/t4_hw.c	Wed Dec  2 23:54:59 2015	(r291664)
+++ head/sys/dev/cxgbe/common/t4_hw.c	Thu Dec  3 00:02:01 2015	(r291665)
@@ -5723,11 +5723,11 @@ int __devinit t4_port_init(struct port_i
 	if (ret < 0)
 		return ret;
 
-	p->viid = ret;
+	p->vi[0].viid = ret;
 	p->tx_chan = j;
 	p->rx_chan_map = get_mps_bg_map(adap, j);
 	p->lport = j;
-	p->rss_size = rss_size;
+	p->vi[0].rss_size = rss_size;
 	t4_os_set_hw_addr(adap, p->port_id, addr);
 
 	ret = ntohl(c.u.info.lstatus_to_modtype);
@@ -5740,13 +5740,13 @@ int __devinit t4_port_init(struct port_i
 
 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
-	    V_FW_PARAMS_PARAM_YZ(p->viid);
+	    V_FW_PARAMS_PARAM_YZ(p->vi[0].viid);
 	ret = t4_query_params(adap, mbox, pf, vf, 1, &param, &val);
 	if (ret)
-		p->rss_base = 0xffff;
+		p->vi[0].rss_base = 0xffff;
 	else {
 		/* MPASS((val >> 16) == rss_size); */
-		p->rss_base = val & 0xffff;
+		p->vi[0].rss_base = val & 0xffff;
 	}
 
 	return 0;

Modified: head/sys/dev/cxgbe/iw_cxgbe/provider.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/provider.c	Wed Dec  2 23:54:59 2015	(r291664)
+++ head/sys/dev/cxgbe/iw_cxgbe/provider.c	Thu Dec  3 00:02:01 2015	(r291665)
@@ -296,7 +296,7 @@ c4iw_query_gid(struct ib_device *ibdev, 
 	if (port == 0 || port > sc->params.nports)
 		return (-EINVAL);
 	pi = sc->port[port - 1];
-	memcpy(&gid->raw[0], pi->hw_addr, sizeof(pi->hw_addr));
+	memcpy(&gid->raw[0], pi->vi[0].hw_addr, ETHER_ADDR_LEN);
 	return (0);
 }
 
@@ -309,7 +309,8 @@ c4iw_query_device(struct ib_device *ibde
 	CTR3(KTR_IW_CXGBE, "%s ibdev %p, props %p", __func__, ibdev, props);
 
 	memset(props, 0, sizeof *props);
-	memcpy(&props->sys_image_guid, sc->port[0]->hw_addr, 6);
+	memcpy(&props->sys_image_guid, sc->port[0]->vi[0].hw_addr,
+	    ETHER_ADDR_LEN);
 	props->hw_ver = sc->params.chipid;
 	props->fw_ver = sc->params.fw_vers;
 	props->device_cap_flags = dev->device_cap_flags;
@@ -352,7 +353,7 @@ c4iw_query_port(struct ib_device *ibdev,
 	if (port > sc->params.nports)
 		return (-EINVAL);
 	pi = sc->port[port - 1];
-	ifp = pi->ifp;
+	ifp = pi->vi[0].ifp;
 
 	memset(props, 0, sizeof(struct ib_port_attr));
 	props->max_mtu = IB_MTU_4096;
@@ -397,7 +398,7 @@ c4iw_register_device(struct c4iw_dev *de
 	BUG_ON(!sc->port[0]);
 	strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name));
 	memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
-	memcpy(&ibdev->node_guid, sc->port[0]->hw_addr, 6);
+	memcpy(&ibdev->node_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN);
 	ibdev->owner = THIS_MODULE;
 	dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
 	if (fastreg_support)

Modified: head/sys/dev/cxgbe/t4_main.c
==============================================================================
--- head/sys/dev/cxgbe/t4_main.c	Wed Dec  2 23:54:59 2015	(r291664)
+++ head/sys/dev/cxgbe/t4_main.c	Thu Dec  3 00:02:01 2015	(r291665)
@@ -106,6 +106,22 @@ static driver_t cxgbe_driver = {
 	sizeof(struct port_info)
 };
 
+/* T4 VI (vcxgbe) interface */
+static int vcxgbe_probe(device_t);
+static int vcxgbe_attach(device_t);
+static int vcxgbe_detach(device_t);
+static device_method_t vcxgbe_methods[] = {
+	DEVMETHOD(device_probe,		vcxgbe_probe),
+	DEVMETHOD(device_attach,	vcxgbe_attach),
+	DEVMETHOD(device_detach,	vcxgbe_detach),
+	{ 0, 0 }
+};
+static driver_t vcxgbe_driver = {
+	"vcxgbe",
+	vcxgbe_methods,
+	sizeof(struct vi_info)
+};
+
 static d_ioctl_t t4_ioctl;
 static d_open_t t4_open;
 static d_close_t t4_close;
@@ -142,6 +158,13 @@ static driver_t cxl_driver = {
 	sizeof(struct port_info)
 };
 
+/* T5 VI (vcxl) interface */
+static driver_t vcxl_driver = {
+	"vcxl",
+	vcxgbe_methods,
+	sizeof(struct vi_info)
+};
+
 static struct cdevsw t5_cdevsw = {
        .d_version = D_VERSION,
        .d_flags = 0,
@@ -156,7 +179,6 @@ static void cxgbe_init(void *);
 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
 static void cxgbe_qflush(struct ifnet *);
-static uint64_t cxgbe_get_counter(struct ifnet *, ift_counter);
 static int cxgbe_media_change(struct ifnet *);
 static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
 
@@ -327,6 +349,19 @@ TUNABLE_INT("hw.cxgbe.fcoecaps_allowed",
 static int t5_write_combine = 0;
 TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);
 
+static int t4_num_vis = 1;
+TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);
+
+/* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
+static int vi_mac_funcs[] = {
+	FW_VI_FUNC_OFLD,
+	FW_VI_FUNC_IWARP,
+	FW_VI_FUNC_OPENISCSI,
+	FW_VI_FUNC_OPENFCOE,
+	FW_VI_FUNC_FOISCSI,
+	FW_VI_FUNC_FOFCOE,
+};
+
 struct intrs_and_queues {
 	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
 	uint16_t nirq;		/* Total # of vectors */
@@ -370,7 +405,7 @@ static int validate_mt_off_len(struct ad
     uint32_t *);
 static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *);
 static uint32_t position_memwin(struct adapter *, int, uint32_t);
-static int cfg_itype_and_nqueues(struct adapter *, int, int,
+static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
     struct intrs_and_queues *);
 static int prep_firmware(struct adapter *);
 static int partition_resources(struct adapter *, const struct firmware *,
@@ -380,8 +415,8 @@ static int get_params__post_init(struct 
 static int set_params__post_init(struct adapter *);
 static void t4_set_desc(struct adapter *);
 static void build_medialist(struct port_info *, struct ifmedia *);
-static int cxgbe_init_synchronized(struct port_info *);
-static int cxgbe_uninit_synchronized(struct port_info *);
+static int cxgbe_init_synchronized(struct vi_info *);
+static int cxgbe_uninit_synchronized(struct vi_info *);
 static int setup_intr_handlers(struct adapter *);
 static void quiesce_txq(struct adapter *, struct sge_txq *);
 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
@@ -393,6 +428,7 @@ static int t4_free_irq(struct adapter *,
 static void reg_block_dump(struct adapter *, uint8_t *, unsigned int,
     unsigned int);
 static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
+static void vi_refresh_stats(struct adapter *, struct vi_info *);
 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
 static void cxgbe_tick(void *);
 static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
@@ -400,8 +436,8 @@ static int cpl_not_handled(struct sge_iq
     struct mbuf *);
 static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
 static int fw_msg_not_handled(struct adapter *, const __be64 *);
-static int t4_sysctls(struct adapter *);
-static int cxgbe_sysctls(struct port_info *);
+static void t4_sysctls(struct adapter *);
+static void cxgbe_sysctls(struct port_info *);
 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
 static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
@@ -459,7 +495,7 @@ static int read_i2c(struct adapter *, st
 static int set_sched_class(struct adapter *, struct t4_sched_params *);
 static int set_sched_queue(struct adapter *, struct t4_sched_queue *);
 #ifdef TCP_OFFLOAD
-static int toe_capability(struct port_info *, int);
+static int toe_capability(struct vi_info *, int);
 #endif
 static int mod_event(module_t, int, void *);
 
@@ -604,7 +640,7 @@ static int
 t4_attach(device_t dev)
 {
 	struct adapter *sc;
-	int rc = 0, i, n10g, n1g, rqidx, tqidx;
+	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
 	struct intrs_and_queues iaq;
 	struct sge *s;
 #ifdef TCP_OFFLOAD
@@ -613,6 +649,7 @@ t4_attach(device_t dev)
 #ifdef DEV_NETMAP
 	int nm_rqidx, nm_tqidx;
 #endif
+	int num_vis;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
@@ -646,7 +683,7 @@ t4_attach(device_t dev)
 
 	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
 	TAILQ_INIT(&sc->sfl);
-	callout_init(&sc->sfl_callout, 1);
+	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
 
 	mtx_init(&sc->regwin_lock, "register and memory window", 0, MTX_DEF);
 
@@ -731,6 +768,27 @@ t4_attach(device_t dev)
 		goto done; /* error message displayed already */
 
 	/*
+	 * Number of VIs to create per-port.  The first VI is the
+	 * "main" regular VI for the port.  The second VI is used for
+	 * netmap if present, and any remaining VIs are used for
+	 * additional virtual interfaces.
+	 *
+	 * Limit the number of VIs per port to the number of available
+	 * MAC addresses per port.
+	 */
+	if (t4_num_vis >= 1)
+		num_vis = t4_num_vis;
+	else
+		num_vis = 1;
+#ifdef DEV_NETMAP
+	num_vis++;
+#endif
+	if (num_vis > nitems(vi_mac_funcs)) {
+		num_vis = nitems(vi_mac_funcs);
+		device_printf(dev, "Number of VIs limited to %d\n", num_vis);
+	}
+
+	/*
 	 * First pass over all the ports - allocate VIs and initialize some
 	 * basic parameters like mac address, port type, etc.  We also figure
 	 * out whether a port is 10G or 1G and use that information when
@@ -739,6 +797,7 @@ t4_attach(device_t dev)
 	n10g = n1g = 0;
 	for_each_port(sc, i) {
 		struct port_info *pi;
+		struct vi_info *vi;
 
 		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
 		sc->port[i] = pi;
@@ -746,12 +805,19 @@ t4_attach(device_t dev)
 		/* These must be set before t4_port_init */
 		pi->adapter = sc;
 		pi->port_id = i;
+		pi->nvi = num_vis;
+		pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
+		    M_ZERO | M_WAITOK);
 
-		/* Allocate the vi and initialize parameters like mac addr */
+		/*
+		 * Allocate the "main" VI and initialize parameters
+		 * like mac addr.
+		 */
 		rc = -t4_port_init(pi, sc->mbox, sc->pf, 0);
 		if (rc != 0) {
 			device_printf(dev, "unable to initialize port %d: %d\n",
 			    i, rc);
+			free(pi->vi, M_CXGBE);
 			free(pi, M_CXGBE);
 			sc->port[i] = NULL;
 			goto done;
@@ -765,6 +831,7 @@ t4_attach(device_t dev)
 		rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
 		if (rc != 0) {
 			device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
+			free(pi->vi, M_CXGBE);
 			free(pi, M_CXGBE);
 			sc->port[i] = NULL;
 			goto done;
@@ -777,19 +844,25 @@ t4_attach(device_t dev)
 
 		if (is_10G_port(pi) || is_40G_port(pi)) {
 			n10g++;
-			pi->tmr_idx = t4_tmr_idx_10g;
-			pi->pktc_idx = t4_pktc_idx_10g;
+			for_each_vi(pi, j, vi) {
+				vi->tmr_idx = t4_tmr_idx_10g;
+				vi->pktc_idx = t4_pktc_idx_10g;
+			}
 		} else {
 			n1g++;
-			pi->tmr_idx = t4_tmr_idx_1g;
-			pi->pktc_idx = t4_pktc_idx_1g;
+			for_each_vi(pi, j, vi) {
+				vi->tmr_idx = t4_tmr_idx_1g;
+				vi->pktc_idx = t4_pktc_idx_1g;
+			}
 		}
 
-		pi->xact_addr_filt = -1;
 		pi->linkdnrc = -1;
 
-		pi->qsize_rxq = t4_qsize_rxq;
-		pi->qsize_txq = t4_qsize_txq;
+		for_each_vi(pi, j, vi) {
+			vi->qsize_rxq = t4_qsize_rxq;
+			vi->qsize_txq = t4_qsize_txq;
+			vi->pi = pi;
+		}
 
 		pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1);
 		if (pi->dev == NULL) {
@@ -798,13 +871,17 @@ t4_attach(device_t dev)
 			rc = ENXIO;
 			goto done;
 		}
+		pi->vi[0].dev = pi->dev;
 		device_set_softc(pi->dev, pi);
 	}
 
 	/*
 	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
 	 */
-	rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq);
+#ifdef DEV_NETMAP
+	num_vis--;
+#endif
+	rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
@@ -814,6 +891,10 @@ t4_attach(device_t dev)
 	s = &sc->sge;
 	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
 	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
+	if (num_vis > 1) {
+		s->nrxq += (n10g + n1g) * (num_vis - 1);
+		s->ntxq += (n10g + n1g) * (num_vis - 1);
+	}
 	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
 	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
 	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
@@ -821,6 +902,10 @@ t4_attach(device_t dev)
 	if (is_offload(sc)) {
 		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
 		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
+		if (num_vis > 1) {
+			s->nofldrxq += (n10g + n1g) * (num_vis - 1);
+			s->nofldtxq += (n10g + n1g) * (num_vis - 1);
+		}
 		s->neq += s->nofldtxq + s->nofldrxq;
 		s->niq += s->nofldrxq;
 
@@ -871,57 +956,68 @@ t4_attach(device_t dev)
 #endif
 	for_each_port(sc, i) {
 		struct port_info *pi = sc->port[i];
+		struct vi_info *vi;
 
 		if (pi == NULL)
 			continue;
 
-		pi->first_rxq = rqidx;
-		pi->first_txq = tqidx;
-		if (is_10G_port(pi) || is_40G_port(pi)) {
-			pi->flags |= iaq.intr_flags_10g;
-			pi->nrxq = iaq.nrxq10g;
-			pi->ntxq = iaq.ntxq10g;
-		} else {
-			pi->flags |= iaq.intr_flags_1g;
-			pi->nrxq = iaq.nrxq1g;
-			pi->ntxq = iaq.ntxq1g;
-		}
+		for_each_vi(pi, j, vi) {
+#ifdef DEV_NETMAP
+			if (j == 1) {
+				vi->flags |= VI_NETMAP | INTR_RXQ;
+				vi->first_rxq = nm_rqidx;
+				vi->first_txq = nm_tqidx;
+				if (is_10G_port(pi) || is_40G_port(pi)) {
+					vi->nrxq = iaq.nnmrxq10g;
+					vi->ntxq = iaq.nnmtxq10g;
+				} else {
+					vi->nrxq = iaq.nnmrxq1g;
+					vi->ntxq = iaq.nnmtxq1g;
+				}
+				nm_rqidx += vi->nrxq;
+				nm_tqidx += vi->ntxq;
+				continue;
+			}
+#endif
 
-		if (pi->ntxq > 1)
-			pi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
-		else
-			pi->rsrv_noflowq = 0;
+			vi->first_rxq = rqidx;
+			vi->first_txq = tqidx;
+			if (is_10G_port(pi) || is_40G_port(pi)) {
+				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
+				vi->nrxq = j == 0 ? iaq.nrxq10g : 1;
+				vi->ntxq = j == 0 ? iaq.ntxq10g : 1;
+			} else {
+				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
+				vi->nrxq = j == 0 ? iaq.nrxq1g : 1;
+				vi->ntxq = j == 0 ? iaq.ntxq1g : 1;
+			}
+
+			if (vi->ntxq > 1)
+				vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
+			else
+				vi->rsrv_noflowq = 0;
+
+			rqidx += vi->nrxq;
+			tqidx += vi->ntxq;
 
-		rqidx += pi->nrxq;
-		tqidx += pi->ntxq;
 #ifdef TCP_OFFLOAD
-		if (is_offload(sc)) {
-			pi->first_ofld_rxq = ofld_rqidx;
-			pi->first_ofld_txq = ofld_tqidx;
+			if (!is_offload(sc))
+				continue;
+			vi->first_ofld_rxq = ofld_rqidx;
+			vi->first_ofld_txq = ofld_tqidx;
 			if (is_10G_port(pi) || is_40G_port(pi)) {
-				pi->nofldrxq = iaq.nofldrxq10g;
-				pi->nofldtxq = iaq.nofldtxq10g;
+				vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
+				vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : 1;
+				vi->nofldtxq = j == 0 ? iaq.nofldtxq10g : 1;
 			} else {
-				pi->nofldrxq = iaq.nofldrxq1g;
-				pi->nofldtxq = iaq.nofldtxq1g;
+				vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
+				vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : 1;
+				vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : 1;
 			}
-			ofld_rqidx += pi->nofldrxq;
-			ofld_tqidx += pi->nofldtxq;
-		}
+			ofld_rqidx += vi->nofldrxq;
+			ofld_tqidx += vi->nofldtxq;
 #endif
-#ifdef DEV_NETMAP
-		pi->first_nm_rxq = nm_rqidx;
-		pi->first_nm_txq = nm_tqidx;
-		if (is_10G_port(pi) || is_40G_port(pi)) {
-			pi->nnmrxq = iaq.nnmrxq10g;
-			pi->nnmtxq = iaq.nnmtxq10g;
-		} else {
-			pi->nnmrxq = iaq.nnmrxq1g;
-			pi->nnmtxq = iaq.nnmtxq1g;
 		}
-		nm_rqidx += pi->nnmrxq;
-		nm_tqidx += pi->nnmtxq;
-#endif
 	}
 
 	rc = setup_intr_handlers(sc);
@@ -996,11 +1092,12 @@ t4_detach(device_t dev)
 	for (i = 0; i < MAX_NPORTS; i++) {
 		pi = sc->port[i];
 		if (pi) {
-			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->viid);
+			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
 			if (pi->dev)
 				device_delete_child(dev, pi->dev);
 
 			mtx_destroy(&pi->pi_lock);
+			free(pi->vi, M_CXGBE);
 			free(pi, M_CXGBE);
 		}
 	}
@@ -1052,6 +1149,7 @@ t4_detach(device_t dev)
 		mtx_destroy(&sc->sc_lock);
 	}
 
+	callout_drain(&sc->sfl_callout);
 	if (mtx_initialized(&sc->tids.ftid_lock))
 		mtx_destroy(&sc->tids.ftid_lock);
 	if (mtx_initialized(&sc->sfl_lock))
@@ -1084,12 +1182,13 @@ cxgbe_probe(device_t dev)
 #define T4_CAP_ENABLE (T4_CAP)
 
 static int
-cxgbe_attach(device_t dev)
+cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 {
-	struct port_info *pi = device_get_softc(dev);
 	struct ifnet *ifp;
-	char *s;
-	int n, o;
+	struct sbuf *sb;
+
+	vi->xact_addr_filt = -1;
+	callout_init(&vi->tick, 1);
 
 	/* Allocate an ifnet and set it up */
 	ifp = if_alloc(IFT_ETHER);
@@ -1097,10 +1196,8 @@ cxgbe_attach(device_t dev)
 		device_printf(dev, "Cannot allocate ifnet\n");
 		return (ENOMEM);
 	}
-	pi->ifp = ifp;
-	ifp->if_softc = pi;
-
-	callout_init(&pi->tick, 1);
+	vi->ifp = ifp;
+	ifp->if_softc = vi;
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
@@ -1113,7 +1210,7 @@ cxgbe_attach(device_t dev)
 
 	ifp->if_capabilities = T4_CAP;
 #ifdef TCP_OFFLOAD
-	if (is_offload(pi->adapter))
+	if (vi->nofldrxq != 0)
 		ifp->if_capabilities |= IFCAP_TOE;
 #endif
 	ifp->if_capenable = T4_CAP_ENABLE;
@@ -1124,99 +1221,121 @@ cxgbe_attach(device_t dev)
 	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
 	ifp->if_hw_tsomaxsegsize = 65536;
 
-	/* Initialize ifmedia for this port */
-	ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
+	/* Initialize ifmedia for this VI */
+	ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
 	    cxgbe_media_status);
-	build_medialist(pi, &pi->media);
+	build_medialist(vi->pi, &vi->media);
 
-	pi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
+	vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
 	    EVENTHANDLER_PRI_ANY);
 
-	ether_ifattach(ifp, pi->hw_addr);
+	ether_ifattach(ifp, vi->hw_addr);
 
-	n = 128;
-	s = malloc(n, M_CXGBE, M_WAITOK);
-	o = snprintf(s, n, "%d txq, %d rxq (NIC)", pi->ntxq, pi->nrxq);
-	MPASS(n > o);
+	sb = sbuf_new_auto();
+	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
 #ifdef TCP_OFFLOAD
-	if (is_offload(pi->adapter)) {
-		o += snprintf(s + o, n - o, "; %d txq, %d rxq (TOE)",
-		    pi->nofldtxq, pi->nofldrxq);
-		MPASS(n > o);
-	}
-#endif
-#ifdef DEV_NETMAP
-	o += snprintf(s + o, n - o, "; %d txq, %d rxq (netmap)", pi->nnmtxq,
-	    pi->nnmrxq);
-	MPASS(n > o);
+	if (ifp->if_capabilities & IFCAP_TOE)
+		sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
+		    vi->nofldtxq, vi->nofldrxq);
 #endif
-	device_printf(dev, "%s\n", s);
-	free(s, M_CXGBE);
+	sbuf_finish(sb);
+	device_printf(dev, "%s\n", sbuf_data(sb));
+	sbuf_delete(sb);
+
+	vi_sysctls(vi);
+
+	return (0);
+}
+
+static int
+cxgbe_attach(device_t dev)
+{
+	struct port_info *pi = device_get_softc(dev);
+	struct vi_info *vi;
+	int i, rc;
+
+	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
 
+	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
+	if (rc)
+		return (rc);
+
+	for_each_vi(pi, i, vi) {
+		if (i == 0)
+			continue;
 #ifdef DEV_NETMAP
-	/* nm_media handled here to keep implementation private to this file */
-	ifmedia_init(&pi->nm_media, IFM_IMASK, cxgbe_media_change,
-	    cxgbe_media_status);
-	build_medialist(pi, &pi->nm_media);
-	create_netmap_ifnet(pi);	/* logs errors it something fails */
+		if (vi->flags & VI_NETMAP) {
+			/*
+			 * media handled here to keep
+			 * implementation private to this file
+			 */
+			ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
+			    cxgbe_media_status);
+			build_medialist(pi, &vi->media);
+			vi->dev = device_add_child(dev, is_t4(pi->adapter) ?
+			    "ncxgbe" : "ncxl", device_get_unit(dev));
+		} else
 #endif
+			vi->dev = device_add_child(dev, is_t4(pi->adapter) ?
+			    "vcxgbe" : "vcxl", -1);
+		if (vi->dev == NULL) {
+			device_printf(dev, "failed to add VI %d\n", i);
+			continue;
+		}
+		device_set_softc(vi->dev, vi);
+	}
+
 	cxgbe_sysctls(pi);
 
+	bus_generic_attach(dev);
+
 	return (0);
 }
 
+static void
+cxgbe_vi_detach(struct vi_info *vi)
+{
+	struct ifnet *ifp = vi->ifp;
+
+	ether_ifdetach(ifp);
+
+	if (vi->vlan_c)
+		EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);
+
+	/* Let detach proceed even if these fail. */
+	cxgbe_uninit_synchronized(vi);
+	callout_drain(&vi->tick);
+	vi_full_uninit(vi);
+
+	ifmedia_removeall(&vi->media);
+	if_free(vi->ifp);
+	vi->ifp = NULL;
+}
+
 static int
 cxgbe_detach(device_t dev)
 {
 	struct port_info *pi = device_get_softc(dev);
 	struct adapter *sc = pi->adapter;
-	struct ifnet *ifp = pi->ifp;
+	int rc;
 
-	/* Tell if_ioctl and if_init that the port is going away */
-	ADAPTER_LOCK(sc);
-	SET_DOOMED(pi);
-	wakeup(&sc->flags);
-	while (IS_BUSY(sc))
-		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
-	SET_BUSY(sc);
-#ifdef INVARIANTS
-	sc->last_op = "t4detach";
-	sc->last_op_thr = curthread;
-	sc->last_op_flags = 0;
-#endif
-	ADAPTER_UNLOCK(sc);
+	/* Detach the extra VIs first. */
+	rc = bus_generic_detach(dev);
+	if (rc)
+		return (rc);
+	device_delete_children(dev);
+
+	doom_vi(sc, &pi->vi[0]);
 
 	if (pi->flags & HAS_TRACEQ) {
 		sc->traceq = -1;	/* cloner should not create ifnet */
 		t4_tracer_port_detach(sc);
 	}
 
-	if (pi->vlan_c)
-		EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c);
-
-	PORT_LOCK(pi);
-	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-	callout_stop(&pi->tick);
-	PORT_UNLOCK(pi);
+	cxgbe_vi_detach(&pi->vi[0]);
 	callout_drain(&pi->tick);
 
-	/* Let detach proceed even if these fail. */
-	cxgbe_uninit_synchronized(pi);
-	port_full_uninit(pi);
-
-	ifmedia_removeall(&pi->media);
-	ether_ifdetach(pi->ifp);
-	if_free(pi->ifp);
-
-#ifdef DEV_NETMAP
-	/* XXXNM: equivalent of cxgbe_uninit_synchronized to ifdown nm_ifp */
-	destroy_netmap_ifnet(pi);
-#endif
-
-	ADAPTER_LOCK(sc);
-	CLR_BUSY(sc);
-	wakeup(&sc->flags);
-	ADAPTER_UNLOCK(sc);
+	end_synchronized_op(sc, 0);
 
 	return (0);
 }
@@ -1224,12 +1343,12 @@ cxgbe_detach(device_t dev)
 static void
 cxgbe_init(void *arg)
 {
-	struct port_info *pi = arg;
-	struct adapter *sc = pi->adapter;
+	struct vi_info *vi = arg;
+	struct adapter *sc = vi->pi->adapter;
 
-	if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0)
+	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
 		return;
-	cxgbe_init_synchronized(pi);
+	cxgbe_init_synchronized(vi);
 	end_synchronized_op(sc, 0);
 }
 
@@ -1237,8 +1356,8 @@ static int
 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
 {
 	int rc = 0, mtu, flags, can_sleep;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512030002.tB3021H0074684>