Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 11 Dec 2015 12:22:37 +0000
From:      Steven Hartland <steven.hartland@multiplay.co.uk>
To:        Steven Hartland <smh@FreeBSD.org>, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   Re: svn commit: r292094 - stable/10/sys/dev/ixl
Message-ID:  <566AC00D.4090200@multiplay.co.uk>
In-Reply-To: <201512111216.tBBCG5BI044327@repo.freebsd.org>
References:  <201512111216.tBBCG5BI044327@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
This commit message should have referenced the code review at https://reviews.freebsd.org/D4265

On 11/12/2015 12:16, Steven Hartland wrote:
> Author: smh
> Date: Fri Dec 11 12:16:05 2015
> New Revision: 292094
> URL: https://svnweb.freebsd.org/changeset/base/292094
>
> Log:
>    MFC r277084,r277088,r277130,r277143,r277151,r277262
>    
>    r277084: Intel I40E updates ixl v1.3.0 and ixlv 1.2.0 featuring RSS
>    r277088, r277130, r277143, r277151 & r277262: Misc RSS fixes
>    
>    The main change is the addition of RSS, which is not supported in stable/10;
>    however, these commits also include other bug fixes. In order to bring those
>    fixes in and facilitate easier merging of future updates, the RSS changes are
>    maintained but left disabled by the removal of the opt_rss.h include.
>    
>    Sponsored by:	Multiplay
>
> Modified:
>    stable/10/sys/dev/ixl/if_ixl.c
>    stable/10/sys/dev/ixl/if_ixlv.c
>    stable/10/sys/dev/ixl/ixl.h
>    stable/10/sys/dev/ixl/ixl_txrx.c
> Directory Properties:
>    stable/10/   (props changed)
>
> Modified: stable/10/sys/dev/ixl/if_ixl.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/if_ixl.c	Fri Dec 11 11:08:00 2015	(r292093)
> +++ stable/10/sys/dev/ixl/if_ixl.c	Fri Dec 11 12:16:05 2015	(r292094)
> @@ -37,10 +37,14 @@
>   #include "ixl.h"
>   #include "ixl_pf.h"
>   
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
>   /*********************************************************************
>    *  Driver version
>    *********************************************************************/
> -char ixl_driver_version[] = "1.2.8";
> +char ixl_driver_version[] = "1.3.1";
>   
>   /*********************************************************************
>    *  PCI Device ID Table
> @@ -174,7 +178,7 @@ static void	ixl_stat_update48(struct i40
>   static void	ixl_stat_update32(struct i40e_hw *, u32, bool,
>   		    u64 *, u64 *);
>   
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
>   static int 	ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS);
>   static int	ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS);
>   static int	ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS);
> @@ -427,7 +431,7 @@ ixl_attach(device_t dev)
>   	    OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW,
>   	    &ixl_dynamic_tx_itr, 0, "Dynamic TX ITR");
>   
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
>   	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
>   	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
>   	    OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD,
> @@ -662,8 +666,9 @@ ixl_attach(device_t dev)
>   
>   	/* Reset port's advertised speeds */
>   	if (!i40e_is_40G_device(hw->device_id)) {
> -		pf->advertised_speed = 0x7;
> -		ixl_set_advertised_speeds(pf, 0x7);
> +		pf->advertised_speed =
> +		    (hw->device_id == I40E_DEV_ID_10G_BASE_T) ? 0x7 : 0x6;
> +		ixl_set_advertised_speeds(pf, pf->advertised_speed);
>   	}
>   
>   	/* Register for VLAN events */
> @@ -1407,6 +1412,12 @@ ixl_media_status(struct ifnet * ifp, str
>   		case I40E_PHY_TYPE_10GBASE_SFPP_CU:
>   			ifmr->ifm_active |= IFM_10G_TWINAX;
>   			break;
> +		case I40E_PHY_TYPE_10GBASE_KR:
> +			/*
> +			** this is not technically correct
> +			** but FreeBSD does not have the media
> +			** type defined yet, so its a compromise.
> +			*/
>   		case I40E_PHY_TYPE_10GBASE_SR:
>   			ifmr->ifm_active |= IFM_10G_SR;
>   			break;
> @@ -1721,8 +1732,10 @@ ixl_local_timer(void *arg)
>   				vsi->active_queues |= ((u64)1 << que->me);
>   		}
>   		if (que->busy >= IXL_MAX_TX_BUSY) {
> +#ifdef IXL_DEBUG
>   			device_printf(dev,"Warning queue %d "
>   			    "appears to be hung!\n", i);
> +#endif
>   			que->busy = IXL_QUEUE_HUNG;
>   			++hung;
>   		}
> @@ -1765,6 +1778,15 @@ ixl_update_link_status(struct ixl_pf *pf
>   				    "Full Duplex", ixl_fc_string[fc]);
>   			}
>   			vsi->link_active = TRUE;
> +			/*
> +			** Warn user if link speed on NPAR enabled
> +			** partition is not at least 10GB
> +			*/
> +			if (hw->func_caps.npar_enable &&
> +			   (hw->phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
> +			   hw->phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
> +				device_printf(dev, "The partition detected link"
> +				    "speed that is less than 10Gbps\n");
>   			if_link_state_change(ifp, LINK_STATE_UP);
>   		}
>   	} else { /* Link down */
> @@ -1901,6 +1923,7 @@ ixl_assign_vsi_msix(struct ixl_pf *pf)
>   
>   	/* Now set up the stations */
>   	for (int i = 0; i < vsi->num_queues; i++, vector++, que++) {
> +		int cpu_id = i;
>   		rid = vector + 1;
>   		txr = &que->txr;
>   		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
> @@ -1921,14 +1944,23 @@ ixl_assign_vsi_msix(struct ixl_pf *pf)
>   		}
>   		bus_describe_intr(dev, que->res, que->tag, "q%d", i);
>   		/* Bind the vector to a CPU */
> -		bus_bind_intr(dev, que->res, i);
> +#ifdef RSS
> +		cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#endif
> +		bus_bind_intr(dev, que->res, cpu_id);
>   		que->msix = vector;
>   		TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
>   		TASK_INIT(&que->task, 0, ixl_handle_que, que);
>   		que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
>   		    taskqueue_thread_enqueue, &que->tq);
> -		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> -		    device_get_nameunit(pf->dev));
> +#ifdef RSS
> +		taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> +		    cpu_id, "%s (bucket %d)",
> +		    device_get_nameunit(dev), cpu_id);
> +#else
> +		taskqueue_start_threads(&que->tq, 1, PI_NET,
> +		    "%s que", device_get_nameunit(dev));
> +#endif
>   	}
>   
>   	return (0);
> @@ -1995,6 +2027,12 @@ ixl_init_msix(struct ixl_pf *pf)
>   	if ((ixl_max_queues != 0) && (ixl_max_queues <= queues))
>   		queues = ixl_max_queues;
>   
> +#ifdef  RSS
> +	/* If we're doing RSS, clamp at the number of RSS buckets */
> +	if (queues > rss_getnumbuckets())
> +		queues = rss_getnumbuckets();
> +#endif
> +
>   	/*
>   	** Want one vector (RX/TX pair) per queue
>   	** plus an additional for the admin queue.
> @@ -2015,6 +2053,25 @@ ixl_init_msix(struct ixl_pf *pf)
>   		    "Using MSIX interrupts with %d vectors\n", vectors);
>   		pf->msix = vectors;
>   		pf->vsi.num_queues = queues;
> +#ifdef RSS
> +		/*
> +		 * If we're doing RSS, the number of queues needs to
> +		 * match the number of RSS buckets that are configured.
> +		 *
> +		 * + If there's more queues than RSS buckets, we'll end
> +		 *   up with queues that get no traffic.
> +		 *
> +		 * + If there's more RSS buckets than queues, we'll end
> +		 *   up having multiple RSS buckets map to the same queue,
> +		 *   so there'll be some contention.
> +		 */
> +		if (queues != rss_getnumbuckets()) {
> +			device_printf(dev,
> +			    "%s: queues (%d) != RSS buckets (%d)"
> +			    "; performance will be impacted.\n",
> +			    __func__, queues, rss_getnumbuckets());
> +		}
> +#endif
>   		return (vectors);
>   	}
>   msi:
> @@ -2383,7 +2440,8 @@ ixl_setup_interface(device_t dev, struct
>   	if (aq_error == I40E_ERR_UNKNOWN_PHY) {
>   		/* Need delay to detect fiber correctly */
>   		i40e_msec_delay(200);
> -		aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities_resp, NULL);
> +		aq_error = i40e_aq_get_phy_capabilities(hw, FALSE,
> +		    TRUE, &abilities_resp, NULL);
>   		if (aq_error == I40E_ERR_UNKNOWN_PHY)
>   			device_printf(dev, "Unknown PHY type detected!\n");
>   		else
> @@ -3043,7 +3101,6 @@ ixl_add_sysctls_eth_stats(struct sysctl_
>   			"Multicast Packets Transmitted"},
>   		{&eth_stats->tx_broadcast, "bcast_pkts_txd",
>   			"Broadcast Packets Transmitted"},
> -		{&eth_stats->tx_discards, "tx_discards", "Discarded TX packets"},
>   		// end
>   		{0,0,0}
>   	};
> @@ -3126,19 +3183,45 @@ static void ixl_config_rss(struct ixl_vs
>   	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;
>   	struct i40e_hw	*hw = vsi->hw;
>   	u32		lut = 0;
> -	u64		set_hena, hena;
> -	int		i, j;
> +	u64		set_hena = 0, hena;
> +	int		i, j, que_id;
> +#ifdef RSS
> +	u32		rss_hash_config;
> +	u32		rss_seed[IXL_KEYSZ];
> +#else
> +	u32             rss_seed[IXL_KEYSZ] = {0x41b01687,
> +			    0x183cfd8c, 0xce880440, 0x580cbc3c,
> +			    0x35897377, 0x328b25e1, 0x4fa98922,
> +			    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1};
> +#endif
>   
> -	static const u32 seed[I40E_PFQF_HKEY_MAX_INDEX + 1] = {0x41b01687,
> -	    0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377,
> -	    0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d,
> -	    0xcd15a2c1, 0xe8580225, 0x4a1e9d11, 0xfe5731be};
> +#ifdef RSS
> +        /* Fetch the configured RSS key */
> +        rss_getkey((uint8_t *) &rss_seed);
> +#endif
>   
>   	/* Fill out hash function seed */
> -	for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
> -                wr32(hw, I40E_PFQF_HKEY(i), seed[i]);
> +	for (i = 0; i < IXL_KEYSZ; i++)
> +                wr32(hw, I40E_PFQF_HKEY(i), rss_seed[i]);
>   
>   	/* Enable PCTYPES for RSS: */
> +#ifdef RSS
> +	rss_hash_config = rss_gethashconfig();
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
> +        if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
> +		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
> +        if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
> +#else
>   	set_hena =
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
> @@ -3151,7 +3234,7 @@ static void ixl_config_rss(struct ixl_vs
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
>   		((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
>   		((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
> -
> +#endif
>   	hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
>   	    ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
>   	hena |= set_hena;
> @@ -3162,8 +3245,19 @@ static void ixl_config_rss(struct ixl_vs
>   	for (i = j = 0; i < pf->hw.func_caps.rss_table_size; i++, j++) {
>   		if (j == vsi->num_queues)
>   			j = 0;
> +#ifdef RSS
> +		/*
> +		 * Fetch the RSS bucket id for the given indirection entry.
> +		 * Cap it at the number of configured buckets (which is
> +		 * num_queues.)
> +		 */
> +		que_id = rss_get_indirection_to_bucket(i);
> +		que_id = que_id % vsi->num_queues;
> +#else
> +		que_id = j;
> +#endif
>   		/* lut = 4-byte sliding window of 4 lut entries */
> -		lut = (lut << 8) | (j &
> +		lut = (lut << 8) | (que_id &
>   		    ((0x1 << pf->hw.func_caps.rss_table_entry_width) - 1));
>   		/* On i = 3, we have 4 entries in lut; write to the register */
>   		if ((i & 3) == 3)
> @@ -3401,7 +3495,7 @@ ixl_add_hw_filters(struct ixl_vsi *vsi,
>   	a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt,
>   	    M_DEVBUF, M_NOWAIT | M_ZERO);
>   	if (a == NULL) {
> -		device_printf(dev, "add hw filter failed to get memory\n");
> +		device_printf(dev, "add_hw_filters failed to get memory\n");
>   		return;
>   	}
>   
> @@ -3426,8 +3520,8 @@ ixl_add_hw_filters(struct ixl_vsi *vsi,
>   	if (j > 0) {
>   		err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL);
>   		if (err)
> -			device_printf(dev, "aq_add_macvlan failure %d\n",
> -			    hw->aq.asq_last_status);
> +			device_printf(dev, "aq_add_macvlan err %d, aq_error %d\n",
> +			    err, hw->aq.asq_last_status);
>   		else
>   			vsi->hw_filters_add += j;
>   	}
> @@ -3476,6 +3570,7 @@ ixl_del_hw_filters(struct ixl_vsi *vsi,
>   		err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL);
>   		/* NOTE: returns ENOENT every time but seems to work fine,
>   		   so we'll ignore that specific error. */
> +		// TODO: Does this still occur on current firmwares?
>   		if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) {
>   			int sc = 0;
>   			for (int i = 0; i < j; i++)
> @@ -3828,29 +3923,6 @@ ixl_update_stats_counters(struct ixl_pf
>   			   pf->stat_offsets_loaded,
>   			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
>   
> -	/* Priority flow control stats */
> -#if 0
> -	for (int i = 0; i < 8; i++) {
> -		ixl_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
> -				   pf->stat_offsets_loaded,
> -				   &osd->priority_xon_rx[i],
> -				   &nsd->priority_xon_rx[i]);
> -		ixl_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
> -				   pf->stat_offsets_loaded,
> -				   &osd->priority_xon_tx[i],
> -				   &nsd->priority_xon_tx[i]);
> -		ixl_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
> -				   pf->stat_offsets_loaded,
> -				   &osd->priority_xoff_tx[i],
> -				   &nsd->priority_xoff_tx[i]);
> -		ixl_stat_update32(hw,
> -				   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
> -				   pf->stat_offsets_loaded,
> -				   &osd->priority_xon_2_xoff[i],
> -				   &nsd->priority_xon_2_xoff[i]);
> -	}
> -#endif
> -
>   	/* Packet size stats rx */
>   	ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
>   			   I40E_GLPRT_PRC64L(hw->port),
> @@ -4377,6 +4449,15 @@ ixl_set_advertised_speeds(struct ixl_pf
>   		return (EAGAIN);
>   	}
>   
> +	/*
> +	** This seems a bit heavy handed, but we
> +	** need to get a reinit on some devices
> +	*/
> +	IXL_PF_LOCK(pf);
> +	ixl_stop(pf);
> +	ixl_init_locked(pf);
> +	IXL_PF_UNLOCK(pf);
> +
>   	return (0);
>   }
>   
> @@ -4521,7 +4602,7 @@ ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
>   }
>   
>   
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
>   static int
>   ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS)
>   {
> @@ -4630,6 +4711,16 @@ ixl_sysctl_sw_filter_list(SYSCTL_HANDLER
>   
>   #define IXL_SW_RES_SIZE 0x14
>   static int
> +ixl_res_alloc_cmp(const void *a, const void *b)
> +{
> +	const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two;
> +	one = (struct i40e_aqc_switch_resource_alloc_element_resp *)a;
> +	two = (struct i40e_aqc_switch_resource_alloc_element_resp *)b;
> +
> +	return ((int)one->resource_type - (int)two->resource_type);
> +}
> +
> +static int
>   ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS)
>   {
>   	struct ixl_pf *pf = (struct ixl_pf *)arg1;
> @@ -4647,6 +4738,7 @@ ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_A
>   		return (ENOMEM);
>   	}
>   
> +	bzero(resp, sizeof(resp));
>   	error = i40e_aq_get_switch_resource_alloc(hw, &num_entries,
>   				resp,
>   				IXL_SW_RES_SIZE,
> @@ -4657,9 +4749,14 @@ ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_A
>   		sbuf_delete(buf);
>   		return error;
>   	}
> -	device_printf(dev, "Num_entries: %d\n", num_entries);
> +
> +	/* Sort entries by type for display */
> +	qsort(resp, num_entries,
> +	    sizeof(struct i40e_aqc_switch_resource_alloc_element_resp),
> +	    &ixl_res_alloc_cmp);
>   
>   	sbuf_cat(buf, "\n");
> +	sbuf_printf(buf, "# of entries: %d\n", num_entries);
>   	sbuf_printf(buf,
>   	    "Type | Guaranteed | Total | Used   | Un-allocated\n"
>   	    "     | (this)     | (all) | (this) | (all)       \n");
> @@ -4847,5 +4944,5 @@ ixl_sysctl_dump_txd(SYSCTL_HANDLER_ARGS)
>   	sbuf_delete(buf);
>   	return error;
>   }
> -#endif
> +#endif /* IXL_DEBUG_SYSCTL */
>   
>
> Modified: stable/10/sys/dev/ixl/if_ixlv.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/if_ixlv.c	Fri Dec 11 11:08:00 2015	(r292093)
> +++ stable/10/sys/dev/ixl/if_ixlv.c	Fri Dec 11 12:16:05 2015	(r292094)
> @@ -37,10 +37,14 @@
>   #include "ixl.h"
>   #include "ixlv.h"
>   
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
>   /*********************************************************************
>    *  Driver version
>    *********************************************************************/
> -char ixlv_driver_version[] = "1.1.18";
> +char ixlv_driver_version[] = "1.2.1";
>   
>   /*********************************************************************
>    *  PCI Device ID Table
> @@ -1161,7 +1165,11 @@ ixlv_init_msix(struct ixlv_sc *sc)
>   	/* Override with hardcoded value if sane */
>   	if ((ixlv_max_queues != 0) && (ixlv_max_queues <= queues))
>   		queues = ixlv_max_queues;
> -
> +#ifdef  RSS
> +	/* If we're doing RSS, clamp at the number of RSS buckets */
> +	if (queues > rss_getnumbuckets())
> +		queues = rss_getnumbuckets();
> +#endif
>   	/* Enforce the VF max value */
>   	if (queues > IXLV_MAX_QUEUES)
>   		queues = IXLV_MAX_QUEUES;
> @@ -1181,6 +1189,26 @@ ixlv_init_msix(struct ixlv_sc *sc)
>   		goto fail;
>   	}
>   
> +#ifdef RSS
> +	/*
> +	* If we're doing RSS, the number of queues needs to
> +	* match the number of RSS buckets that are configured.
> +	*
> +	* + If there's more queues than RSS buckets, we'll end
> +	*   up with queues that get no traffic.
> +	*
> +	* + If there's more RSS buckets than queues, we'll end
> +	*   up having multiple RSS buckets map to the same queue,
> +	*   so there'll be some contention.
> +	*/
> +	if (queues != rss_getnumbuckets()) {
> +		device_printf(dev,
> +		    "%s: queues (%d) != RSS buckets (%d)"
> +		    "; performance will be impacted.\n",
> +		     __func__, queues, rss_getnumbuckets());
> +	}
> +#endif
> +
>   	if (pci_alloc_msix(dev, &vectors) == 0) {
>   		device_printf(sc->dev,
>   		    "Using MSIX interrupts with %d vectors\n", vectors);
> @@ -1352,6 +1380,7 @@ ixlv_assign_msix(struct ixlv_sc *sc)
>   	int 		error, rid, vector = 1;
>   
>   	for (int i = 0; i < vsi->num_queues; i++, vector++, que++) {
> +		int cpu_id = i;
>   		rid = vector + 1;
>   		txr = &que->txr;
>   		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
> @@ -1372,15 +1401,25 @@ ixlv_assign_msix(struct ixlv_sc *sc)
>   		}
>   		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
>   		/* Bind the vector to a CPU */
> -		bus_bind_intr(dev, que->res, i);
> +#ifdef RSS
> +		cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#endif
> +		bus_bind_intr(dev, que->res, cpu_id);
>   		que->msix = vector;
>           	vsi->que_mask |= (u64)(1 << que->msix);
>   		TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
>   		TASK_INIT(&que->task, 0, ixlv_handle_que, que);
>   		que->tq = taskqueue_create_fast("ixlv_que", M_NOWAIT,
>   		    taskqueue_thread_enqueue, &que->tq);
> -		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> -		    device_get_nameunit(sc->dev));
> +#ifdef RSS
> +		taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> +		    cpu_id, "%s (bucket %d)",
> +		    device_get_nameunit(dev), cpu_id);
> +#else
> +                taskqueue_start_threads(&que->tq, 1, PI_NET,
> +                    "%s que", device_get_nameunit(dev));
> +#endif
> +
>   	}
>   
>   	return (0);
> @@ -2521,16 +2560,18 @@ ixlv_config_rss(struct ixlv_sc *sc)
>   	struct i40e_hw	*hw = &sc->hw;
>   	struct ixl_vsi	*vsi = &sc->vsi;
>   	u32		lut = 0;
> -	u64		set_hena, hena;
> -	int		i, j;
> -
> -	/* set up random bits */
> -	static const u32 seed[I40E_VFQF_HKEY_MAX_INDEX + 1] = {
> -	    0x794221b4, 0xbca0c5ab, 0x6cd5ebd9, 0x1ada6127,
> -	    0x983b3aa1, 0x1c4e71eb, 0x7f6328b2, 0xfcdc0da0,
> -	    0xc135cafa, 0x7a6f7e2d, 0xe7102d28, 0x163cd12e,
> -	    0x4954b126 };
> -
> +	u64		set_hena = 0, hena;
> +	int		i, j, que_id;
> +#ifdef RSS
> +	u32		rss_hash_config;
> +	u32		rss_seed[IXL_KEYSZ];
> +#else
> +	u32		rss_seed[IXL_KEYSZ] = {0x41b01687,
> +			    0x183cfd8c, 0xce880440, 0x580cbc3c,
> +			    0x35897377, 0x328b25e1, 0x4fa98922,
> +			    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1};
> +#endif
> +
>   	/* Don't set up RSS if using a single queue */
>   	if (vsi->num_queues == 1) {
>   		wr32(hw, I40E_VFQF_HENA(0), 0);
> @@ -2539,11 +2580,32 @@ ixlv_config_rss(struct ixlv_sc *sc)
>   		return;
>   	}
>   
> +#ifdef RSS
> +	/* Fetch the configured RSS key */
> +	rss_getkey((uint8_t *) &rss_seed);
> +#endif
>   	/* Fill out hash function seed */
> -	for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++)
> -                wr32(hw, I40E_VFQF_HKEY(i), seed[i]);
> +	for (i = 0; i <= IXL_KEYSZ; i++)
> +                wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]);
>   
>   	/* Enable PCTYPES for RSS: */
> +#ifdef RSS
> +	rss_hash_config = rss_gethashconfig();
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
> +        if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
> +		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
> +	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
> +        if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
> +                set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
> +#else
>   	set_hena =
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
> @@ -2556,7 +2618,7 @@ ixlv_config_rss(struct ixlv_sc *sc)
>   		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
>   		((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
>   		((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
> -
> +#endif
>   	hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) |
>   	    ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32);
>   	hena |= set_hena;
> @@ -2564,16 +2626,26 @@ ixlv_config_rss(struct ixlv_sc *sc)
>   	wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32));
>   
>   	/* Populate the LUT with max no. of queues in round robin fashion */
> -	for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; j++) {
> +	for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++, j++) {
>                   if (j == vsi->num_queues)
>                           j = 0;
> +#ifdef RSS
> +		/*
> +		 * Fetch the RSS bucket id for the given indirection entry.
> +		 * Cap it at the number of configured buckets (which is
> +		 * num_queues.)
> +		 */
> +		que_id = rss_get_indirection_to_bucket(i);
> +		que_id = que_id % vsi->num_queues;
> +#else
> +		que_id = j;
> +#endif
>                   /* lut = 4-byte sliding window of 4 lut entries */
> -                lut = (lut << 8) | (j & 0xF);
> +                lut = (lut << 8) | (que_id & 0xF);
>                   /* On i = 3, we have 4 entries in lut; write to the register */
> -                if ((j & 3) == 3) {
> +                if ((i & 3) == 3) {
>                           wr32(hw, I40E_VFQF_HLUT(i), lut);
>   			DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut);
> -			i++;
>   		}
>           }
>   	ixl_flush(hw);
>
> Modified: stable/10/sys/dev/ixl/ixl.h
> ==============================================================================
> --- stable/10/sys/dev/ixl/ixl.h	Fri Dec 11 11:08:00 2015	(r292093)
> +++ stable/10/sys/dev/ixl/ixl.h	Fri Dec 11 12:16:05 2015	(r292094)
> @@ -93,7 +93,7 @@
>   #include "i40e_type.h"
>   #include "i40e_prototype.h"
>   
> -#ifdef IXL_DEBUG
> +#if defined(IXL_DEBUG) || defined(IXL_DEBUG_SYSCTL)
>   #include <sys/sbuf.h>
>   
>   #define MAC_FORMAT "%02x:%02x:%02x:%02x:%02x:%02x"
> @@ -101,7 +101,13 @@
>   	(mac_addr)[0], (mac_addr)[1], (mac_addr)[2], (mac_addr)[3], \
>   	(mac_addr)[4], (mac_addr)[5]
>   #define ON_OFF_STR(is_set) ((is_set) ? "On" : "Off")
> +#endif /* IXL_DEBUG || IXL_DEBUG_SYSCTL */
>   
> +#ifdef IXL_DEBUG
> +/* Enable debug sysctls */
> +#ifndef IXL_DEBUG_SYSCTL
> +#define IXL_DEBUG_SYSCTL 1
> +#endif
>   
>   #define _DBG_PRINTF(S, ...)		printf("%s: " S "\n", __func__, ##__VA_ARGS__)
>   #define _DEV_DBG_PRINTF(dev, S, ...)	device_printf(dev, "%s: " S "\n", __func__, ##__VA_ARGS__)
> @@ -128,7 +134,7 @@
>   
>   #define HW_DEBUGOUT(...)		if (DEBUG_HW) _DBG_PRINTF(__VA_ARGS__)
>   
> -#else
> +#else /* no IXL_DEBUG */
>   #define DEBUG_INIT  0
>   #define DEBUG_IOCTL 0
>   #define DEBUG_HW    0
> @@ -144,7 +150,7 @@
>   #define IOCTL_DBG_IF2(...)
>   #define IOCTL_DBG_IF(...)
>   #define HW_DEBUGOUT(...)
> -#endif
> +#endif /* IXL_DEBUG */
>   
>   /* Tunables */
>   
> @@ -214,6 +220,7 @@
>   #define IXL_MAX_TSO_SEGS	66
>   #define IXL_SPARSE_CHAIN	6
>   #define IXL_QUEUE_HUNG		0x80000000
> +#define IXL_KEYSZ		10
>   
>   /* ERJ: hardware can support ~1.5k filters between all functions */
>   #define IXL_MAX_FILTERS	256
>
> Modified: stable/10/sys/dev/ixl/ixl_txrx.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/ixl_txrx.c	Fri Dec 11 11:08:00 2015	(r292093)
> +++ stable/10/sys/dev/ixl/ixl_txrx.c	Fri Dec 11 12:16:05 2015	(r292094)
> @@ -42,6 +42,10 @@
>   #include "opt_inet6.h"
>   #include "ixl.h"
>   
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
>   /* Local Prototypes */
>   static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
>   static void	ixl_refresh_mbufs(struct ixl_queue *, int);
> @@ -65,14 +69,33 @@ ixl_mq_start(struct ifnet *ifp, struct m
>   	struct ixl_queue	*que;
>   	struct tx_ring		*txr;
>   	int 			err, i;
> +#ifdef RSS
> +	u32			bucket_id;
> +#endif
>   
> -	/* check if flowid is set */
> -	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
> -		i = m->m_pkthdr.flowid % vsi->num_queues;
> -	else
> +	/*
> +	** Which queue to use:
> +	**
> +	** When doing RSS, map it to the same outbound
> +	** queue as the incoming flow would be mapped to.
> +	** If everything is setup correctly, it should be
> +	** the same bucket that the current CPU we're on is.
> +	*/
> +	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
> +#ifdef  RSS
> +		if (rss_hash2bucket(m->m_pkthdr.flowid,
> +		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
> +			i = bucket_id % vsi->num_queues;
> +                } else
> +#endif
> +                        i = m->m_pkthdr.flowid % vsi->num_queues;
> +        } else
>   		i = curcpu % vsi->num_queues;
> -
> -	/* Check for a hung queue and pick alternative */
> +	/*
> +	** This may not be perfect, but until something
> +	** better comes along it will keep from scheduling
> +	** on stalled queues.
> +	*/
>   	if (((1 << i) & vsi->active_queues) == 0)
>   		i = ffsl(vsi->active_queues);
>   
> @@ -1089,8 +1112,8 @@ int
>   ixl_init_rx_ring(struct ixl_queue *que)
>   {
>   	struct	rx_ring 	*rxr = &que->rxr;
> -#if defined(INET6) || defined(INET)
>   	struct ixl_vsi		*vsi = que->vsi;
> +#if defined(INET6) || defined(INET)
>   	struct ifnet		*ifp = vsi->ifp;
>   	struct lro_ctrl		*lro = &rxr->lro;
>   #endif
> @@ -1345,6 +1368,63 @@ ixl_rx_discard(struct rx_ring *rxr, int
>   	return;
>   }
>   
> +#ifdef RSS
> +/*
> +** ixl_ptype_to_hash: parse the packet type
> +** to determine the appropriate hash.
> +*/
> +static inline int
> +ixl_ptype_to_hash(u8 ptype)
> +{
> +        struct i40e_rx_ptype_decoded	decoded;
> +	u8				ex = 0;
> +
> +	decoded = decode_rx_desc_ptype(ptype);
> +	ex = decoded.outer_frag;
> +
> +	if (!decoded.known)
> +		return M_HASHTYPE_OPAQUE;
> +
> +	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
> +		return M_HASHTYPE_OPAQUE;
> +
> +	/* Note: anything that gets to this point is IP */
> +        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
> +		switch (decoded.inner_prot) {
> +			case I40E_RX_PTYPE_INNER_PROT_TCP:
> +				if (ex)
> +					return M_HASHTYPE_RSS_TCP_IPV6_EX;
> +				else
> +					return M_HASHTYPE_RSS_TCP_IPV6;
> +			case I40E_RX_PTYPE_INNER_PROT_UDP:
> +				if (ex)
> +					return M_HASHTYPE_RSS_UDP_IPV6_EX;
> +				else
> +					return M_HASHTYPE_RSS_UDP_IPV6;
> +			default:
> +				if (ex)
> +					return M_HASHTYPE_RSS_IPV6_EX;
> +				else
> +					return M_HASHTYPE_RSS_IPV6;
> +		}
> +	}
> +        if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
> +		switch (decoded.inner_prot) {
> +			case I40E_RX_PTYPE_INNER_PROT_TCP:
> +					return M_HASHTYPE_RSS_TCP_IPV4;
> +			case I40E_RX_PTYPE_INNER_PROT_UDP:
> +				if (ex)
> +					return M_HASHTYPE_RSS_UDP_IPV4_EX;
> +				else
> +					return M_HASHTYPE_RSS_UDP_IPV4;
> +			default:
> +					return M_HASHTYPE_RSS_IPV4;
> +		}
> +	}
> +	/* We should never get here!! */
> +	return M_HASHTYPE_OPAQUE;
> +}
> +#endif /* RSS */
>   
>   /*********************************************************************
>    *
> @@ -1542,8 +1622,14 @@ ixl_rxeof(struct ixl_queue *que, int cou
>   			rxr->bytes += sendmp->m_pkthdr.len;
>   			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
>   				ixl_rx_checksum(sendmp, status, error, ptype);
> +#ifdef RSS
> +			sendmp->m_pkthdr.flowid =
> +			    le32toh(cur->wb.qword0.hi_dword.rss);
> +			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
> +#else
>   			sendmp->m_pkthdr.flowid = que->msix;
>   			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
> +#endif
>   		}
>   next_desc:
>   		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?566AC00D.4090200>