Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 7 Nov 2017 23:52:14 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r325522 - in projects/bsd_rdma_4_9: contrib/ofed/libcxgb4 sys/compat/linuxkpi/common/include/linux sys/dev/cxgbe sys/dev/cxgbe/common sys/dev/cxgbe/iw_cxgbe sys/modules/cxgbe/iw_cxgbe
Message-ID:  <201711072352.vA7NqEHU046048@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Tue Nov  7 23:52:14 2017
New Revision: 325522
URL: https://svnweb.freebsd.org/changeset/base/325522

Log:
  Update the iw_cxgbe bits in the projects branch.
  
  Submitted by:	Krishnamraju Eraparaju @ Chelsio
  Sponsored by:	Chelsio Communications

Modified:
  projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c
  projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c
  projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c
  projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h
  projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c
  projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cq.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/device.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/mem.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/provider.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/qp.c
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/t4.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/user.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/offload.h
  projects/bsd_rdma_4_9/sys/dev/cxgbe/t4_main.c
  projects/bsd_rdma_4_9/sys/modules/cxgbe/iw_cxgbe/Makefile

Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -437,7 +437,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
 		if (!*cqe_flushed && CQE_STATUS(hw_cqe))
 			dump_cqe(hw_cqe);
 
-		BUG_ON((*cqe_flushed == 0) && !SW_CQE(hw_cqe));
+		BUG_ON((cqe_flushed == 0) && !SW_CQE(hw_cqe));
 		goto proc_cqe;
 	}
 

Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -39,6 +39,7 @@
 #include <pthread.h>
 #include <string.h>
 #include <signal.h>
+#include <stdbool.h>
 
 #include "libcxgb4.h"
 #include "cxgb4-abi.h"
@@ -194,6 +195,17 @@ static struct ibv_context *c4iw_alloc_context(struct i
 		rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
 		if (!rhp->cqid2ptr)
 			goto err_unmap;
+
+		/* Disable userspace WC if architecture/adapter does not
+		 * support WC.
+		 * Note: To forcefully disable WC in kernel driver use the
+		 * loader tunable "hw.cxl.write_combine=0"
+		 */
+		if (t5_en_wc && !context->status_page->wc_supported) {
+			fprintf(stderr, "iw_cxgb4 driver doesn't support Write "
+				"Combine, so regular DB writes will be used\n");
+			t5_en_wc = 0;
+		}
 	}
 
 	return &context->ibv_ctx;
@@ -400,11 +412,44 @@ int c4iw_abi_version = 1;
 static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
 					      int abi_version)
 {
-	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp;
+	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
+	char dev_str[IBV_SYSFS_PATH_MAX];
 	struct c4iw_dev *dev;
 	unsigned vendor, device, fw_maj, fw_min;
 	int i;
+	char devnum;
+	char ib_param[16];
 
+#ifndef __linux__
+	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
+				ibdev, sizeof ibdev) < 0)
+		return NULL;
+
+	devnum = atoi(&ibdev[5]);
+
+	if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
+	    strstr(&ibdev[2], "nex") && devnum >= 0) {
+		snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
+		    devnum);
+	} else
+		return NULL;
+
+	if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0)
+		return NULL;
+	else {
+		if (strstr(value, "vendor=")) {
+			strncpy(ib_param, strstr(value, "vendor=") +
+					strlen("vendor="), 6);
+			sscanf(ib_param, "%i", &vendor);
+		}
+
+		if (strstr(value, "device=")) {
+			strncpy(ib_param, strstr(value, "device=") +
+					strlen("device="), 6);
+			sscanf(ib_param, "%i", &device);
+		}
+	}
+#else
 	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
 				value, sizeof value) < 0)
 		return NULL;
@@ -414,6 +459,7 @@ static struct verbs_device *cxgb4_driver_init(const ch
 				value, sizeof value) < 0)
 		return NULL;
 	sscanf(value, "%i", &device);
+#endif
 
 	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
 		if (vendor == hca_table[i].vendor &&
@@ -425,6 +471,11 @@ static struct verbs_device *cxgb4_driver_init(const ch
 found:
 	c4iw_abi_version = abi_version;	
 
+#ifndef __linux__
+	if (ibv_read_sysfs_file(dev_str, "firmware_version",
+				value, sizeof value) < 0)
+		return NULL;
+#else
 	/*
 	 * Verify that the firmware major number matches.  Major number
 	 * mismatches are fatal.  Minor number mismatches are tolerated.
@@ -438,6 +489,7 @@ found:
 		 ibv_get_sysfs_path(), ibdev);
 	if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
 		return NULL;
+#endif
 
 	cp = strtok(value+1, ".");
 	sscanf(cp, "%i", &fw_maj);

Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -44,10 +44,13 @@ struct c4iw_stats c4iw_stats;
 
 static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
 {
-	u64 *src, *dst;
+	void *src, *dst;
+	uintptr_t end;
+	int total, len;
 
-	src = (u64 *)wqe;
-	dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+	src = &wqe->flits[0];
+	dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
+	    (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
 	if (t4_sq_onchip(wq)) {
 		len16 = align(len16, 4);
 
@@ -57,17 +60,18 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w
 		 * happens */
 		mmio_wc_start();
 	}
-	while (len16) {
-		*dst++ = *src++;
-		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
-			dst = (u64 *)wq->sq.queue;
-		*dst++ = *src++;
-		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
-			dst = (u64 *)wq->sq.queue;
-		len16--;
 
-		/* NOTE len16 cannot be large enough to write to the
-		   same sq.queue memory twice in this loop */
+	/* NOTE len16 cannot be large enough to write to the
+	   same sq.queue memory twice in this copy */
+	total = len16 * 16;
+	end = (uintptr_t)&wq->sq.queue[wq->sq.size];
+	if (__predict_true((uintptr_t)dst + total <= end)) {
+		/* Won't wrap around. */
+		memcpy(dst, src, total);
+	} else {
+		len = end - (uintptr_t)dst;
+		memcpy(dst, src, len);
+		memcpy(wq->sq.queue, src + len, total - len);
 	}
 
 	if (t4_sq_onchip(wq))
@@ -76,18 +80,23 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w
 
 static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
 {
-	u64 *src, *dst;
+	void *src, *dst;
+	uintptr_t end;
+	int total, len;
 
-	src = (u64 *)wqe;
-	dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE);
-	while (len16) {
-		*dst++ = *src++;
-		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
-			dst = (u64 *)wq->rq.queue;
-		*dst++ = *src++;
-		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
-			dst = (u64 *)wq->rq.queue;
-		len16--;
+	src = &wqe->flits[0];
+	dst = &wq->rq.queue->flits[wq->rq.wq_pidx *
+	    (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
+
+	total = len16 * 16;
+	end = (uintptr_t)&wq->rq.queue[wq->rq.size];
+	if (__predict_true((uintptr_t)dst + total <= end)) {
+		/* Won't wrap around. */
+		memcpy(dst, src, total);
+	} else {
+		len = end - (uintptr_t)dst;
+		memcpy(dst, src, len);
+		memcpy(wq->rq.queue, src + len, total - len);
 	}
 }
 

Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h	Tue Nov  7 23:52:14 2017	(r325522)
@@ -87,7 +87,7 @@
 #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
 #define T4_MAX_NUM_STAG (1<<15)
 #define T4_MAX_MR_SIZE (~0ULL - 1)
-#define T4_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
+#define T4_PAGESIZE_MASK 0xffffffff000  /* 4KB-8TB */
 #define T4_STAG_UNSET 0xffffffff
 #define T4_FW_MAJ 0
 
@@ -723,7 +723,7 @@ static inline void t4_reset_cq_in_error(struct t4_cq *
 struct t4_dev_status_page 
 {
 	u8 db_off;
-	u8 pad1;
+	u8 wc_supported;
 	u16 pad2;
 	u32 pad3;
 	u64 qp_start;

Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c
==============================================================================
--- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -468,7 +468,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd,
 	}
 
 	qhp->wq.sq.queue = mmap(NULL, qhp->wq.sq.memsize,
-			    PROT_WRITE, MAP_SHARED,
+			    PROT_READ|PROT_WRITE, MAP_SHARED,
 			    pd->context->cmd_fd, resp.sq_key);
 	if (qhp->wq.sq.queue == MAP_FAILED)
 		goto err4;
@@ -490,7 +490,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd,
 		qhp->wq.rq.udb += 2;
 	}
 	qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize,
-			    PROT_WRITE, MAP_SHARED,
+			    PROT_READ|PROT_WRITE, MAP_SHARED,
 			    pd->context->cmd_fd, resp.rq_key);
 	if (qhp->wq.rq.queue == MAP_FAILED)
 		goto err6;

Modified: projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h	Tue Nov  7 23:52:14 2017	(r325522)
@@ -127,7 +127,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dm
 	size_t align;
 	void *mem;
 
-	if (dev->dma_mask)
+	if (dev != NULL && dev->dma_mask)
 		high = *dev->dma_mask;
 	else if (flag & GFP_DMA32)
 		high = BUS_SPACE_MAXADDR_32BIT;

Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h	Tue Nov  7 23:52:14 2017	(r325522)
@@ -801,6 +801,7 @@ struct adapter {
 
 	void *tom_softc;	/* (struct tom_data *) */
 	struct tom_tunables tt;
+	struct iw_tunables iwt;
 	void *iwarp_softc;	/* (struct c4iw_dev *) */
 	void *iscsi_ulp_softc;	/* (struct cxgbei_data *) */
 	void *ccr_softc;	/* (struct ccr_softc *) */

Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h	Tue Nov  7 23:52:14 2017	(r325522)
@@ -68,6 +68,8 @@ enum {
 	FEC_RESERVED  = 1 << 2,
 };
 
+enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS };
+
 struct port_stats {
 	u64 tx_octets;            /* total # of octets in good frames */
 	u64 tx_frames;            /* all good frames */
@@ -843,5 +845,8 @@ int t4vf_get_sge_params(struct adapter *adapter);
 int t4vf_get_rss_glb_config(struct adapter *adapter);
 int t4vf_get_vfres(struct adapter *adapter);
 int t4vf_prep_adapter(struct adapter *adapter);
+int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid,
+		enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset,
+		unsigned int *pbar2_qid);
 
 #endif /* __CHELSIO_COMMON_H */

Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -8081,6 +8081,98 @@ int t4_shutdown_adapter(struct adapter *adapter)
 }
 
 /**
+ *	t4_bar2_sge_qregs - return BAR2 SGE Queue register information
+ *	@adapter: the adapter
+ *	@qid: the Queue ID
+ *	@qtype: the Ingress or Egress type for @qid
+ *	@user: true if this request is for a user mode queue
+ *	@pbar2_qoffset: BAR2 Queue Offset
+ *	@pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
+ *
+ *	Returns the BAR2 SGE Queue Registers information associated with the
+ *	indicated Absolute Queue ID.  These are passed back in return value
+ *	pointers.  @qtype should be T4_BAR2_QTYPE_EGRESS for Egress Queue
+ *	and T4_BAR2_QTYPE_INGRESS for Ingress Queues.
+ *
+ *	This may return an error which indicates that BAR2 SGE Queue
+ *	registers aren't available.  If an error is not returned, then the
+ *	following values are returned:
+ *
+ *	  *@pbar2_qoffset: the BAR2 Offset of the @qid Registers
+ *	  *@pbar2_qid: the BAR2 SGE Queue ID or 0 of @qid
+ *
+ *	If the returned BAR2 Queue ID is 0, then BAR2 SGE registers which
+ *	require the "Inferred Queue ID" ability may be used.  E.g. the
+ *	Write Combining Doorbell Buffer. If the BAR2 Queue ID is not 0,
+ *	then these "Inferred Queue ID" registers may not be used.
+ */
+int t4_bar2_sge_qregs(struct adapter *adapter,
+		      unsigned int qid,
+		      enum t4_bar2_qtype qtype,
+		      int user,
+		      u64 *pbar2_qoffset,
+		      unsigned int *pbar2_qid)
+{
+	unsigned int page_shift, page_size, qpp_shift, qpp_mask;
+	u64 bar2_page_offset, bar2_qoffset;
+	unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred;
+
+	/* T4 doesn't support BAR2 SGE Queue registers for kernel
+	 * mode queues.
+	 */
+	if (!user && is_t4(adapter))
+		return -EINVAL;
+
+	/* Get our SGE Page Size parameters.
+	 */
+	page_shift = adapter->params.sge.page_shift;
+	page_size = 1 << page_shift;
+
+	/* Get the right Queues per Page parameters for our Queue.
+	 */
+	qpp_shift = (qtype == T4_BAR2_QTYPE_EGRESS
+		     ? adapter->params.sge.eq_s_qpp
+		     : adapter->params.sge.iq_s_qpp);
+	qpp_mask = (1 << qpp_shift) - 1;
+
+	/* Calculate the basics of the BAR2 SGE Queue register area:
+	 *  o The BAR2 page the Queue registers will be in.
+	 *  o The BAR2 Queue ID.
+	 *  o The BAR2 Queue ID Offset into the BAR2 page.
+	 */
+	bar2_page_offset = ((u64)(qid >> qpp_shift) << page_shift);
+	bar2_qid = qid & qpp_mask;
+	bar2_qid_offset = bar2_qid * SGE_UDB_SIZE;
+
+	/* If the BAR2 Queue ID Offset is less than the Page Size, then the
+	 * hardware will infer the Absolute Queue ID simply from the writes to
+	 * the BAR2 Queue ID Offset within the BAR2 Page (and we need to use a
+	 * BAR2 Queue ID of 0 for those writes).  Otherwise, we'll simply
+	 * write to the first BAR2 SGE Queue Area within the BAR2 Page with
+	 * the BAR2 Queue ID and the hardware will infer the Absolute Queue ID
+	 * from the BAR2 Page and BAR2 Queue ID.
+	 *
+	 * One important consequence of this is that some BAR2 SGE registers
+	 * have a "Queue ID" field and we can write the BAR2 SGE Queue ID
+	 * there.  But other registers synthesize the SGE Queue ID purely
+	 * from the writes to the registers -- the Write Combined Doorbell
+	 * Buffer is a good example.  These BAR2 SGE Registers are only
+	 * available for those BAR2 SGE Register areas where the SGE Absolute
+	 * Queue ID can be inferred from simple writes.
+	 */
+	bar2_qoffset = bar2_page_offset;
+	bar2_qinferred = (bar2_qid_offset < page_size);
+	if (bar2_qinferred) {
+		bar2_qoffset += bar2_qid_offset;
+		bar2_qid = 0;
+	}
+
+	*pbar2_qoffset = bar2_qoffset;
+	*pbar2_qid = bar2_qid;
+	return 0;
+}
+
+/**
  *	t4_init_devlog_params - initialize adapter->params.devlog
  *	@adap: the adapter
  *	@fw_attach: whether we can talk to the firmware

Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c	Tue Nov  7 19:12:20 2017	(r325521)
+++ projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c	Tue Nov  7 23:52:14 2017	(r325522)
@@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
+#include <netinet6/in6_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp.h>
@@ -78,6 +81,8 @@ static struct work_struct c4iw_task;
 static struct workqueue_struct *c4iw_taskq;
 static LIST_HEAD(err_cqe_list);
 static spinlock_t err_cqe_lock;
+static LIST_HEAD(listen_port_list);
+static DEFINE_MUTEX(listen_port_mutex);
 
 static void process_req(struct work_struct *ctx);
 static void start_ep_timer(struct c4iw_ep *ep);
@@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep);
 static int set_tcpinfo(struct c4iw_ep *ep);
 static void process_timeout(struct c4iw_ep *ep);
 static void process_err_cqes(void);
-static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
-static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
-static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
 static void *alloc_ep(int size, gfp_t flags);
-static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
 static void close_socket(struct socket *so);
 static int send_mpa_req(struct c4iw_ep *ep);
 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
@@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep);
 static int terminate(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m);
 static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep);
+static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
+static struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
+static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
+		struct ifnet **ifp);
+static void process_newconn(struct c4iw_listen_ep *master_lep,
+		struct socket *new_so);
 #define START_EP_TIMER(ep) \
     do { \
 	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
@@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int 
 	    stop_ep_timer(ep); \
     })
 
+#define GET_LOCAL_ADDR(pladdr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct  inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		   ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getsockaddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getsockaddr(so, (struct sockaddr **)&__a); \
+		*(pladdr) = *__a; \
+		free(__a, M_SONAME); \
+	} while (0)
+
+#define GET_REMOTE_ADDR(praddr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct  inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		   ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getpeeraddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
+		*(praddr) = *__a; \
+		free(__a, M_SONAME); \
+	} while (0)
+
 #ifdef KTR
 static char *states[] = {
 	"idle",
@@ -152,7 +189,6 @@ static char *states[] = {
 };
 #endif
 
-
 static void deref_cm_id(struct c4iw_ep_common *epc)
 {
       epc->cm_id->rem_ref(epc->cm_id);
@@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep)
 	set_bit(QP_REFED, &ep->com.history);
 	c4iw_qp_add_ref(&ep->com.qp->ibqp);
 }
+/* allocated per TCP port while listening */
+struct listen_port_info {
+	uint16_t port_num; /* TCP port address */
+	struct list_head list; /* belongs to listen_port_list */
+	struct list_head lep_list; /* per port lep list */
+	uint32_t refcnt; /* number of lep's listening */
+};
 
+/*
+ * The following two lists are used to manage INADDR_ANY listeners:
+ * 1)listen_port_list
+ * 2)lep_list
+ *
+ * Below is an overview of the INADDR_ANY listener lists on a system with a
+ * two-port adapter:
+ *   |------------------|
+ *   |listen_port_list  |
+ *   |------------------|
+ *            |
+ *            |              |-----------|       |-----------|  
+ *            |              | port_num:X|       | port_num:X|  
+ *            |--------------|-list------|-------|-list------|-------....
+ *                           | lep_list----|     | lep_list----|
+ *                           | refcnt    | |     | refcnt    | |
+ *                           |           | |     |           | |
+ *                           |           | |     |           | |
+ *                           |-----------| |     |-----------| |
+ *                                         |                   |
+ *                                         |                   |
+ *                                         |                   |
+ *                                         |                   |         lep1                  lep2         
+ *                                         |                   |    |----------------|    |----------------|
+ *                                         |                   |----| listen_ep_list |----| listen_ep_list |
+ *                                         |                        |----------------|    |----------------|
+ *                                         |
+ *                                         |
+ *                                         |        lep1                  lep2         
+ *                                         |   |----------------|    |----------------|
+ *                                         |---| listen_ep_list |----| listen_ep_list |
+ *                                             |----------------|    |----------------|
+ *
+ * Because the adapter has two ports, there are two lep's (lep1 & lep2) for
+ * each TCP port number.
+ *
+ * Here 'lep1' is always marked as the master lep, because solisten() is
+ * always called through the first lep.
+ *
+ */
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep)
+{
+	uint16_t port;
+	struct listen_port_info *port_info = NULL;
+	struct sockaddr_storage *laddr = &lep->com.local_addr;
+
+	port = (laddr->ss_family == AF_INET) ?
+		((struct sockaddr_in *)laddr)->sin_port :
+		((struct sockaddr_in6 *)laddr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port)
+			goto found_port;
+
+	port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
+	port_info->port_num = port;
+	port_info->refcnt    = 0;
+
+	list_add_tail(&port_info->list, &listen_port_list);
+	INIT_LIST_HEAD(&port_info->lep_list);
+
+found_port:
+	port_info->refcnt++;
+	list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
+	mutex_unlock(&listen_port_mutex);
+	return port_info;
+}
+
+static int
+rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
+{
+	uint16_t port;
+	struct listen_port_info *port_info = NULL;
+	struct sockaddr_storage *laddr = &lep->com.local_addr;
+	int refcnt = 0;
+
+	port = (laddr->ss_family == AF_INET) ?
+		((struct sockaddr_in *)laddr)->sin_port :
+		((struct sockaddr_in6 *)laddr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	/* get the port_info structure based on the lep's port address */
+	list_for_each_entry(port_info, &listen_port_list, list) {
+		if (port_info->port_num == port) {
+			port_info->refcnt--;
+			refcnt = port_info->refcnt;
+			/* remove the current lep from the listen list */
+			list_del(&lep->listen_ep_list);
+			if (port_info->refcnt == 0) {
+				/* Remove this entry from the list as there
+				 * are no more listeners for this port_num.
+				 */
+				list_del(&port_info->list);
+				kfree(port_info);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&listen_port_mutex);
+	return refcnt;
+}
+
+/*
+ * Find the lep that belongs to the ifnet on which the SYN frame was received.
+ */
+struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
+{
+	struct adapter *adap = NULL;
+	struct c4iw_listen_ep *lep = NULL;
+	struct sockaddr_storage remote = { 0 };
+	struct ifnet *new_conn_ifp = NULL;
+	struct listen_port_info *port_info = NULL;
+	int err = 0, i = 0,
+	    found_portinfo = 0, found_lep = 0;
+	uint16_t port;
+
+	/* STEP 1: get 'ifnet' based on socket's remote address */
+	GET_REMOTE_ADDR(&remote, so);
+
+	err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
+	if (err) {
+		CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
+				"master_lep %p err %d",
+				__func__, so, master_lep, err);
+		return (NULL);
+	}
+
+	/* STEP 2: Find 'port_info' with listener local port address. */
+	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
+		((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
+		((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
+
+
+	mutex_lock(&listen_port_mutex);
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port) {
+			found_portinfo =1;
+			break;
+		}
+	if (!found_portinfo)
+		goto out;
+
+	/* STEP 3: Traverse through list of lep's that are bound to the current
+	 * TCP port address and find the lep that belongs to the ifnet on which
+	 * the SYN frame was received.
+	 */
+	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
+		adap = lep->com.dev->rdev.adap;
+		for_each_port(adap, i) {
+			if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
+				found_lep =1;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&listen_port_mutex);
+	return found_lep ? lep : (NULL);
+}
+
 static void process_timeout(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int abort = 1;
 
-	mutex_lock(&ep->com.mutex);
 	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
 			ep, ep->hwtid, ep->com.state);
 	set_bit(TIMEDOUT, &ep->com.history);
@@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep)
 				, __func__, ep, ep->hwtid, ep->com.state);
 		abort = 0;
 	}
-	mutex_unlock(&ep->com.mutex);
 	if (abort)
 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
 	c4iw_put_ep(&ep->com);
@@ -273,14 +479,16 @@ process_req(struct work_struct *ctx)
 		ep_events = epc->ep_events;
 		epc->ep_events = 0;
 		spin_unlock_irqrestore(&req_lock, flag);
-		CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__,
-		    epc->so, epc, ep_events);
+		mutex_lock(&epc->mutex);
+		CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
+		    __func__, epc->so, epc, states[epc->state], ep_events);
 		if (ep_events & C4IW_EVENT_TERM)
 			process_terminate((struct c4iw_ep *)epc);
 		if (ep_events & C4IW_EVENT_TIMEOUT)
 			process_timeout((struct c4iw_ep *)epc);
 		if (ep_events & C4IW_EVENT_SOCKET)
 			process_socket_event((struct c4iw_ep *)epc);
+		mutex_unlock(&epc->mutex);
 		c4iw_put_ep(epc);
 		process_err_cqes();
 		spin_lock_irqsave(&req_lock, flag);
@@ -321,55 +529,67 @@ done:
 	return (rc);
 
 }
-
 static int
-find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
+get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
 {
-	struct in_addr addr;
-	int err;
+	int err = 0;
 
-	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
-	    peer_ip, ntohs(local_port), ntohs(peer_port));
+	if (raddr->ss_family == AF_INET) {
+		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
+		struct nhop4_extended nh4 = {0};
 
-	addr.s_addr = peer_ip;
-	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
+		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
+				NHR_REF, 0, &nh4);
+		*ifp = nh4.nh_ifp;
+		if (err)
+			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
+	} else {
+		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
+		struct nhop6_extended nh6 = {0};
+		struct in6_addr addr6;
+		uint32_t scopeid;
 
-	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
+		memset(&addr6, 0, sizeof(addr6));
+		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
+					&addr6, &scopeid);
+		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
+				NHR_REF, 0, &nh6);
+		*ifp = nh6.nh_ifp;
+		if (err)
+			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
 	return err;
 }
 
 static void
 close_socket(struct socket *so)
 {
-
 	uninit_iwarp_socket(so);
-	sodisconnect(so);
+	soclose(so);
 }
 
 static void
 process_peer_close(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int disconnect = 1;
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
-	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
 
 		case MPA_REQ_WAIT:
-			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
-			break;
-
+			/* Fallthrough */
 		case MPA_REQ_SENT:
-			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
 			    __func__, ep);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			connect_reply_upcall(ep, -ECONNABORTED);
 
 			disconnect = 0;
@@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep)
 			 */
 			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
-			c4iw_get_ep(&ep->com);
+			ep->com.state = CLOSING;
 			break;
 
 		case MPA_REP_SENT:
 			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
+			ep->com.state = CLOSING;
 			break;
 
 		case FPDU_MODE:
 			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
 			    __func__, ep);
 			START_EP_TIMER(ep);
-			__state_set(&ep->com, CLOSING);
+			ep->com.state = CLOSING;
 			attrs.next_state = C4IW_QP_STATE_CLOSING;
 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
@@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep)
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
 			    __func__, ep);
-			__state_set(&ep->com, MORIBUND);
+			ep->com.state = MORIBUND;
 			disconnect = 0;
 			break;
 
@@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep)
 			}
 			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			release = 1;
 			disconnect = 0;
 			break;
@@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep)
 			break;
 	}
 
-	mutex_unlock(&ep->com.mutex);
 
 	if (disconnect) {
 
@@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep)
 static void
 process_conn_error(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int ret;
 	int state;
 
-	mutex_lock(&ep->com.mutex);
 	state = ep->com.state;
 	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
 	    __func__, ep, ep->com.so, ep->com.so->so_error,
@@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep)
 
 		case MPA_REQ_WAIT:
 			STOP_EP_TIMER(ep);
+			c4iw_put_ep(&ep->parent_ep->com);
 			break;
 
 		case MPA_REQ_SENT:
@@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep)
 			break;
 
 		case MPA_REQ_RCVD:
-
-			/*
-			 * We're gonna mark this puppy DEAD, but keep
-			 * the reference on it until the ULP accepts or
-			 * rejects the CR.
-			 */
-			c4iw_get_ep(&ep->com);
 			break;
 
 		case MORIBUND:
@@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep)
 		case DEAD:
 			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
 			    __func__, ep->com.so->so_error);
-			mutex_unlock(&ep->com.mutex);
 			return;
 
 		default:
@@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep)
 
 	if (state != ABORTING) {
 		close_socket(ep->com.so);
-		__state_set(&ep->com, DEAD);
+		ep->com.state = DEAD;
 		c4iw_put_ep(&ep->com);
 	}
-	mutex_unlock(&ep->com.mutex);
 	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
 	return;
 }
@@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep)
 static void
 process_close_complete(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
 	/* The cm_id may be null if we failed to connect */
-	mutex_lock(&ep->com.mutex);
 	set_bit(CLOSE_CON_RPL, &ep->com.history);
 
 	switch (ep->com.state) {
@@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep)
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
 			    __func__, ep);
-			__state_set(&ep->com, MORIBUND);
+			ep->com.state = MORIBUND;
 			break;
 
 		case MORIBUND:
@@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep)
 
 			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			release = 1;
 			break;
 
@@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep)
 			panic("%s:pcc6 %p unknown ep state", __func__, ep);
 			break;
 	}
-	mutex_unlock(&ep->com.mutex);
 
 	if (release) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
-		c4iw_put_ep(&ep->com);
+		release_ep_resources(ep);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
 	return;
@@ -639,49 +846,56 @@ setiwsockopt(struct socket *so)
 static void
 init_iwarp_socket(struct socket *so, void *arg)
 {
-
-	SOCKBUF_LOCK(&so->so_rcv);
-	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
-	so->so_state |= SS_NBIO;
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	if (SOLISTENING(so)) {
+		SOLISTEN_LOCK(so);
+		solisten_upcall_set(so, c4iw_so_upcall, arg);
+		so->so_state |= SS_NBIO;
+		SOLISTEN_UNLOCK(so);
+	} else {
+		SOCKBUF_LOCK(&so->so_rcv);
+		soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
+		so->so_state |= SS_NBIO;
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	}
 }
 
 static void
 uninit_iwarp_socket(struct socket *so)
 {
-
-	SOCKBUF_LOCK(&so->so_rcv);
-	soupcall_clear(so, SO_RCV);
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	if (SOLISTENING(so)) {
+		SOLISTEN_LOCK(so);
+		solisten_upcall_set(so, NULL, NULL);
+		SOLISTEN_UNLOCK(so);
+	} else {
+		SOCKBUF_LOCK(&so->so_rcv);
+		soupcall_clear(so, SO_RCV);
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	}
 }
 
 static void
 process_data(struct c4iw_ep *ep)
 {
-	struct sockaddr_in *local, *remote;
 	int disconnect = 0;
 
 	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
 	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
 
-	switch (state_read(&ep->com)) {
+	switch (ep->com.state) {
 	case MPA_REQ_SENT:
 		disconnect = process_mpa_reply(ep);
 		break;
 	case MPA_REQ_WAIT:
-		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
-		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
-		ep->com.local_addr = *local;
-		ep->com.remote_addr = *remote;
-		free(local, M_SONAME);
-		free(remote, M_SONAME);
 		disconnect = process_mpa_request(ep);
+		if (disconnect)
+			/* Referenced in process_newconn() */
+			c4iw_put_ep(&ep->parent_ep->com);
 		break;
 	default:
 		if (sbused(&ep->com.so->so_rcv))
 			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
 			    "state %d, so %p, so_state 0x%x, sbused %u\n",
-			    __func__, ep, state_read(&ep->com), ep->com.so,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201711072352.vA7NqEHU046048>