Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 28 Feb 2017 19:27:41 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r314400 - in head/sys: dev/cxgbe/iw_cxgbe ofed/drivers/infiniband/core
Message-ID:  <201702281927.v1SJRfok099754@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Tue Feb 28 19:27:41 2017
New Revision: 314400
URL: https://svnweb.freebsd.org/changeset/base/314400

Log:
  cxgbe/iw_cxgbe: fix various double-close panics with iWARP sockets.
  
  Sockets representing the TCP endpoints for iWARP connections are
  allocated by the ibcore module.  Before this revision they were closed
  either by the ibcore module or the iw_cxgbe hardware driver depending on
  the state transitions during connection teardown.  This is error prone
  and there were cases where both iw_cxgbe and ibcore closed the socket
  leading to double-free panics.  The fix is to let ibcore close the
  sockets it creates and never do it in the driver.
  
  - Use sodisconnect instead of soclose (preceded by solinger = 0) in the
    driver to tear down an RDMA connection abruptly.  This does what's
    intended without releasing the socket's fd reference.
  
  - Close the socket in ibcore when the iWARP iw_cm_id is destroyed.  This
    works for all kinds of sockets: clients that initiate connections,
    listeners, and sockets accepted off of listeners.
  
  Reviewed by:	Steve Wise @ Open Grid Computing, hselasky@
  MFC after:	3 days
  Sponsored by:	Chelsio Communications
  Differential Revision:	https://reviews.freebsd.org/D9796

Modified:
  head/sys/dev/cxgbe/iw_cxgbe/cm.c
  head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  head/sys/ofed/drivers/infiniband/core/cma.c
  head/sys/ofed/drivers/infiniband/core/iwcm.c

Modified: head/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/cm.c	Tue Feb 28 19:01:59 2017	(r314399)
+++ head/sys/dev/cxgbe/iw_cxgbe/cm.c	Tue Feb 28 19:27:41 2017	(r314400)
@@ -92,9 +92,7 @@ static void *alloc_ep(int size, gfp_t fl
 void __free_ep(struct c4iw_ep_common *epc);
 static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
 		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
-static int close_socket(struct c4iw_ep_common *epc, int close);
-static int shutdown_socket(struct c4iw_ep_common *epc);
-static void abort_socket(struct c4iw_ep *ep);
+static void close_socket(struct socket *so);
 static int send_mpa_req(struct c4iw_ep *ep);
 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
@@ -111,7 +109,8 @@ static void process_peer_close(struct c4
 static void process_conn_error(struct c4iw_ep *ep);
 static void process_close_complete(struct c4iw_ep *ep);
 static void ep_timeout(unsigned long arg);
-static void init_sock(struct c4iw_ep_common *epc);
+static void init_iwarp_socket(struct socket *so, void *arg);
+static void uninit_iwarp_socket(struct socket *so);
 static void process_data(struct c4iw_ep *ep);
 static void process_connected(struct c4iw_ep *ep);
 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
@@ -319,87 +318,12 @@ find_route(__be32 local_ip, __be32 peer_
 	return err;
 }
 
-static int
-close_socket(struct c4iw_ep_common *epc, int close)
-{
-	struct socket *so = epc->so;
-	int rc;
-
-	CTR5(KTR_IW_CXGBE, "%s:csoB so %p, ep %p, state %s, tid %d", __func__,
-			so, epc, states[epc->state],
-			((struct c4iw_ep *)epc)->hwtid);
-	mutex_lock(&epc->so_mutex);
-	if ((so == NULL) || (so->so_count == 0)) {
-		mutex_unlock(&epc->so_mutex);
-		CTR5(KTR_IW_CXGBE, "%s:cso1 so %p, ep %p, state %s, tid %d",
-				__func__, so, epc, states[epc->state],
-				((struct c4iw_ep *)epc)->hwtid);
-		return -EINVAL;
-	}
-
-	SOCK_LOCK(so);
-	soupcall_clear(so, SO_RCV);
-	SOCK_UNLOCK(so);
-
-	if (close)
-                rc = soclose(so);
-        else
-                rc = soshutdown(so, SHUT_WR | SHUT_RD);
-	epc->so = NULL;
-
-	mutex_unlock(&epc->so_mutex);
-	return (rc);
-}
-
-static int
-shutdown_socket(struct c4iw_ep_common *epc)
-{
-
-	struct socket *so = epc->so;
-	int rc;
-
-	CTR5(KTR_IW_CXGBE, "%s:ssoB so %p, ep %p, state %s, tid %d", __func__,
-			epc->so, epc, states[epc->state],
-			((struct c4iw_ep *)epc)->hwtid);
-	mutex_lock(&epc->so_mutex);
-	if ((so == NULL) || (so->so_count == 0)) {
-		mutex_unlock(&epc->so_mutex);
-		CTR5(KTR_IW_CXGBE, "%s:sso1 so %p, ep %p, state %s, tid %d",
-			__func__, epc->so, epc, states[epc->state],
-			((struct c4iw_ep *)epc)->hwtid);
-		return -EINVAL;
-	}
-	rc = soshutdown(so, SHUT_WR);
-	mutex_unlock(&epc->so_mutex);
-	return rc;
-}
-
 static void
-abort_socket(struct c4iw_ep *ep)
+close_socket(struct socket *so)
 {
-	struct sockopt sopt;
-	int rc;
-	struct linger l;
-
-	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep,
-			ep->com.so, states[ep->com.state], ep->hwtid);
-	mutex_lock(&ep->com.so_mutex);
-	l.l_onoff = 1;
-	l.l_linger = 0;
 
-	/* linger_time of 0 forces RST to be sent */
-	sopt.sopt_dir = SOPT_SET;
-	sopt.sopt_level = SOL_SOCKET;
-	sopt.sopt_name = SO_LINGER;
-	sopt.sopt_val = (caddr_t)&l;
-	sopt.sopt_valsize = sizeof l;
-	sopt.sopt_td = NULL;
-	rc = sosetopt(ep->com.so, &sopt);
-	if (rc) {
-		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
-		    __func__, rc);
-	}
-	mutex_unlock(&ep->com.so_mutex);
+	uninit_iwarp_socket(so);
+	sodisconnect(so);
 }
 
 static void
@@ -429,7 +353,7 @@ process_peer_close(struct c4iw_ep *ep)
 
 			disconnect = 0;
 			STOP_EP_TIMER(ep);
-			close_socket(&ep->com, 0);
+			close_socket(ep->com.so);
 			deref_cm_id(&ep->com);
 			release = 1;
 			break;
@@ -486,7 +410,7 @@ process_peer_close(struct c4iw_ep *ep)
 				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
 			}
-			close_socket(&ep->com, 0);
+			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
 			__state_set(&ep->com, DEAD);
 			release = 1;
@@ -595,14 +519,7 @@ process_conn_error(struct c4iw_ep *ep)
 	}
 
 	if (state != ABORTING) {
-		if (ep->parent_ep) {
-			CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
-			close_socket(&ep->com, 1);
-		} else {
-			CTR2(KTR_IW_CXGBE, "%s:pce2 %p", __func__, ep);
-			close_socket(&ep->com, 0);
-		}
-
+		close_socket(ep->com.so);
 		__state_set(&ep->com, DEAD);
 		c4iw_put_ep(&ep->com);
 	}
@@ -648,16 +565,7 @@ process_close_complete(struct c4iw_ep *e
 						&attrs, 1);
 			}
 
-			if (ep->parent_ep) {
-
-				CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
-				close_socket(&ep->com, 1);
-			}
-			else {
-
-				CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
-				close_socket(&ep->com, 0);
-			}
+			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
 			__state_set(&ep->com, DEAD);
 			release = 1;
@@ -688,23 +596,15 @@ process_close_complete(struct c4iw_ep *e
 }
 
 static void
-init_sock(struct c4iw_ep_common *epc)
+init_iwarp_socket(struct socket *so, void *arg)
 {
 	int rc;
 	struct sockopt sopt;
-	struct socket *so = epc->so;
 	int on = 1;
 
-	mutex_lock(&epc->so_mutex);
-	if ((so == NULL) || (so->so_count == 0)) {
-		mutex_unlock(&epc->so_mutex);
-		CTR5(KTR_IW_CXGBE, "%s:iso1 so %p, ep %p, state %s, tid %d",
-			__func__, so, epc, states[epc->state],
-			((struct c4iw_ep *)epc)->hwtid);
-		return;
-	}
+	/* Note that SOCK_LOCK(so) is same as SOCKBUF_LOCK(&so->so_rcv) */
 	SOCK_LOCK(so);
-	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
+	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
 	so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(so);
 	sopt.sopt_dir = SOPT_SET;
@@ -718,7 +618,15 @@ init_sock(struct c4iw_ep_common *epc)
 		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
 		    __func__, so, rc);
 	}
-	mutex_unlock(&epc->so_mutex);
+}
+
+static void
+uninit_iwarp_socket(struct socket *so)
+{
+
+	SOCKBUF_LOCK(&so->so_rcv);
+	soupcall_clear(so, SO_RCV);
+	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 static void
@@ -759,18 +667,18 @@ process_data(struct c4iw_ep *ep)
 static void
 process_connected(struct c4iw_ep *ep)
 {
+	struct socket *so = ep->com.so;
 
-	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
+	if ((so->so_state & SS_ISCONNECTED) && !so->so_error) {
 		if (send_mpa_req(ep))
 			goto err;
-	}
-	else {
-		connect_reply_upcall(ep, -ep->com.so->so_error);
+	} else {
+		connect_reply_upcall(ep, -so->so_error);
 		goto err;
 	}
 	return;
 err:
-	close_socket(&ep->com, 0);
+	close_socket(so);
 	state_set(&ep->com, DEAD);
 	c4iw_put_ep(&ep->com);
 	return;
@@ -785,23 +693,9 @@ process_newconn(struct iw_cm_id *parent_
 	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
 	int ret = 0;
 
-	if (!child_so) {
-		CTR4(KTR_IW_CXGBE,
-		    "%s: parent so %p, parent ep %p, child so %p, invalid so",
-		    __func__, parent_ep->com.so, parent_ep, child_so);
-		log(LOG_ERR, "%s: invalid child socket\n", __func__);
-		return;
-	}
-	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
-	if (!child_ep) {
-		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
-		    __func__, parent_ep->com.so, parent_ep);
-		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
-		return;
-	}
-	SOCKBUF_LOCK(&child_so->so_rcv);
-	soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
-	SOCKBUF_UNLOCK(&child_so->so_rcv);
+	MPASS(child_so != NULL);
+
+	child_ep = alloc_ep(sizeof(*child_ep), M_WAITOK);
 
 	CTR5(KTR_IW_CXGBE,
 	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
@@ -821,6 +715,7 @@ process_newconn(struct iw_cm_id *parent_
 	free(local, M_SONAME);
 	free(remote, M_SONAME);
 
+	init_iwarp_socket(child_so, &child_ep->com);
 	c4iw_get_ep(&parent_ep->com);
 	init_timer(&child_ep->timer);
 	state_set(&child_ep->com, MPA_REQ_WAIT);
@@ -1038,7 +933,6 @@ alloc_ep(int size, gfp_t gfp)
 
 	kref_init(&epc->kref);
 	mutex_init(&epc->mutex);
-	mutex_init(&epc->so_mutex);
 	c4iw_init_wr_wait(&epc->wr_wait);
 
 	return (epc);
@@ -1359,30 +1253,47 @@ static void close_complete_upcall(struct
 	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
 }
 
-static int send_abort(struct c4iw_ep *ep)
+static int
+send_abort(struct c4iw_ep *ep)
 {
-	int err;
+	struct socket *so = ep->com.so;
+	struct sockopt sopt;
+	int rc;
+	struct linger l;
 
-	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
-	abort_socket(ep);
+	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
+	    states[ep->com.state], ep->hwtid);
 
-	/*
-	 * Since socket options were set as l_onoff=1 and l_linger=0 in in
-	 * abort_socket, invoking soclose here sends a RST (reset) to the peer.
-	 */
-	err = close_socket(&ep->com, 1);
+	l.l_onoff = 1;
+	l.l_linger = 0;
+
+	/* linger_time of 0 forces RST to be sent */
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_level = SOL_SOCKET;
+	sopt.sopt_name = SO_LINGER;
+	sopt.sopt_val = (caddr_t)&l;
+	sopt.sopt_valsize = sizeof l;
+	sopt.sopt_td = NULL;
+	rc = sosetopt(so, &sopt);
+	if (rc != 0) {
+		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
+		    __func__, so, rc);
+	}
+
+	uninit_iwarp_socket(so);
+	sodisconnect(so);
 	set_bit(ABORT_CONN, &ep->com.history);
-	CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep);
 
 	/*
-	 * TBD: iw_cgbe driver should receive ABORT reply for every ABORT
+	 * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
 	 * request it has sent. But the current TOE driver is not propagating
 	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work-
 	 * around de-refer 'ep' (which was refered before sending ABORT request)
 	 * here instead of doing it in abort_rpl() handler of iw_cxgbe driver.
 	 */
 	c4iw_put_ep(&ep->com);
-	return err;
+
+	return (0);
 }
 
 static void peer_close_upcall(struct c4iw_ep *ep)
@@ -2213,7 +2124,6 @@ int c4iw_connect(struct iw_cm_id *cm_id,
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_ep *ep = NULL;
 	struct nhop4_extended nh4;
-	struct toedev *tdev;
 
 	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
 
@@ -2266,8 +2176,6 @@ int c4iw_connect(struct iw_cm_id *cm_id,
 	ep->com.thread = curthread;
 	ep->com.so = cm_id->so;
 
-	init_sock(&ep->com);
-
 	/* find a route */
 	err = find_route(
 		cm_id->local_addr.sin_addr.s_addr,
@@ -2283,22 +2191,11 @@ int c4iw_connect(struct iw_cm_id *cm_id,
 		goto fail2;
 	}
 
-	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) {
-
-		CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep);
-		printf("%s - interface not TOE capable.\n", __func__);
-		close_socket(&ep->com, 0);
+	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE) ||
+	    TOEDEV(nh4.nh_ifp) == NULL) {
 		err = -ENOPROTOOPT;
 		goto fail3;
 	}
-	tdev = TOEDEV(nh4.nh_ifp);
-
-	if (tdev == NULL) {
-
-		CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep);
-		printf("%s - No toedev for interface.\n", __func__);
-		goto fail3;
-	}
 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 
 	state_set(&ep->com, CONNECTING);
@@ -2309,19 +2206,18 @@ int c4iw_connect(struct iw_cm_id *cm_id,
 		ep->com.thread);
 
 	if (!err) {
-		CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep);
+		init_iwarp_socket(cm_id->so, &ep->com);
 		goto out;
 	} else {
-		close_socket(&ep->com, 0);
 		goto fail2;
 	}
 
 fail3:
-	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 fail2:
 	deref_cm_id(&ep->com);
 	c4iw_put_ep(&ep->com);
+	ep = NULL;	/* CTR shouldn't display already-freed ep. */
 out:
 	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
 	return err;
@@ -2465,7 +2361,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *e
 
 			if (!ep->parent_ep)
 				__state_set(&ep->com, MORIBUND);
-			ret = shutdown_socket(&ep->com);
+			ret = sodisconnect(ep->com.so);
 		}
 
 		if (ret) {

Modified: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
==============================================================================
--- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Tue Feb 28 19:01:59 2017	(r314399)
+++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Tue Feb 28 19:27:41 2017	(r314400)
@@ -754,7 +754,6 @@ struct c4iw_ep_common {
         int rpl_done;
         struct thread *thread;
         struct socket *so;
-	struct mutex so_mutex;
 };
 
 struct c4iw_listen_ep {

Modified: head/sys/ofed/drivers/infiniband/core/cma.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/cma.c	Tue Feb 28 19:01:59 2017	(r314399)
+++ head/sys/ofed/drivers/infiniband/core/cma.c	Tue Feb 28 19:27:41 2017	(r314400)
@@ -1072,8 +1072,6 @@ static void cma_release_port(struct rdma
 		kfree(bind_list);
 	}
 	mutex_unlock(&lock);
-	if (id_priv->sock)
-		sock_release(id_priv->sock);
 }
 
 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)

Modified: head/sys/ofed/drivers/infiniband/core/iwcm.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/iwcm.c	Tue Feb 28 19:01:59 2017	(r314399)
+++ head/sys/ofed/drivers/infiniband/core/iwcm.c	Tue Feb 28 19:27:41 2017	(r314400)
@@ -528,24 +528,15 @@ iw_init_sock(struct iw_cm_id *cm_id)
 }
 
 static int
-iw_close_socket(struct iw_cm_id *cm_id, int close)
+iw_uninit_socket(struct iw_cm_id *cm_id)
 {
 	struct socket *so = cm_id->so;
-	int rc;
-
 
 	SOCK_LOCK(so);
 	soupcall_clear(so, SO_RCV);
 	SOCK_UNLOCK(so);
 
-	if (close)
-		rc = soclose(so);
-	else
-		rc = soshutdown(so, SHUT_WR | SHUT_RD);
-
-	cm_id->so = NULL;
-
-	return rc;
+	return (0);
 }
 
 static int
@@ -554,18 +545,17 @@ iw_create_listen(struct iw_cm_id *cm_id,
 	int rc;
 
 	iw_init_sock(cm_id);
-	rc = solisten(cm_id->so, backlog, curthread);
+	rc = -solisten(cm_id->so, backlog, curthread);
 	if (rc != 0)
-		iw_close_socket(cm_id, 0);
-	return rc;
+		iw_uninit_socket(cm_id);
+	return (rc);
 }
 
 static int
 iw_destroy_listen(struct iw_cm_id *cm_id)
 {
-	int rc;
-	rc = iw_close_socket(cm_id, 0);
-	return rc;
+
+	return (iw_uninit_socket(cm_id));
 }
 
 
@@ -663,6 +653,9 @@ void iw_destroy_cm_id(struct iw_cm_id *c
 
 	wait_for_completion(&cm_id_priv->destroy_comp);
 
+	if (cm_id->so)
+		sock_release(cm_id->so);
+
 	free_cm_id(cm_id_priv);
 }
 EXPORT_SYMBOL(iw_destroy_cm_id);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201702281927.v1SJRfok099754>