Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 30 Jun 2020 14:49:52 +0000 (UTC)
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r362798 - in projects/nfs-over-tls/sys/rpc: . rpcsec_tls
Message-ID:  <202006301449.05UEnq2x072917@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rmacklem
Date: Tue Jun 30 14:49:51 2020
New Revision: 362798
URL: https://svnweb.freebsd.org/changeset/base/362798

Log:
  Testing when a server does not respond to TLS handshake records exposed
  a couple of problems, since the daemon would be in SSL_connect() for 6 minutes.
  
  - When the upcall timed out and was retried, the RPCTLS_SYSC_CLSOCKET syscall
    was broken and did not return an error upon a retry. It allocated a file
    descriptor for a NULL socket.
  - The socket structure in the kernel could be free'd while the daemon was
    still using it in SSL_connect().
  - Adjust the timeout a retry count so that upcalls are only attempted once
    with a 10minute timeout.
  
  This patch fixes these problems by changing the following:
  - If the handshake is in progress, don't soclose(so) in the kernel
    clnt_vc_destroy().
  - Fix the RPCTLS_SYSC_CLSOCKET (and RPCTLS_SYSC_SRVSOCKET) to correctly
    return an error if the socket is NULL (which means it already has a
    file decriptor assigned to it).

Modified:
  projects/nfs-over-tls/sys/rpc/clnt_vc.c
  projects/nfs-over-tls/sys/rpc/rpcsec_tls/rpctls_impl.c

Modified: projects/nfs-over-tls/sys/rpc/clnt_vc.c
==============================================================================
--- projects/nfs-over-tls/sys/rpc/clnt_vc.c	Tue Jun 30 11:50:52 2020	(r362797)
+++ projects/nfs-over-tls/sys/rpc/clnt_vc.c	Tue Jun 30 14:49:51 2020	(r362798)
@@ -895,12 +895,21 @@ clnt_vc_destroy(CLIENT *cl)
 			 * If the upcall fails, the socket has
 			 * probably been closed via the rpctlscd
 			 * daemon having crashed or been
-			 * restarted.
+			 * restarted, so ignore return stat.
 			 */
 			stat = rpctls_cl_disconnect(ct->ct_sslsec,
 			    ct->ct_sslusec, ct->ct_sslrefno,
 			    &reterr);
-		} else {
+		} else if ((ct->ct_rcvstate & RPCRCVSTATE_TLSHANDSHAKE) == 0) {
+			/*
+			 * If the TLS handshake is in progress, leave the
+			 * socket so that it will closed by the daemon.
+			 * This can only occur if the daemon is waiting for
+			 * an openssl call like SSL_connect() for a long
+			 * time.  The call will normally eventually fail and
+			 * then the daemon will close the socket, so do not
+			 * do it here.
+			 */
 			soshutdown(so, SHUT_WR);
 			soclose(so);
 		}
@@ -1278,17 +1287,20 @@ clnt_vc_dotlsupcall(void *data)
 	enum clnt_stat ret;
 	uint32_t reterr;
 
-printf("TLSupcall started\n");
 	mtx_lock(&ct->ct_lock);
 	ct->ct_rcvstate |= RPCRCVSTATE_UPCALLTHREAD;
 	while (!ct->ct_closed) {
 		if ((ct->ct_rcvstate & RPCRCVSTATE_UPCALLNEEDED) != 0) {
 			ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLNEEDED;
 			ct->ct_rcvstate |= RPCRCVSTATE_UPCALLINPROG;
-			mtx_unlock(&ct->ct_lock);
-			ret = rpctls_cl_handlerecord(ct->ct_sslsec, ct->ct_sslusec,
-			    ct->ct_sslrefno, &reterr);
-			mtx_lock(&ct->ct_lock);
+			if (ct->ct_sslrefno != 0) {
+				mtx_unlock(&ct->ct_lock);
+printf("at handlerecord\n");
+				ret = rpctls_cl_handlerecord(ct->ct_sslsec,
+				    ct->ct_sslusec, ct->ct_sslrefno, &reterr);
+printf("aft handlerecord=%d\n", ret);
+				mtx_lock(&ct->ct_lock);
+			}
 			ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLINPROG;
 			if (ret == RPC_SUCCESS && reterr == RPCTLSERR_OK)
 				ct->ct_rcvstate |= RPCRCVSTATE_NORMAL;
@@ -1309,6 +1321,5 @@ printf("TLSupcall started\n");
 	ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLTHREAD;
 	wakeup(&ct->ct_sslrefno);
 	mtx_unlock(&ct->ct_lock);
-printf("TLSupcall exit\n");
 	kthread_exit();
 }

Modified: projects/nfs-over-tls/sys/rpc/rpcsec_tls/rpctls_impl.c
==============================================================================
--- projects/nfs-over-tls/sys/rpc/rpcsec_tls/rpctls_impl.c	Tue Jun 30 11:50:52 2020	(r362797)
+++ projects/nfs-over-tls/sys/rpc/rpcsec_tls/rpctls_impl.c	Tue Jun 30 14:49:51 2020	(r362798)
@@ -114,8 +114,9 @@ sys_rpctls_syscall(struct thread *td, struct rpctls_sy
 	struct file *fp;
 	struct socket *so;
 	char path[MAXPATHLEN];
-	int fd = -1, error, retry_count = 5;
+	int fd = -1, error, try_count;
 	CLIENT *cl, *oldcl;
+	struct timeval timeo;
 #ifdef KERN_TLS
 	u_int maxlen;
 #endif
@@ -155,12 +156,26 @@ printf("got cl=%p\n", cl);
 			/*
 			 * The number of retries defaults to INT_MAX, which
 			 * effectively means an infinite, uninterruptable loop. 
-			 * Limiting it to five retries keeps it from running
-			 * forever.
+			 * Set the try_count to 1 so that no retries of the
+			 * RPC occur.  Since it is an upcall to a local daemon,
+			 * requests should not be lost and doing one of these
+			 * RPCs multiple times is not correct.
+			 * SSL_connect() in the openssl library has been
+			 * observed to take 6 minutes when the server is not
+			 * responding to the handshake records, so set the
+			 * timeout to 10min.  If it times out before the
+			 * daemon completes the RPC, that should still be ok,
+			 * since the daemon is single threaded and will not
+			 * do further RPCs until the openssl library call
+			 * returns (usually with a failure).
 			 */
-			if (cl != NULL)
-				CLNT_CONTROL(cl, CLSET_RETRIES, &retry_count);
-			else
+			if (cl != NULL) {
+				try_count = 1;
+				CLNT_CONTROL(cl, CLSET_RETRIES, &try_count);
+				timeo.tv_sec = 10 * 60;
+				timeo.tv_usec = 0;
+				CLNT_CONTROL(cl, CLSET_TIMEOUT, &timeo);
+			} else
 				error = EINVAL;
 		}
 	
@@ -203,12 +218,20 @@ printf("got cl=%p\n", cl);
 			/*
 			 * The number of retries defaults to INT_MAX, which
 			 * effectively means an infinite, uninterruptable loop. 
-			 * Limiting it to five retries keeps it from running
-			 * forever.
+			 * Doing even one retry of these upcalls is probably
+			 * not a good plan, since repeating the openssl
+			 * operations are not likely to work.
+			 * The timeout is set fairly large, since some
+			 * openssl operations such as SSL_connect() take a
+			 * long time to return upon failure.
 			 */
-			if (cl != NULL)
-				CLNT_CONTROL(cl, CLSET_RETRIES, &retry_count);
-			else
+			if (cl != NULL) {
+				try_count = 1;
+				CLNT_CONTROL(cl, CLSET_RETRIES, &try_count);
+				timeo.tv_sec = 2 * 60;
+				timeo.tv_usec = 0;
+				CLNT_CONTROL(cl, CLSET_TIMEOUT, &timeo);
+			} else
 				error = EINVAL;
 		}
 	
@@ -249,33 +272,41 @@ printf("srvshutd oldcl=%p\n", oldcl);
 		break;
 	case RPCTLS_SYSC_CLSOCKET:
 printf("In connect\n");
-		error = falloc(td, &fp, &fd, 0);
-		if (error == 0) {
+		mtx_lock(&rpctls_connect_lock);
+		so = rpctls_connect_so;
+		rpctls_connect_so = NULL;
+		mtx_unlock(&rpctls_connect_lock);
+		if (so != NULL) {
+			error = falloc(td, &fp, &fd, 0);
 printf("falloc=%d fd=%d\n", error, fd);
-			mtx_lock(&rpctls_connect_lock);
-			so = rpctls_connect_so;
-			rpctls_connect_so = NULL;
-			mtx_unlock(&rpctls_connect_lock);
-			finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
-			fdrop(fp, td);	/* Drop fp reference. */
-			td->td_retval[0] = fd;
-		}
-printf("returning=%d\n", fd);
+			if (error == 0) {
+				finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so,
+				    &socketops);
+				fdrop(fp, td);	/* Drop fp reference. */
+				td->td_retval[0] = fd;
+			}
+		} else
+			error = EPERM;
+printf("clsocket err=%d fd=%d\n", error, fd);
 		break;
 	case RPCTLS_SYSC_SRVSOCKET:
 printf("In srvconnect\n");
-		error = falloc(td, &fp, &fd, 0);
-		if (error == 0) {
+		mtx_lock(&rpctls_server_lock);
+		so = rpctls_server_so;
+		rpctls_server_so = NULL;
+		mtx_unlock(&rpctls_server_lock);
+		if (so != NULL) {
+			error = falloc(td, &fp, &fd, 0);
 printf("falloc=%d fd=%d\n", error, fd);
-			mtx_lock(&rpctls_server_lock);
-			so = rpctls_server_so;
-			rpctls_server_so = NULL;
-			mtx_unlock(&rpctls_server_lock);
-			finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
-			fdrop(fp, td);	/* Drop fp reference. */
-			td->td_retval[0] = fd;
-		}
-printf("srv returning=%d\n", fd);
+			if (error == 0) {
+				finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so,
+				    &socketops);
+				fdrop(fp, td);	/* Drop fp reference. */
+				td->td_retval[0] = fd;
+			}
+		} else
+			error = EPERM;
+printf("srvsocket err=%d fd=%d\n", error, fd);
 		break;
 	default:
 		error = EINVAL;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006301449.05UEnq2x072917>