Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 3 Aug 2021 23:54:10 GMT
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 9fb6e613373c - stable/13 - nfsd: Add sysctl to set maximum I/O size up to 1Mbyte
Message-ID:  <202108032354.173NsABE014110@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=9fb6e613373cf90bb25e7cf5b40a0a7f82c941f6

commit 9fb6e613373cf90bb25e7cf5b40a0a7f82c941f6
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-07-16 22:01:03 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-08-03 23:50:33 +0000

    nfsd: Add sysctl to set maximum I/O size up to 1Mbyte
    
    Since MAXPHYS now allows the FreeBSD NFS client
    to do 1Mbyte I/O operations, add a sysctl called vfs.nfsd.srvmaxio
    so that the maximum NFS server I/O size can be set up to 1Mbyte.
    The Linux NFS client can also do 1Mbyte I/O operations.
    
    The default of 128Kbytes for the maximum I/O size has
    not been changed for two reasons:
    - kern.ipc.maxsockbuf must be increased to support 1Mbyte I/O
    - The limited benchmarking I can do actually shows a drop in I/O rate
      when the I/O size is above 256Kbytes.
    However, daveb@spectralogic.com reports seeing an increase
    in I/O rate for the 1Mbyte I/O size vs 128Kbytes using a Linux client.
    
    (cherry picked from commit ee29e6f31111ea3cd490248987f2f2ec412eb0bb)
---
 sys/fs/nfs/nfs.h                 |  2 +-
 sys/fs/nfs/nfs_commonport.c      |  7 ++--
 sys/fs/nfs/nfs_commonsubs.c      |  5 ++-
 sys/fs/nfs/nfsproto.h            |  1 -
 sys/fs/nfsserver/nfs_nfsdport.c  | 81 ++++++++++++++++++++++++++++++++++++++++
 sys/fs/nfsserver/nfs_nfsdserv.c  | 12 ++++--
 sys/fs/nfsserver/nfs_nfsdstate.c | 11 +++---
 7 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h
index 272b8dbfee22..1a29a7e1d6ec 100644
--- a/sys/fs/nfs/nfs.h
+++ b/sys/fs/nfs/nfs.h
@@ -156,7 +156,7 @@
 	(t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0)
 #define	NFS_SRVMAXDATA(n) 						\
 		(((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? 		\
-		 NFS_SRVMAXIO : NFS_V2MAXDATA)
+		 nfs_srvmaxio : NFS_V2MAXDATA)
 #define	NFS64BITSSET	0xffffffffffffffffull
 #define	NFS64BITSMINUS1	0xfffffffffffffffeull
 
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index 47038980c640..8a100749fc32 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -76,6 +76,7 @@ void (*nfsd_call_servertimer)(void) = NULL;
 void (*ncl_call_invalcaches)(struct vnode *) = NULL;
 vop_advlock_t *nfs_advlock_p = NULL;
 vop_reclaim_t *nfs_reclaim_p = NULL;
+uint32_t nfs_srvmaxio = NFS_SRVMAXIO;
 
 int nfs_pnfsio(task_fn_t *, void *);
 
@@ -303,11 +304,11 @@ nfsvno_getfs(struct nfsfsinfo *sip, int isdgram)
 	if (isdgram)
 		pref = NFS_MAXDGRAMDATA;
 	else
-		pref = NFS_SRVMAXIO;
-	sip->fs_rtmax = NFS_SRVMAXIO;
+		pref = nfs_srvmaxio;
+	sip->fs_rtmax = nfs_srvmaxio;
 	sip->fs_rtpref = pref;
 	sip->fs_rtmult = NFS_FABLKSIZE;
-	sip->fs_wtmax = NFS_SRVMAXIO;
+	sip->fs_wtmax = nfs_srvmaxio;
 	sip->fs_wtpref = pref;
 	sip->fs_wtmult = NFS_FABLKSIZE;
 	sip->fs_dtpref = pref;
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index 1bdc13123aac..817d89284091 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -85,6 +85,7 @@ extern volatile int nfsrv_devidcnt;
 extern int nfscl_debuglevel;
 extern struct nfsdevicehead nfsrv_devidhead;
 extern struct nfsstatsv1 nfsstatsv1;
+extern uint32_t nfs_srvmaxio;
 
 SYSCTL_DECL(_vfs_nfs);
 SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW,
@@ -2201,7 +2202,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			i = fxdr_unsigned(int, *tl);
-			if (compare && !(*retcmpp) && i != NFS_SRVMAXIO)
+			if (compare && !(*retcmpp) && i != nfs_srvmaxio)
 				*retcmpp = NFSERR_NOTSAME;
 			break;
 		default:
@@ -3012,7 +3013,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 		case NFSATTRBIT_LAYOUTALIGNMENT:
 		case NFSATTRBIT_LAYOUTBLKSIZE:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
-			*tl = txdr_unsigned(NFS_SRVMAXIO);
+			*tl = txdr_unsigned(nfs_srvmaxio);
 			retnum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_XATTRSUPPORT:
diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h
index 62d86c3a4593..13e146154805 100644
--- a/sys/fs/nfs/nfsproto.h
+++ b/sys/fs/nfs/nfsproto.h
@@ -73,7 +73,6 @@
  */
 #define	NFS_MAXPKTHDR	404
 #define	NFS_MAXXDR	4096
-#define	NFS_MAXPACKET	(NFS_SRVMAXIO + NFS_MAXXDR)
 #define	NFS_MINPACKET	20
 #define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
 #define	NFSV4_MINORVERSION	0	/* V4 Minor version */
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 7bcbc738d61b..efe9aac7a136 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -76,6 +76,9 @@ extern struct nfsdontlisthead nfsrv_dontlisthead;
 extern volatile int nfsrv_dontlistlen;
 extern volatile int nfsrv_devidcnt;
 extern int nfsrv_maxpnfsmirror;
+extern uint32_t nfs_srvmaxio;
+extern int nfs_bufpackets;
+extern u_long sb_max_adj;
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
 NFSSTATESPINLOCK;
@@ -195,6 +198,84 @@ SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
     sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
 
+/*
+ * Handler for vfs.nfsd.srvmaxio.  nfs_srvmaxio can only be increased and
+ * only when the nfsd threads are not running.  The setting must be a power
+ * of 2, with the current limit of 1Mbyte.  Returns 0 on success or when the
+ * value is unchanged, and EINVAL (after a printf) when it is rejected.
+ */
+static int
+sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	u_int newsrvmaxio;
+	uint64_t tval;
+
+	/* Report the current setting and pick up a new one, if supplied. */
+	newsrvmaxio = nfs_srvmaxio;
+	error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (newsrvmaxio == nfs_srvmaxio)
+		return (0);
+	if (newsrvmaxio < nfs_srvmaxio) {
+		printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
+		return (EINVAL);
+	}
+	if (newsrvmaxio > 1048576) {
+		printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
+		return (EINVAL);
+	}
+	if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
+		printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * Check that kern.ipc.maxsockbuf is large enough for
+	 * newsrvmaxio, given the setting of vfs.nfs.bufpackets.
+	 * Compute vfs.nfs.bufpackets * maximum RPC message in 64 bits,
+	 * so that the product is not truncated to 32 bits before use.
+	 */
+	tval = (uint64_t)(newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
+	if (tval > sb_max_adj) {
+		/*
+		 * tval is now the suggested value for sb_max_adj.
+		 * Convert suggested sb_max_adj value to a suggested
+		 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
+		 * Perform the inverse calculation of (from uipc_sockbuf.c):
+		 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
+		 *     (MSIZE + MCLBYTES);
+		 * XXX If the calculation of sb_max_adj from sb_max changes,
+		 *     this calculation must be changed as well.
+		 */
+		tval *= (MSIZE + MCLBYTES);  /* Brackets for readability. */
+		tval += MCLBYTES - 1;        /* Round up divide. */
+		tval /= MCLBYTES;
+		printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
+		    "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
+		    newsrvmaxio);
+		return (EINVAL);
+	}
+
+	/* The setting may only change while no nfsd threads are running. */
+	NFSD_LOCK();
+	if (newnfs_numnfsd != 0) {
+		NFSD_UNLOCK();
+		printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
+		    "threads are running\n");
+		return (EINVAL);
+	}
+
+	/* Store the validated value before the NFSD lock is released. */
+	nfs_srvmaxio = newsrvmaxio;
+	NFSD_UNLOCK();
+	return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+    sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
+
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index e564a6a48b79..0ba3472b4ff9 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -66,6 +66,7 @@ extern u_long sb_max_adj;
 extern int nfsrv_pnfsatime;
 extern int nfsrv_maxpnfsmirror;
 extern int nfs_maxcopyrange;
+extern uint32_t nfs_srvmaxio;
 
 static int	nfs_async = 0;
 SYSCTL_DECL(_vfs_nfsd);
@@ -1023,7 +1024,7 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram,
 			lop->lo_end = NFS64BITSSET;
 	}
 
-	if (retlen > NFS_SRVMAXIO || retlen < 0)
+	if (retlen > nfs_srvmaxio || retlen < 0)
 		nd->nd_repstat = EIO;
 	if (vnode_vtype(vp) != VREG && !nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
@@ -4417,6 +4418,7 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
 	struct nfsdsession *sep = NULL;
 	uint32_t rdmacnt;
 	struct thread *p = curthread;
+	static bool do_printf = true;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
@@ -4438,12 +4440,16 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
 	sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
 	if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) {
 		sep->sess_maxreq = sb_max_adj - NFS_MAXXDR;
-		printf("Consider increasing kern.ipc.maxsockbuf\n");
+		if (do_printf)
+			printf("Consider increasing kern.ipc.maxsockbuf\n");
+		do_printf = false;
 	}
 	sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
 	if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) {
 		sep->sess_maxresp = sb_max_adj - NFS_MAXXDR;
-		printf("Consider increasing kern.ipc.maxsockbuf\n");
+		if (do_printf)
+			printf("Consider increasing kern.ipc.maxsockbuf\n");
+		do_printf = false;
 	}
 	sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index 01280c8e49c6..750eda2027ec 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -42,6 +42,7 @@ struct nfsv4lock nfsv4rootfs_lock;
 time_t nfsdev_time = 0;
 int nfsrv_layouthashsize;
 volatile int nfsrv_layoutcnt = 0;
+extern uint32_t nfs_srvmaxio;
 
 extern int newnfs_numnfsd;
 extern struct nfsstatsv1 nfsstatsv1;
@@ -6898,7 +6899,7 @@ nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 
 	/* Set the stripe size to the maximum I/O size. */
-	*tl++ = txdr_unsigned(NFS_SRVMAXIO & NFSFLAYUTIL_STRIPE_MASK);
+	*tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
 	*tl++ = 0;					/* 1st stripe index. */
 	pattern_offset = 0;
 	txdr_hyper(pattern_offset, tl); tl += 2;	/* Pattern offset. */
@@ -7964,13 +7965,13 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
 	*tl++ = txdr_unsigned(2);		/* Two NFS Versions. */
 	*tl++ = txdr_unsigned(NFS_VER4);	/* NFSv4. */
 	*tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
-	*tl++ = txdr_unsigned(NFS_SRVMAXIO);	/* DS max rsize. */
-	*tl++ = txdr_unsigned(NFS_SRVMAXIO);	/* DS max wsize. */
+	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max rsize. */
+	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max wsize. */
 	*tl++ = newnfs_true;			/* Tightly coupled. */
 	*tl++ = txdr_unsigned(NFS_VER4);	/* NFSv4. */
 	*tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
-	*tl++ = txdr_unsigned(NFS_SRVMAXIO);	/* DS max rsize. */
-	*tl++ = txdr_unsigned(NFS_SRVMAXIO);	/* DS max wsize. */
+	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max rsize. */
+	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max wsize. */
 	*tl = newnfs_true;			/* Tightly coupled. */
 
 	ds->nfsdev_hostnamelen = strlen(dnshost);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202108032354.173NsABE014110>