Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 3 Jul 2017 19:46:12 +0000 (UTC)
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r320615 - in stable/11/sys: fs/nfs fs/nfsclient kern sys
Message-ID:  <201707031946.v63JkCUS072818@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rmacklem
Date: Mon Jul  3 19:46:12 2017
New Revision: 320615
URL: https://svnweb.freebsd.org/changeset/base/320615

Log:
  MFC: r319882, r320062, r320070, r320126
  Make MAXBCACHEBUF a tunable called vfs.maxbcachebuf.
  
  By making MAXBCACHEBUF a tunable, it can be increased to allow for
  larger read/write data sizes for the NFS client.
  The tunable is limited to MAXPHYS, which is currently 128K.
  Making MAXPHYS a tunable or increasing its value is being discussed,
  since it would be nice to support a read/write data size of 1Mbyte
  for the NFS client when mounting the AmazonEFS file service.
  
  Also, define NFS_MAXXDR as the upper bound on XDR overhead in an NFS RPC.

Modified:
  stable/11/sys/fs/nfs/nfs_commonkrpc.c
  stable/11/sys/fs/nfs/nfsport.h
  stable/11/sys/fs/nfs/nfsproto.h
  stable/11/sys/fs/nfsclient/nfs_clrpcops.c
  stable/11/sys/kern/vfs_bio.c
  stable/11/sys/sys/buf.h
  stable/11/sys/sys/param.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/fs/nfs/nfs_commonkrpc.c
==============================================================================
--- stable/11/sys/fs/nfs/nfs_commonkrpc.c	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfs_commonkrpc.c	Mon Jul  3 19:46:12 2017	(r320615)
@@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
 {
 	int rcvreserve, sndreserve;
-	int pktscale;
+	int pktscale, pktscalesav;
 	struct sockaddr *saddr;
 	struct ucred *origcred;
 	CLIENT *client;
@@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
 		pktscale = 2;
 	if (pktscale > 64)
 		pktscale = 64;
+	pktscalesav = pktscale;
 	/*
 	 * soreserve() can fail if sb_max is too small, so shrink pktscale
 	 * and try again if there is an error.
@@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
 		goto out;
 	}
 	do {
-	    if (error != 0 && pktscale > 2)
+	    if (error != 0 && pktscale > 2) {
+		if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+		    pktscale == pktscalesav)
+		    printf("Consider increasing kern.ipc.maxsockbuf\n");
 		pktscale--;
+	    }
 	    if (nrp->nr_sotype == SOCK_DGRAM) {
 		if (nmp != NULL) {
 			sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
@@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
 		if (nrp->nr_sotype != SOCK_STREAM)
 			panic("nfscon sotype");
 		if (nmp != NULL) {
-			sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+			sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
 			    sizeof (u_int32_t)) * pktscale;
-			rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+			rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
 			    sizeof (u_int32_t)) * pktscale;
 		} else {
 			sndreserve = rcvreserve = 1024 * pktscale;
 		}
 	    }
 	    error = soreserve(so, sndreserve, rcvreserve);
+	    if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+		pktscale <= 2)
+		printf("Must increase kern.ipc.maxsockbuf or reduce"
+		    " rsize, wsize\n");
 	} while (error != 0 && pktscale > 2);
 	soclose(so);
 	if (error) {

Modified: stable/11/sys/fs/nfs/nfsport.h
==============================================================================
--- stable/11/sys/fs/nfs/nfsport.h	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfsport.h	Mon Jul  3 19:46:12 2017	(r320615)
@@ -1022,7 +1022,7 @@ struct nfsreq {
 };
 
 #ifndef NFS_MAXBSIZE
-#define	NFS_MAXBSIZE	MAXBCACHEBUF
+#define	NFS_MAXBSIZE	(maxbcachebuf)
 #endif
 
 /*

Modified: stable/11/sys/fs/nfs/nfsproto.h
==============================================================================
--- stable/11/sys/fs/nfs/nfsproto.h	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfsproto.h	Mon Jul  3 19:46:12 2017	(r320615)
@@ -56,8 +56,22 @@
 #define	NFS_MAXDGRAMDATA 16384
 #define	NFS_MAXPATHLEN	1024
 #define	NFS_MAXNAMLEN	255
+/*
+ * Calculating the maximum XDR overhead for an NFS RPC isn't easy.
+ * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP.
+ * NFS_MAXXDR should be sufficient for all NFS versions over TCP.
+ * It includes:
+ * - Maximum RPC message header. It can include 2 400byte authenticators plus
+ *   a machine name of unlimited length, although it is usually relatively
+ *   small.
+ * - XDR overheads for the NFSv4 compound. This can include Owner and
+ *   Owner_group strings, which are usually fairly small, but are allowed
+ *   to be up to 1024 bytes each.
+ * 4096 is overkill, but should always be sufficient.
+ */
 #define	NFS_MAXPKTHDR	404
-#define	NFS_MAXPACKET	(NFS_SRVMAXIO + 2048)
+#define	NFS_MAXXDR	4096
+#define	NFS_MAXPACKET	(NFS_SRVMAXIO + NFS_MAXXDR)
 #define	NFS_MINPACKET	20
 #define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
 #define	NFSV4_MINORVERSION	0	/* V4 Minor version */

Modified: stable/11/sys/fs/nfsclient/nfs_clrpcops.c
==============================================================================
--- stable/11/sys/fs/nfsclient/nfs_clrpcops.c	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfsclient/nfs_clrpcops.c	Mon Jul  3 19:46:12 2017	(r320615)
@@ -4615,7 +4615,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
     NFSPROC_T *p)
 {
-	uint32_t crflags, *tl;
+	uint32_t crflags, maxval, *tl;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	int error, irdcnt;
@@ -4633,8 +4633,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
 	/* Fill in fore channel attributes. */
 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	*tl++ = 0;				/* Header pad size */
-	*tl++ = txdr_unsigned(100000);		/* Max request size */
-	*tl++ = txdr_unsigned(100000);		/* Max response size */
+	*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
+	*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
 	*tl++ = txdr_unsigned(20);		/* Max operations */
 	*tl++ = txdr_unsigned(64);		/* Max slots */
@@ -4681,7 +4681,26 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
 
 		/* Get the fore channel slot count. */
 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
-		tl += 3;		/* Skip the other counts. */		
+		tl++;			/* Skip the header pad size. */
+
+		/* Make sure nm_wsize is small enough. */
+		maxval = fxdr_unsigned(uint32_t, *tl++);
+		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
+			if (nmp->nm_wsize > 8096)
+				nmp->nm_wsize /= 2;
+			else
+				break;
+		}
+
+		/* Make sure nm_rsize is small enough. */
+		maxval = fxdr_unsigned(uint32_t, *tl++);
+		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
+			if (nmp->nm_rsize > 8096)
+				nmp->nm_rsize /= 2;
+			else
+				break;
+		}
+
 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
 		tl++;
 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);

Modified: stable/11/sys/kern/vfs_bio.c
==============================================================================
--- stable/11/sys/kern/vfs_bio.c	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/kern/vfs_bio.c	Mon Jul  3 19:46:12 2017	(r320615)
@@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int);
 static void bufkva_free(struct buf *);
 static int buf_import(void *, void **, int, int);
 static void buf_release(void *, void **, int);
+static void maxbcachebuf_adjust(void);
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW,
 SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
     &unmapped_buf_allowed, 0,
     "Permit the use of the unmapped i/o");
+int maxbcachebuf = MAXBCACHEBUF;
+SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0,
+    "Maximum size of a buffer cache block");
 
 /*
  * This lock synchronizes access to bd_request.
@@ -856,6 +860,29 @@ bd_wakeup(void)
 }
 
 /*
+ * Adjust the maxbcachbuf tunable.
+ */
+static void
+maxbcachebuf_adjust(void)
+{
+	int i;
+
+	/*
+	 * maxbcachebuf must be a power of 2 >= MAXBSIZE.
+	 */
+	i = 2;
+	while (i * 2 <= maxbcachebuf)
+		i *= 2;
+	maxbcachebuf = i;
+	if (maxbcachebuf < MAXBSIZE)
+		maxbcachebuf = MAXBSIZE;
+	if (maxbcachebuf > MAXPHYS)
+		maxbcachebuf = MAXPHYS;
+	if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
+		printf("maxbcachebuf=%d\n", maxbcachebuf);
+}
+
+/*
  * bd_speedup - speedup the buffer cache flushing code
  */
 void
@@ -902,6 +929,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
 	 */
 	physmem_est = physmem_est * (PAGE_SIZE / 1024);
 
+	maxbcachebuf_adjust();
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
@@ -1012,7 +1040,9 @@ bufinit(void)
 	struct buf *bp;
 	int i;
 
-	CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
+	KASSERT(maxbcachebuf >= MAXBSIZE,
+	    ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
+	    MAXBSIZE));
 	mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF);
 	mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF);
 	for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++)
@@ -1059,7 +1089,7 @@ bufinit(void)
 	 * PAGE_SIZE.
 	 */
 	maxbufspace = (long)nbuf * BKVASIZE;
-	hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
+	hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
 	lobufspace = (hibufspace / 20) * 19; /* 95% */
 	bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;
 
@@ -1071,9 +1101,9 @@ bufinit(void)
 	 * The lower 1 MiB limit is the historical upper limit for
 	 * hirunningspace.
 	 */
-	hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
+	hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf),
 	    16 * 1024 * 1024), 1024 * 1024);
-	lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
+	lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf);
 
 	/*
 	 * Limit the amount of malloc memory since it is wired permanently into
@@ -3498,9 +3528,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int 
 	KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
 	    ("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
 	ASSERT_VOP_LOCKED(vp, "getblk");
-	if (size > MAXBCACHEBUF)
-		panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
-		    MAXBCACHEBUF);
+	if (size > maxbcachebuf)
+		panic("getblk: size(%d) > maxbcachebuf(%d)\n", size,
+		    maxbcachebuf);
 	if (!unmapped_buf_allowed)
 		flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
 

Modified: stable/11/sys/sys/buf.h
==============================================================================
--- stable/11/sys/sys/buf.h	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/sys/buf.h	Mon Jul  3 19:46:12 2017	(r320615)
@@ -459,6 +459,7 @@ buf_countdeps(struct buf *bp, int i)
 extern int	nbuf;			/* The number of buffer headers */
 extern long	maxswzone;		/* Max KVA for swap structures */
 extern long	maxbcache;		/* Max KVA for buffer cache */
+extern int	maxbcachebuf;		/* Max buffer cache block size */
 extern long	runningbufspace;
 extern long	hibufspace;
 extern int	dirtybufthresh;

Modified: stable/11/sys/sys/param.h
==============================================================================
--- stable/11/sys/sys/param.h	Mon Jul  3 19:39:58 2017	(r320614)
+++ stable/11/sys/sys/param.h	Mon Jul  3 19:46:12 2017	(r320615)
@@ -244,9 +244,7 @@
  *		Filesystems can of course request smaller chunks.  Actual
  *		backing memory uses a chunk size of a page (PAGE_SIZE).
  *		The default value here can be overridden on a per-architecture
- *		basis by defining it in <machine/param.h>.  This should
- *		probably be done to increase its value, when MAXBCACHEBUF is
- *		defined as a larger value in <machine/param.h>.
+ *		basis by defining it in <machine/param.h>.
  *
  *		If you make BKVASIZE too small you risk seriously fragmenting
  *		the buffer KVM map which may slow things down a bit.  If you



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201707031946.v63JkCUS072818>