Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 29 Nov 2013 18:50:00 -0500 (EST)
From:      Rick Macklem <rmacklem@uoguelph.ca>
To:        FreeBSD FS <freebsd-fs@freebsd.org>
Cc:        Kostik Belousov <kib@freebsd.org>
Subject:   Re: RFC: NFS client patch to reduce sychronous writes
Message-ID:  <5797959.23550239.1385769000059.JavaMail.root@uoguelph.ca>
In-Reply-To: <20131128191851.GM59496@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
Ok, I've put an updated patch here:
  http://people.freebsd.org/~rmacklem/noncontig-write.patch

It now enables the change which allows non-contiguous byte
range writes within the same buffer cache block when a mount
option I called "noncontigwr" is given to a mount point.
(It still disables this if there has been a file lock applied
 to the file since it was opened by the process(es) that currently
 have the file open and/or mmap()'d.)

If anyone has a suggestion for a better name for the option,
please suggest it.

I didn't do a count for when the non-contiguous writes happen,
since I suspect it will happen on most any mount point that
uses the option (and it seemed to be too NFS specific and
obscure for "mount -v" imho).

So, does this sound reasonable to commit to head?

rick
--- here is the patch, in case you want to look at it ---
--- fs/nfsclient/nfs_clbio.c.orig	2013-08-28 18:45:41.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c	2013-11-28 19:59:30.000000000 -0500
@@ -874,7 +874,7 @@ ncl_write(struct vop_write_args *ap)
 	struct vattr vattr;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
-	int bcount;
+	int bcount, noncontig_write, obcount;
 	int bp_cached, n, on, error = 0, error1;
 	size_t orig_resid, local_resid;
 	off_t orig_size, tmp_off;
@@ -1037,7 +1037,15 @@ again:
 		 * unaligned buffer size.
 		 */
 		mtx_lock(&np->n_mtx);
-		if (uio->uio_offset == np->n_size && n) {
+		if ((np->n_flag & NHASBEENLOCKED) == 0 &&
+		    (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0)
+			noncontig_write = 1;
+		else
+			noncontig_write = 0;
+		if ((uio->uio_offset == np->n_size ||
+		    (noncontig_write != 0 &&
+		    lbn == (np->n_size / biosize) &&
+		    uio->uio_offset + n > np->n_size)) && n) {
 			mtx_unlock(&np->n_mtx);
 			/*
 			 * Get the buffer (in its pre-append state to maintain
@@ -1045,8 +1053,8 @@ again:
 			 * nfsnode after we have locked the buffer to prevent
 			 * readers from reading garbage.
 			 */
-			bcount = on;
-			bp = nfs_getcacheblk(vp, lbn, bcount, td);
+			obcount = np->n_size - (lbn * biosize);
+			bp = nfs_getcacheblk(vp, lbn, obcount, td);
 
 			if (bp != NULL) {
 				long save;
@@ -1058,9 +1066,12 @@ again:
 				mtx_unlock(&np->n_mtx);
 
 				save = bp->b_flags & B_CACHE;
-				bcount += n;
+				bcount = on + n;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
+				if (noncontig_write != 0 && on > obcount)
+					vfs_bio_bzero_buf(bp, obcount, on -
+					    obcount);
 			}
 		} else {
 			/*
@@ -1159,19 +1170,23 @@ again:
 		 * area, just update the b_dirtyoff and b_dirtyend,
 		 * otherwise force a write rpc of the old dirty area.
 		 *
+		 * If there has been a file lock applied to this file
+		 * or vfs.nfs.old_noncontig_writing is set, do the following:
 		 * While it is possible to merge discontiguous writes due to
 		 * our having a B_CACHE buffer ( and thus valid read data
 		 * for the hole), we don't because it could lead to
 		 * significant cache coherency problems with multiple clients,
 		 * especially if locking is implemented later on.
 		 *
-		 * As an optimization we could theoretically maintain
-		 * a linked list of discontinuous areas, but we would still
-		 * have to commit them separately so there isn't much
-		 * advantage to it except perhaps a bit of asynchronization.
+		 * If vfs.nfs.old_noncontig_writing is not set and there has
+		 * not been file locking done on this file:
+		 * Relax coherency a bit for the sake of performance and
+		 * expand the current dirty region to contain the new
+		 * write even if it means we mark some non-dirty data as
+		 * dirty.
 		 */
 
-		if (bp->b_dirtyend > 0 &&
+		if (noncontig_write == 0 && bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			if (bwrite(bp) == EINTR) {
 				error = EINTR;
--- fs/nfsclient/nfsnode.h.orig	2013-11-19 18:17:37.000000000 -0500
+++ fs/nfsclient/nfsnode.h	2013-11-25 21:29:58.000000000 -0500
@@ -157,6 +157,7 @@ struct nfsnode {
 #define	NLOCKWANT	0x00010000  /* Want the sleep lock */
 #define	NNOLAYOUT	0x00020000  /* Can't get a layout for this file */
 #define	NWRITEOPENED	0x00040000  /* Has been opened for writing */
+#define	NHASBEENLOCKED	0x00080000  /* Has been file locked. */
 
 /*
  * Convert between nfsnode pointers and vnode pointers
--- fs/nfsclient/nfs_clvnops.c.orig	2013-11-19 18:19:42.000000000 -0500
+++ fs/nfsclient/nfs_clvnops.c	2013-11-25 21:32:47.000000000 -0500
@@ -3079,6 +3079,10 @@ nfs_advlock(struct vop_advlock_args *ap)
 					np->n_change = va.va_filerev;
 				}
 			}
+			/* Mark that a file lock has been acquired. */
+			mtx_lock(&np->n_mtx);
+			np->n_flag |= NHASBEENLOCKED;
+			mtx_unlock(&np->n_mtx);
 		}
 		NFSVOPUNLOCK(vp, 0);
 		return (0);
@@ -3098,6 +3102,12 @@ nfs_advlock(struct vop_advlock_args *ap)
 				error = ENOLCK;
 			}
 		}
+		if (error == 0 && ap->a_op == F_SETLK) {
+			/* Mark that a file lock has been acquired. */
+			mtx_lock(&np->n_mtx);
+			np->n_flag |= NHASBEENLOCKED;
+			mtx_unlock(&np->n_mtx);
+		}
 	}
 	return (error);
 }
--- fs/nfsclient/nfs_clvfsops.c.orig	2013-11-28 20:00:58.000000000 -0500
+++ fs/nfsclient/nfs_clvfsops.c	2013-11-28 20:06:32.000000000 -0500
@@ -719,7 +719,8 @@ static const char *nfs_opts[] = { "from"
     "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
     "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
     "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion",
-    "nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize",
+    "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs",
+    "wcommitsize",
     NULL };
 
 /*
@@ -840,6 +841,8 @@ nfs_mount(struct mount *mp)
 		args.flags |= NFSMNT_ALLGSSNAME;
 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
 		args.flags |= NFSMNT_NOCTO;
+	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
+		args.flags |= NFSMNT_NONCONTIGWR;
 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
 		args.flags |= NFSMNT_PNFS;
 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
@@ -1792,6 +1795,8 @@ void nfscl_retopts(struct nfsmount *nmp,
 	    &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
 	    &blen);
+	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
+	    ",noncontigwr", &buf, &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
 	    0, ",lockd", &buf, &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
--- nfsclient/nfsargs.h.orig	2013-11-28 19:53:56.000000000 -0500
+++ nfsclient/nfsargs.h	2013-11-28 19:56:38.000000000 -0500
@@ -99,5 +99,6 @@ struct nfs_args {
 #define	NFSMNT_STRICT3530	0x10000000 /* Adhere strictly to RFC3530 */
 #define	NFSMNT_NOCTO		0x20000000 /* Don't flush attrcache on open */
 #define	NFSMNT_PNFS		0x40000000 /* Enable pNFS support */
+#define	NFSMNT_NONCONTIGWR	0x80000000 /* Enable non-contiguous writes */
 
 #endif



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?5797959.23550239.1385769000059.JavaMail.root>