Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 03 Sep 2019 14:06:12 -0000
From:      Alan Somers <asomers@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r345876 - in projects/fuse2: sys/fs/fuse tests/sys/fs/fusefs
Message-ID:  <201904041651.x34GpYcs017250@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: asomers
Date: Thu Apr  4 16:51:34 2019
New Revision: 345876
URL: https://svnweb.freebsd.org/changeset/base/345876

Log:
  fusefs: correctly handle short writes
  
  If a FUSE daemon returns FOPEN_DIRECT_IO when a file is opened, then it's
  allowed to write less data than was requested during a FUSE_WRITE operation
  on that file handle.  fusefs should simply return a short write to userland.
  
  The old code attempted to resend the unsent data.  Not only was that
  incorrect behavior, but it did it in an ineffective way, by attempting to
  "rewind" the uio and uiomove the unsent data again.
  
  This commit correctly handles short writes by returning directly to
  userland if FOPEN_DIRECT_IO was set.  If it wasn't set (making the short
  write technically a protocol violation), then we resend the unsent data.
  But instead of rewinding the uio, just resend the data that's already in the
  kernel.
  
  That necessitated a few changes to fuse_ipc.c to reduce the amount of bzero
  activity.  fusefs may be marginally faster as a result.
  
  PR:		236381
  Sponsored by:	The FreeBSD Foundation

Modified:
  projects/fuse2/sys/fs/fuse/fuse_internal.c
  projects/fuse2/sys/fs/fuse/fuse_io.c
  projects/fuse2/sys/fs/fuse/fuse_ipc.c
  projects/fuse2/sys/fs/fuse/fuse_ipc.h
  projects/fuse2/sys/fs/fuse/fuse_vnops.c
  projects/fuse2/tests/sys/fs/fusefs/mockfs.cc
  projects/fuse2/tests/sys/fs/fusefs/write.cc

Modified: projects/fuse2/sys/fs/fuse/fuse_internal.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_internal.c	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/sys/fs/fuse/fuse_internal.c	Thu Apr  4 16:51:34 2019	(r345876)
@@ -282,7 +282,7 @@ fuse_internal_fsync(struct vnode *vp,
     int waitfor,
     bool datasync)
 {
-	struct fuse_fsync_in *ffsi;
+	struct fuse_fsync_in *ffsi = NULL;
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *fufh;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
@@ -295,15 +295,20 @@ fuse_internal_fsync(struct vnode *vp,
 	}
 	if (vnode_isdir(vp))
 		op = FUSE_FSYNCDIR;
+
+	fdisp_init(&fdi, sizeof(*ffsi));
 	/*
 	 * fsync every open file handle for this file, because we can't be sure
 	 * which file handle the caller is really referring to.
 	 */
 	LIST_FOREACH(fufh, &fvdat->handles, next) {
-		fdisp_init(&fdi, sizeof(*ffsi));
-		fdisp_make_vp(&fdi, op, vp, td, NULL);
+		if (ffsi == NULL)
+			fdisp_make_vp(&fdi, op, vp, td, NULL);
+		else
+			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
 		ffsi = fdi.indata;
 		ffsi->fh = fufh->fh_id;
+		ffsi->fsync_flags = 0;
 
 		if (datasync)
 			ffsi->fsync_flags = 1;
@@ -315,8 +320,8 @@ fuse_internal_fsync(struct vnode *vp,
 				fuse_internal_fsync_callback);
 			fuse_insert_message(fdi.tick);
 		}
-		fdisp_destroy(&fdi);
 	}
+	fdisp_destroy(&fdi);
 
 	return err;
 }
@@ -331,7 +336,7 @@ fuse_internal_readdir(struct vnode *vp,
 {
 	int err = 0;
 	struct fuse_dispatcher fdi;
-	struct fuse_read_in *fri;
+	struct fuse_read_in *fri = NULL;
 
 	if (uio_resid(uio) == 0) {
 		return 0;
@@ -344,9 +349,11 @@ fuse_internal_readdir(struct vnode *vp,
 	 */
 
 	while (uio_resid(uio) > 0) {
-
 		fdi.iosize = sizeof(*fri);
-		fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+		if (fri == NULL)
+			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+		else
+			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
 
 		fri = fdi.indata;
 		fri->fh = fufh->fh_id;
@@ -419,8 +426,8 @@ fuse_internal_readdir_processdata(struct uio *uio,
 			err = -1;
 			break;
 		}
-		fiov_refresh(cookediov);
 		fiov_adjust(cookediov, bytesavail);
+		bzero(cookediov->base, bytesavail);
 
 		de = (struct dirent *)cookediov->base;
 		de->d_fileno = fudge->ino;

Modified: projects/fuse2/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	Thu Apr  4 16:51:34 2019	(r345876)
@@ -341,10 +341,14 @@ fuse_write_directbackend(struct vnode *vp, struct uio 
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_write_in *fwi;
+	struct fuse_write_out *fwo;
 	struct fuse_dispatcher fdi;
 	size_t chunksize;
+	void *fwi_data;
+	off_t as_written_offset;
 	int diff;
 	int err = 0;
+	bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO;
 
 	if (uio->uio_resid == 0)
 		return (0);
@@ -364,36 +368,61 @@ fuse_write_directbackend(struct vnode *vp, struct uio 
 		fwi->fh = fufh->fh_id;
 		fwi->offset = uio->uio_offset;
 		fwi->size = chunksize;
+		fwi_data = (char *)fdi.indata + sizeof(*fwi);
 
-		if ((err = uiomove((char *)fdi.indata + sizeof(*fwi),
-		    chunksize, uio)))
+		if ((err = uiomove(fwi_data, chunksize, uio)))
 			break;
 
+retry:
 		if ((err = fdisp_wait_answ(&fdi)))
 			break;
 
+		fwo = ((struct fuse_write_out *)fdi.answ);
+
 		/* Adjust the uio in the case of short writes */
-		diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size;
-		if (diff < 0) {
-			err = EINVAL;
-			break;
-		} else if (diff > 0 && !(ioflag & IO_DIRECT)) {
-			/* 
-			 * XXX We really should be directly checking whether
-			 * the file was opened with FOPEN_DIRECT_IO, not
-			 * IO_DIRECT.  IO_DIRECT can be set in multiple ways.
-			 */
-			SDT_PROBE2(fuse, , io, trace, 1,
-				"misbehaving filesystem: short writes are only "
-				"allowed with direct_io");
-		}
-		uio->uio_resid += diff;
-		uio->uio_offset -= diff;
+		diff = fwi->size - fwo->size;
+		as_written_offset = uio->uio_offset - diff;
 
-		if (uio->uio_offset > fvdat->filesize &&
+		if (as_written_offset - diff > fvdat->filesize &&
 		    fuse_data_cache_mode != FUSE_CACHE_UC) {
-			fuse_vnode_setsize(vp, cred, uio->uio_offset);
+			fuse_vnode_setsize(vp, cred, as_written_offset);
 			fvdat->flag &= ~FN_SIZECHANGE;
+		}
+
+		if (diff < 0) {
+			printf("WARNING: misbehaving FUSE filesystem "
+				"wrote more data than we provided it\n");
+			err = EINVAL;
+			break;
+		} else if (diff > 0) {
+			/* Short write */
+			if (!direct_io) {
+				printf("WARNING: misbehaving FUSE filesystem: "
+					"short writes are only allowed with "
+					"direct_io\n");
+			}
+			if (ioflag & IO_DIRECT) {
+				/* Return early */
+				uio->uio_resid += diff;
+				uio->uio_offset -= diff;
+				break;
+			} else {
+				/* Resend the unwritten portion of data */
+				fdi.iosize = sizeof(*fwi) + diff;
+				/* Refresh fdi without clearing data buffer */
+				fdisp_refresh_vp(&fdi, FUSE_WRITE, vp,
+					uio->uio_td, cred);
+				fwi = fdi.indata;
+				MPASS2(fwi == fdi.indata, "FUSE dispatcher "
+					"reallocated despite no increase in "
+					"size?");
+				void *src = (char*)fwi_data + fwo->size;
+				memmove(fwi_data, src, diff);
+				fwi->fh = fufh->fh_id;
+				fwi->offset = as_written_offset;
+				fwi->size = diff;
+				goto retry;
+			}
 		}
 	}
 

Modified: projects/fuse2/sys/fs/fuse/fuse_ipc.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.c	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.c	Thu Apr  4 16:51:34 2019	(r345876)
@@ -92,6 +92,7 @@ SDT_PROVIDER_DECLARE(fuse);
  */
 SDT_PROBE_DEFINE2(fuse, , ipc, trace, "int", "char*");
 
+static void fiov_clear(struct fuse_iov *fiov);
 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
 static void fticket_refresh(struct fuse_ticket *ftick);
 static void fticket_destroy(struct fuse_ticket *ftick);
@@ -185,11 +186,19 @@ fiov_adjust(struct fuse_iov *fiov, size_t size)
 	fiov->len = size;
 }
 
+/* Clear the fiov's data buffer */
+static void
+fiov_clear(struct fuse_iov *fiov)
+{
+	bzero(fiov->base, fiov->len);
+}
+
+/* Resize the fiov if needed, and clear it's buffer */
 void
 fiov_refresh(struct fuse_iov *fiov)
 {
-	bzero(fiov->base, fiov->len);
 	fiov_adjust(fiov, 0);
+	bzero(fiov->base, fiov->len);
 }
 
 static int
@@ -267,7 +276,7 @@ fticket_destroy(struct fuse_ticket *ftick)
 	return uma_zfree(ticket_zone, ftick);
 }
 
-static	inline
+static inline
 void
 fticket_refresh(struct fuse_ticket *ftick)
 {
@@ -290,6 +299,27 @@ fticket_refresh(struct fuse_ticket *ftick)
 	ftick->tk_flag = 0;
 }
 
+/* Prepar the ticket to be reused, but don't clear its data buffers */
+static inline void
+fticket_reset(struct fuse_ticket *ftick)
+{
+	FUSE_ASSERT_MS_DONE(ftick);
+	FUSE_ASSERT_AW_DONE(ftick);
+
+	ftick->tk_ms_bufdata = NULL;
+	ftick->tk_ms_bufsize = 0;
+	ftick->tk_ms_type = FT_M_FIOV;
+
+	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
+
+	ftick->tk_aw_errno = 0;
+	ftick->tk_aw_bufdata = NULL;
+	ftick->tk_aw_bufsize = 0;
+	ftick->tk_aw_type = FT_A_FIOV;
+
+	ftick->tk_flag = 0;
+}
+
 static int
 fticket_wait_answer(struct fuse_ticket *ftick)
 {
@@ -703,7 +733,26 @@ fuse_standard_handler(struct fuse_ticket *ftick, struc
 	return err;
 }
 
-void
+/*
+ * Reinitialize a dispatcher from a pid and node id, without resizing or
+ * clearing its data buffers
+ */
+static void
+fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
+{
+	MPASS(fdip->tick);
+	fticket_reset(fdip->tick);
+
+	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
+	    fdip->indata, fdip->iosize);
+
+	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
+		cred);
+}
+
+/* Initialize a dispatcher from a pid and node id */
+static void
 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
 {
@@ -737,6 +786,22 @@ fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_
 	RECTIFY_TDCR(td, cred);
 	return fdisp_make_pid(fdip, op, vnode_mount(vp), VTOI(vp),
 	    td->td_proc->p_pid, cred);
+}
+
+/* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
+void
+fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct vnode *vp, struct thread *td, struct ucred *cred)
+{
+	RECTIFY_TDCR(td, cred);
+	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
+	    td->td_proc->p_pid, cred);
+}
+
+void
+fdisp_refresh(struct fuse_dispatcher *fdip)
+{
+	fticket_refresh(fdip->tick);
 }
 
 SDT_PROBE_DEFINE2(fuse, , ipc, fdisp_wait_answ_error, "char*", "int");

Modified: projects/fuse2/sys/fs/fuse/fuse_ipc.h
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.h	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.h	Thu Apr  4 16:51:34 2019	(r345876)
@@ -374,13 +374,15 @@ fdisp_destroy(struct fuse_dispatcher *fdisp)
 #endif
 }
 
+void fdisp_refresh(struct fuse_dispatcher *fdip);
+
 void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred);
 
-void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
-    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred);
-
 void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct vnode *vp, struct thread *td, struct ucred *cred);
+
+void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred);
 
 int fdisp_wait_answ(struct fuse_dispatcher *fdip);

Modified: projects/fuse2/sys/fs/fuse/fuse_vnops.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_vnops.c	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/sys/fs/fuse/fuse_vnops.c	Thu Apr  4 16:51:34 2019	(r345876)
@@ -2313,9 +2313,10 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 	/*
 	 * Retrieve Linux / FUSE compatible list values.
 	 */
-	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
+	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 	list_xattr_in = fdi.indata;
 	list_xattr_in->size = linux_list_len + sizeof(*list_xattr_out);
+	list_xattr_in->flags = 0;
 	attr_str = (char *)fdi.indata + sizeof(*list_xattr_in);
 	snprintf(attr_str, len, "%s%c", prefix, extattr_namespace_separator);
 

Modified: projects/fuse2/tests/sys/fs/fusefs/mockfs.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	Thu Apr  4 16:51:34 2019	(r345876)
@@ -246,7 +246,8 @@ void debug_fuseop(const mockfs_buf_in *in)
 			printf(" %s=%s", name, value);
 			break;
 		case FUSE_WRITE:
-			printf(" offset=%lu size=%u flags=%u",
+			printf(" fh=%#lx offset=%lu size=%u flags=%u",
+				in->body.write.fh,
 				in->body.write.offset, in->body.write.size,
 				in->body.write.write_flags);
 			break;

Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc	Thu Apr  4 16:32:27 2019	(r345875)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc	Thu Apr  4 16:51:34 2019	(r345876)
@@ -270,12 +270,43 @@ TEST_F(Write, DISABLED_direct_io_evicts_cache)
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
+/*
+ * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not
+ * allowed to return a short write for that file handle.  However, if it does
+ * then we should still do our darndest to handle it by resending the unwritten
+ * portion.
+ */
+TEST_F(Write, indirect_io_short_write)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char *CONTENTS = "abcdefghijklmnop";
+	uint64_t ino = 42;
+	int fd;
+	ssize_t bufsize = strlen(CONTENTS);
+	ssize_t bufsize0 = 11;
+	ssize_t bufsize1 = strlen(CONTENTS) - bufsize0;
+	const char *contents1 = CONTENTS + bufsize0;
+
+	expect_lookup(RELPATH, ino, 0);
+	expect_open(ino, 0, 1);
+	expect_getattr(ino, 0);
+	expect_write(ino, 0, bufsize, bufsize0, 0, CONTENTS);
+	expect_write(ino, bufsize0, bufsize1, bufsize1, 0,
+		contents1);
+
+	fd = open(FULLPATH, O_WRONLY);
+	EXPECT_LE(0, fd) << strerror(errno);
+
+	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+	/* Deliberately leak fd.  close(2) will be tested in release.cc */
+}
+
 /* 
  * When the direct_io option is used, filesystems are allowed to write less
- * data than requested
+ * data than requested.  We should return the short write to userland.
  */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236381 */
-TEST_F(Write, DISABLED_direct_io_short_write)
+TEST_F(Write, direct_io_short_write)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
@@ -284,19 +315,16 @@ TEST_F(Write, DISABLED_direct_io_short_write)
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	ssize_t halfbufsize = bufsize / 2;
-	const char *halfcontents = CONTENTS + halfbufsize;
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, halfbufsize, 0, CONTENTS);
-	expect_write(ino, halfbufsize, halfbufsize, halfbufsize, 0,
-		halfcontents);
 
 	fd = open(FULLPATH, O_WRONLY);
 	EXPECT_LE(0, fd) << strerror(errno);
 
-	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+	ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
@@ -305,15 +333,13 @@ TEST_F(Write, DISABLED_direct_io_short_write)
  * difference between what we requested and what it actually wrote crosses an
  * iov element boundary
  */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236381 */
-TEST_F(Write, DISABLED_direct_io_short_write_iov)
+TEST_F(Write, direct_io_short_write_iov)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS0 = "abcdefgh";
 	const char *CONTENTS1 = "ijklmnop";
 	const char *EXPECTED0 = "abcdefghijklmnop";
-	const char *EXPECTED1 = "hijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t size0 = strlen(CONTENTS0) - 1;
@@ -325,7 +351,6 @@ TEST_F(Write, DISABLED_direct_io_short_write_iov)
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, totalsize, size0, 0, EXPECTED0);
-	expect_write(ino, size0, size1, size1, 0, EXPECTED1);
 
 	fd = open(FULLPATH, O_WRONLY);
 	EXPECT_LE(0, fd) << strerror(errno);
@@ -334,7 +359,7 @@ TEST_F(Write, DISABLED_direct_io_short_write_iov)
 	iov[0].iov_len = strlen(CONTENTS0);
 	iov[1].iov_base = (void*)CONTENTS1;
 	iov[1].iov_len = strlen(CONTENTS1);
-	ASSERT_EQ(totalsize, writev(fd, iov, 2)) << strerror(errno);
+	ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 





Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201904041651.x34GpYcs017250>