Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 12 Apr 2016 22:58:40 +0000 (UTC)
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r297887 - projects/pnfs-server/sys/fs/fuse
Message-ID:  <201604122258.u3CMweir079752@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rmacklem
Date: Tue Apr 12 22:58:40 2016
New Revision: 297887
URL: https://svnweb.freebsd.org/changeset/base/297887

Log:
  Check in the fuse changes that allow it to export the file system via NFS.

Modified:
  projects/pnfs-server/sys/fs/fuse/fuse_file.c
  projects/pnfs-server/sys/fs/fuse/fuse_file.h
  projects/pnfs-server/sys/fs/fuse/fuse_internal.c
  projects/pnfs-server/sys/fs/fuse/fuse_internal.h
  projects/pnfs-server/sys/fs/fuse/fuse_io.c
  projects/pnfs-server/sys/fs/fuse/fuse_ipc.c
  projects/pnfs-server/sys/fs/fuse/fuse_node.c
  projects/pnfs-server/sys/fs/fuse/fuse_node.h
  projects/pnfs-server/sys/fs/fuse/fuse_vfsops.c
  projects/pnfs-server/sys/fs/fuse/fuse_vnops.c

Modified: projects/pnfs-server/sys/fs/fuse/fuse_file.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_file.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_file.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -88,6 +88,8 @@ static int fuse_fh_count = 0;
 SYSCTL_INT(_vfs_fuse, OID_AUTO, filehandle_count, CTLFLAG_RD,
     &fuse_fh_count, 0, "");
 
+extern int fuse_force_directio;
+
 int
 fuse_filehandle_open(struct vnode *vp,
     fufh_type_t fufh_type,
@@ -141,7 +143,11 @@ fuse_filehandle_open(struct vnode *vp,
 	foo = fdi.answ;
 
 	fuse_filehandle_init(vp, fufh_type, fufhp, foo->fh);
-	fuse_vnode_open(vp, foo->open_flags, td);
+
+	if (fufh_type == FUFH_WRONLY || fuse_force_directio > 0)
+		fuse_vnode_open(vp, foo->open_flags | FOPEN_DIRECT_IO, td);
+	else
+		fuse_vnode_open(vp, foo->open_flags, td);
 
 out:
 	fdisp_destroy(&fdi);
@@ -206,6 +212,28 @@ fuse_filehandle_valid(struct vnode *vp, 
 	return FUFH_IS_VALID(fufh);
 }
 
+/*
+ * Check for a valid file handle, first the type requested, but if that
+ * isn't valid, try for FUFH_RDWR.
+ * Return the FUFH type that is valid or FUFH_INVALID if there are none.
+ * This is a variant of fuse_filehandle_valid() analogous to
+ * fuse_filehandle_getrw().
+ */
+fufh_type_t
+fuse_filehandle_validrw(struct vnode *vp, fufh_type_t fufh_type)
+{
+	struct fuse_vnode_data *fvdat = VTOFUD(vp);
+	struct fuse_filehandle *fufh;
+
+	fufh = &fvdat->fufh[fufh_type];
+	if (FUFH_IS_VALID(fufh) != 0)
+		return (fufh_type);
+	fufh = &fvdat->fufh[FUFH_RDWR];
+	if (FUFH_IS_VALID(fufh) != 0)
+		return (FUFH_RDWR);
+	return (FUFH_INVALID);
+}
+
 int
 fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type,
     struct fuse_filehandle **fufhp)

Modified: projects/pnfs-server/sys/fs/fuse/fuse_file.h
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_file.h	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_file.h	Tue Apr 12 22:58:40 2016	(r297887)
@@ -137,6 +137,7 @@ fuse_filehandle_xlate_to_oflags(fufh_typ
 }
 
 int fuse_filehandle_valid(struct vnode *vp, fufh_type_t fufh_type);
+fufh_type_t fuse_filehandle_validrw(struct vnode *vp, fufh_type_t fufh_type);
 int fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type,
                         struct fuse_filehandle **fufhp);
 int fuse_filehandle_getrw(struct vnode *vp, fufh_type_t fufh_type,

Modified: projects/pnfs-server/sys/fs/fuse/fuse_internal.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_internal.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_internal.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -251,16 +251,19 @@ fuse_internal_fsync(struct vnode *vp,
 int
 fuse_internal_readdir(struct vnode *vp,
     struct uio *uio,
+    off_t startoff,
     struct fuse_filehandle *fufh,
-    struct fuse_iov *cookediov)
+    struct fuse_iov *cookediov,
+    int *ncookies,
+    u_long *cookies)
 {
 	int err = 0;
 	struct fuse_dispatcher fdi;
 	struct fuse_read_in *fri;
+	int fnd_start;
 
-	if (uio_resid(uio) == 0) {
-		return 0;
-	}
+	if (uio_resid(uio) == 0)
+		return (0);
 	fdisp_init(&fdi, 0);
 
 	/*
@@ -268,8 +271,18 @@ fuse_internal_readdir(struct vnode *vp,
 	 * I/O).
 	 */
 
+	/*
+	 * fnd_start is set non-zero once the offset in the directory gets
+	 * to the startoff.  This is done because directories must be read
+	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
+	 * to do an open of the directory.
+	 * If it is not set non-zero here, it will be set non-zero in
+	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
+	 */
+	fnd_start = 0;
+	if (uio->uio_offset == startoff)
+		fnd_start = 1;
 	while (uio_resid(uio) > 0) {
-
 		fdi.iosize = sizeof(*fri);
 		fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
 
@@ -277,53 +290,67 @@ fuse_internal_readdir(struct vnode *vp,
 		fri->fh = fufh->fh_id;
 		fri->offset = uio_offset(uio);
 		fri->size = min(uio_resid(uio), FUSE_DEFAULT_IOSIZE);
-		/* mp->max_read */
 
-		    if ((err = fdisp_wait_answ(&fdi))) {
+		if ((err = fdisp_wait_answ(&fdi)) != 0)
 			break;
-		}
-		if ((err = fuse_internal_readdir_processdata(uio, fri->size, fdi.answ,
-		    fdi.iosize, cookediov))) {
+		if ((err = fuse_internal_readdir_processdata(uio, startoff,
+		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
+		    ncookies, &cookies)) != 0)
 			break;
-		}
 	}
 
 	fdisp_destroy(&fdi);
 	return ((err == -1) ? 0 : err);
 }
 
+/*
+ * Return -1 to indicate that this readdir is finished, 0 if it copied
+ * all the directory data read in and it may be possible to read more,
+ * and a value greater than 0 for a failure.
+ */
 int
 fuse_internal_readdir_processdata(struct uio *uio,
+    off_t startoff,
+    int *fnd_start,
     size_t reqsize,
     void *buf,
     size_t bufsize,
-    void *param)
+    struct fuse_iov *cookediov,
+    int *ncookies,
+    u_long **cookiesp)
 {
 	int err = 0;
-	int cou = 0;
 	int bytesavail;
 	size_t freclen;
-
 	struct dirent *de;
 	struct fuse_dirent *fudge;
-	struct fuse_iov *cookediov = param;
+	u_long *cookies;
 
+	cookies = *cookiesp;
 	if (bufsize < FUSE_NAME_OFFSET) {
-		return -1;
+		return (-1);
 	}
 	for (;;) {
 
 		if (bufsize < FUSE_NAME_OFFSET) {
-			err = -1;
+			/*
+			 * Return 0 since we have done at least one loop
+			 * iteration and we are at the end of the buffer.
+			 * As such, we might be able to read more directory
+			 * data.
+			 */
+			err = 0;
 			break;
 		}
 		fudge = (struct fuse_dirent *)buf;
 		freclen = FUSE_DIRENT_SIZE(fudge);
 
-		cou++;
-
 		if (bufsize < freclen) {
-			err = ((cou == 1) ? -1 : 0);
+			/*
+			 * This indicates a partial directory entry at the
+			 * end of the directory data.
+			 */
+			err = -1;
 			break;
 		}
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
@@ -333,40 +360,60 @@ fuse_internal_readdir_processdata(struct
 		}
 #endif
 
-		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
+		if (fudge->namelen == 0 || fudge->namelen > MAXNAMLEN) {
 			err = EINVAL;
 			break;
 		}
-		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
-					    &fudge->namelen);
 
+		/* bytesavail is the size of the BSD dirent. */
+		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
+		    &fudge->namelen);
 		if (bytesavail > uio_resid(uio)) {
+			/* Out of space for the dir so we are done. */
 			err = -1;
 			break;
 		}
-		fiov_refresh(cookediov);
-		fiov_adjust(cookediov, bytesavail);
 
-		de = (struct dirent *)cookediov->base;
-		de->d_fileno = fudge->ino;	/* XXX: truncation */
-		de->d_reclen = bytesavail;
-		de->d_type = fudge->type;
-		de->d_namlen = fudge->namelen;
-		memcpy((char *)cookediov->base + sizeof(struct dirent) - 
-		       MAXNAMLEN - 1,
-		       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
-		((char *)cookediov->base)[bytesavail] = '\0';
+		/*
+		 * Don't start to copy the directory entries out until
+		 * the requested offset in the directory is found.
+		 */
+		if (*fnd_start != 0) {
+			fiov_refresh(cookediov);
+			fiov_adjust(cookediov, bytesavail);
+			de = (struct dirent *)cookediov->base;
+			de->d_fileno = fudge->ino;	/* XXX: truncation */
+			de->d_reclen = bytesavail;
+			de->d_type = fudge->type;
+			de->d_namlen = fudge->namelen;
+			memcpy((char *)cookediov->base +
+			    sizeof(struct dirent) - MAXNAMLEN - 1,
+			   (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
+			((char *)cookediov->base)[bytesavail] = '\0';
+	
+			err = uiomove(cookediov->base, cookediov->len, uio);
+			if (err != 0)
+				break;
+			if (cookies != NULL) {
+				if (*ncookies == 0) {
+					err = -1;
+					break;
+				}
+				*cookies = fudge->off;
+				cookies++;
+				(*ncookies)--;
+			}
+		} else if (startoff == fudge->off)
+			*fnd_start = 1;
 
-		err = uiomove(cookediov->base, cookediov->len, uio);
-		if (err) {
-			break;
-		}
+		/* Move to the next fuse directory entry. */
+		uio_setoffset(uio, fudge->off);
 		buf = (char *)buf + freclen;
 		bufsize -= freclen;
-		uio_setoffset(uio, fudge->off);
 	}
+	*cookiesp = cookies;
 
-	return err;
+	return (err);
 }
 
 /* remove */

Modified: projects/pnfs-server/sys/fs/fuse/fuse_internal.h
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_internal.h	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_internal.h	Tue Apr 12 22:58:40 2016	(r297887)
@@ -261,15 +261,22 @@ struct pseudo_dirent {
 int
 fuse_internal_readdir(struct vnode           *vp,
                       struct uio             *uio,
+                      off_t                   startoff,
                       struct fuse_filehandle *fufh,
-                      struct fuse_iov        *cookediov);
+                      struct fuse_iov        *cookediov,
+                      int                    *ncookies,
+                      u_long                 *cookies);
 
 int
 fuse_internal_readdir_processdata(struct uio *uio,
+                                  off_t startoff,
+                                  int *fnd_start,
                                   size_t reqsize,
                                   void *buf,
                                   size_t bufsize,
-                                  void *param);
+                                  struct fuse_iov *cookediov,
+                                  int *ncookies,
+                                  u_long **cookies);
 
 /* remove */
 

Modified: projects/pnfs-server/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_io.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_io.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -119,15 +119,10 @@ fuse_io_dispatch(struct vnode *vp, struc
 {
 	struct fuse_filehandle *fufh;
 	int err, directio;
+	fufh_type_t fufhtype;
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
-	err = fuse_filehandle_getrw(vp,
-	    (uio->uio_rw == UIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh);
-	if (err) {
-		printf("FUSE: io dispatch: filehandles are closed\n");
-		return err;
-	}
 	/*
          * Ideally, when the daemon asks for direct io at open time, the
          * standard file flag should be set according to this, so that would
@@ -140,6 +135,18 @@ fuse_io_dispatch(struct vnode *vp, struc
          */
 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
 
+	if (uio->uio_rw == UIO_READ)
+		fufhtype = FUFH_RDONLY;
+	else if (directio != 0)
+		fufhtype = FUFH_WRONLY;
+	else
+		/* Buffer cache writing might read a block in. */
+		fufhtype = FUFH_RDWR;
+	err = fuse_filehandle_getrw(vp, fufhtype, &fufh);
+	if (err) {
+		printf("FUSE: io dispatch: filehandles are closed\n");
+		return err;
+	}
 	switch (uio->uio_rw) {
 	case UIO_READ:
 		if (directio) {

Modified: projects/pnfs-server/sys/fs/fuse/fuse_ipc.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_ipc.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_ipc.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -549,6 +549,7 @@ fuse_body_audit(struct fuse_ticket *ftic
 {
 	int err = 0;
 	enum fuse_opcode opcode;
+	struct fuse_getxattr_in *fgin;
 
 	debug_printf("ftick=%p, blen = %zu\n", ftick, blen);
 
@@ -636,23 +637,23 @@ fuse_body_audit(struct fuse_ticket *ftic
 		break;
 
 	case FUSE_SETXATTR:
-		panic("FUSE_SETXATTR implementor has forgotten to define a"
-		      " response body format check");
+		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_GETXATTR:
-		panic("FUSE_GETXATTR implementor has forgotten to define a"
-		      " response body format check");
-		break;
-
 	case FUSE_LISTXATTR:
-		panic("FUSE_LISTXATTR implementor has forgotten to define a"
-		      " response body format check");
+		fgin = (struct fuse_getxattr_in *)
+		    ((char *)ftick->tk_ms_fiov.base +
+		     sizeof(struct fuse_in_header));
+		if (fgin->size == 0)
+			err = (blen == sizeof(struct fuse_getxattr_out)) ? 0 :
+			    EINVAL;
+		else
+			err = (blen <= fgin->size) ? 0 : EINVAL;
 		break;
 
 	case FUSE_REMOVEXATTR:
-		panic("FUSE_REMOVEXATTR implementor has forgotten to define a"
-		      " response body format check");
+		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FLUSH:
@@ -687,15 +688,15 @@ fuse_body_audit(struct fuse_ticket *ftic
 		break;
 
 	case FUSE_GETLK:
-		panic("FUSE: no response body format check for FUSE_GETLK");
+		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETLK:
-		panic("FUSE: no response body format check for FUSE_SETLK");
+		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETLKW:
-		panic("FUSE: no response body format check for FUSE_SETLKW");
+		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_ACCESS:

Modified: projects/pnfs-server/sys/fs/fuse/fuse_node.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_node.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_node.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -169,7 +169,7 @@ fuse_vnode_hash(uint64_t id)
 	return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT));
 }
 
-static int
+int
 fuse_vnode_alloc(struct mount *mp,
     struct thread *td,
     uint64_t nodeid,
@@ -289,7 +289,16 @@ fuse_vnode_open(struct vnode *vp, int32_
 	 * XXXIP: Handle fd based DIRECT_IO
 	 */
 	if (fuse_open_flags & FOPEN_DIRECT_IO) {
-		VTOFUD(vp)->flag |= FN_DIRECTIO;
+		ASSERT_VOP_ELOCKED(vp, __func__);
+		/*
+		 * If switching from buffer cache I/O to direct I/O, the
+		 * buffer cache blocks must be invalidated to avoid accessing
+		 * stale data in the buffer cache.
+		 */
+		if ((VTOFUD(vp)->flag & FN_DIRECTIO) == 0) {
+			VTOFUD(vp)->flag |= FN_DIRECTIO;
+			fuse_io_invalbuf(vp, td);
+		}
 	} else {
 	        VTOFUD(vp)->flag &= ~FN_DIRECTIO;
 	}

Modified: projects/pnfs-server/sys/fs/fuse/fuse_node.h
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_node.h	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_node.h	Tue Apr 12 22:58:40 2016	(r297887)
@@ -111,6 +111,12 @@ fuse_vnode_setparent(struct vnode *vp, s
 
 void fuse_vnode_destroy(struct vnode *vp);
 
+int fuse_vnode_alloc(struct mount       *mp,
+                     struct thread      *td,
+                     uint64_t            nodeid,
+                     enum vtype          vtyp,
+                     struct vnode      **vpp);
+
 int fuse_vnode_get(struct mount         *mp,
                    uint64_t              nodeid,
                    struct vnode         *dvp,
@@ -128,4 +134,21 @@ int fuse_vnode_savesize(struct vnode *vp
 
 int fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize);
 
+/*
+ * Since making a structure that is a file system specific "struct fid"
+ * is too big, due to alignment issues, this structure is copied into the
+ * fid_data field.  fid_data0 in "struct fid" is used for the vnode type.
+ * Until MAXFIDSZ is increased, only the first half of this structure will
+ * fit in fid_data. This is fine for GlusterFS, since it always sets the
+ * "generation" to 0.  As such, the rest is #ifdef notyet.
+ */
+struct fuse_fid_data {
+	uint64_t	ffid_nid;
+	uint64_t	ffid_parent_nid;
+#ifdef notyet
+	uint64_t	ffid_nid_gen;
+	uint64_t	ffid_parent_nid_gen;
+#endif
+};
+
 #endif /* _FUSE_NODE_H_ */

Modified: projects/pnfs-server/sys/fs/fuse/fuse_vfsops.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_vfsops.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_vfsops.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -105,12 +105,14 @@ static vfs_mount_t fuse_vfsop_mount;
 static vfs_unmount_t fuse_vfsop_unmount;
 static vfs_root_t fuse_vfsop_root;
 static vfs_statfs_t fuse_vfsop_statfs;
+static vfs_fhtovp_t fuse_vfsop_fhtovp;
 
 struct vfsops fuse_vfsops = {
 	.vfs_mount = fuse_vfsop_mount,
 	.vfs_unmount = fuse_vfsop_unmount,
 	.vfs_root = fuse_vfsop_root,
 	.vfs_statfs = fuse_vfsop_statfs,
+	.vfs_fhtovp = fuse_vfsop_fhtovp,
 };
 
 SYSCTL_INT(_vfs_fuse, OID_AUTO, init_backgrounded, CTLFLAG_RD,
@@ -232,8 +234,12 @@ fuse_vfsop_mount(struct mount *mp)
 
 	fuse_trace_printf_vfsop();
 
+	/*
+	 * Allow MNT_UPDATE only so the mountd can set exports on the file
+	 * system.
+	 */
 	if (mp->mnt_flag & MNT_UPDATE)
-		return EOPNOTSUPP;
+		return (0);
 
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_SYNCHRONOUS;
@@ -532,3 +538,35 @@ fake:
 
 	return 0;
 }
+
+/*
+ * Translate a file handle into a vnode.  fid_data0 is the v_type and
+ * fid_data is "struct fuse_fid_data" memcpy()'d so that alignment doesn't
+ * make the structure too big.
+ * There is no "generation" field in the file handle.  GlusterFS never
+ * sets it.  I don't know if other fuse filesystems do set it, but there
+ * is no space for it in fhandle_t.
+ */
+static int
+fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
+    struct vnode **vpp)
+{
+	struct fuse_fid_data ffid;
+	enum vtype vtyp;
+	int err;
+
+	if (fhp->fid_len != offsetof(struct fid, fid_data) +
+	    sizeof(struct fuse_fid_data))
+		return (ESTALE);
+	vtyp = (enum vtype)fhp->fid_data0;
+	if (vtyp != VREG && vtyp != VDIR && vtyp != VLNK)
+		return (ESTALE);
+	memcpy(&ffid, fhp->fid_data, sizeof(ffid));
+	err = fuse_vnode_alloc(mp, curthread, ffid.ffid_nid, vtyp, vpp);
+	if (err != 0)
+		return (ESTALE);
+	if (vtyp == VDIR)
+		VTOFUD(*vpp)->parent_nid = ffid.ffid_parent_nid;
+	return (0);
+}
+

Modified: projects/pnfs-server/sys/fs/fuse/fuse_vnops.c
==============================================================================
--- projects/pnfs-server/sys/fs/fuse/fuse_vnops.c	Tue Apr 12 22:55:47 2016	(r297886)
+++ projects/pnfs-server/sys/fs/fuse/fuse_vnops.c	Tue Apr 12 22:58:40 2016	(r297887)
@@ -82,6 +82,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
+#include <sys/extattr.h>
+#include <sys/jail.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@@ -133,6 +135,12 @@ static vop_write_t fuse_vnop_write;
 static vop_getpages_t fuse_vnop_getpages;
 static vop_putpages_t fuse_vnop_putpages;
 static vop_print_t fuse_vnop_print;
+static vop_getextattr_t fuse_vnop_getextattr;
+static vop_setextattr_t fuse_vnop_setextattr;
+static vop_listextattr_t fuse_vnop_listextattr;
+static vop_deleteextattr_t fuse_vnop_deleteextattr;
+static vop_advlock_t fuse_vnop_advlock;
+static vop_vptofh_t fuse_vnop_vptofh;
 
 struct vop_vector fuse_vnops = {
 	.vop_default = &default_vnodeops,
@@ -162,6 +170,12 @@ struct vop_vector fuse_vnops = {
 	.vop_getpages = fuse_vnop_getpages,
 	.vop_putpages = fuse_vnop_putpages,
 	.vop_print = fuse_vnop_print,
+	.vop_getextattr = fuse_vnop_getextattr,
+	.vop_setextattr = fuse_vnop_setextattr,
+	.vop_listextattr = fuse_vnop_listextattr,
+	.vop_deleteextattr = fuse_vnop_deleteextattr,
+	.vop_advlock = fuse_vnop_advlock,
+	.vop_vptofh = fuse_vnop_vptofh,
 };
 
 static u_long fuse_lookup_cache_hits = 0;
@@ -190,6 +204,8 @@ SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_
 
 int	fuse_pbuf_freecnt = -1;
 
+int	fuse_force_directio = 0;
+
 #define fuse_vm_page_lock(m)		vm_page_lock((m));
 #define fuse_vm_page_unlock(m)		vm_page_unlock((m));
 #define fuse_vm_page_lock_queues()	((void)0)
@@ -391,6 +407,8 @@ bringup:
 
 	x_fh_id = ((struct fuse_open_out *)(feo + 1))->fh;
 	x_open_flags = ((struct fuse_open_out *)(feo + 1))->open_flags;
+	if (fuse_force_directio > 0)
+		x_open_flags |= FOPEN_DIRECT_IO;
 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, x_fh_id);
 	fuse_vnode_open(*vpp, x_open_flags, td);
 	cache_purge_negative(dvp);
@@ -560,27 +578,39 @@ fuse_vnop_inactive(struct vop_inactive_a
 
 	FS_DEBUG("inode=%ju\n", (uintmax_t)VTOI(vp));
 
-	for (type = 0; type < FUFH_MAXTYPE; type++) {
-		fufh = &(fvdat->fufh[type]);
-		if (FUFH_IS_VALID(fufh)) {
-			if (need_flush && vp->v_type == VREG) {
-				if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
-					fuse_vnode_savesize(vp, NULL);
+	/*
+	 * For NFS exported mounts, delay buffer cache flushing and the
+	 * close until the vnode is reclaimed.  This is done since NFS
+	 * does the open implicitly when a read/write occurs and then
+	 * it reduces the vnode reference count to 0 for each read/write.
+	 */
+	if ((vp->v_mount->mnt_flag & MNT_EXPORTED) == 0) {
+		for (type = 0; type < FUFH_MAXTYPE; type++) {
+			fufh = &(fvdat->fufh[type]);
+			if (FUFH_IS_VALID(fufh)) {
+				if (need_flush && vp->v_type == VREG) {
+					if ((VTOFUD(vp)->flag & FN_SIZECHANGE)
+					    != 0) {
+						fuse_vnode_savesize(vp, NULL);
+					}
+					if (fuse_data_cache_invalidate ||
+					    (fvdat->flag & FN_REVOKED) != 0)
+						fuse_io_invalbuf(vp, td);
+					else
+						fuse_io_flushbuf(vp, MNT_WAIT,
+						    td);
+					need_flush = 0;
 				}
-				if (fuse_data_cache_invalidate ||
-				    (fvdat->flag & FN_REVOKED) != 0)
-					fuse_io_invalbuf(vp, td);
-				else
-					fuse_io_flushbuf(vp, MNT_WAIT, td);
-				need_flush = 0;
+				fuse_filehandle_close(vp, type, td, NULL);
 			}
-			fuse_filehandle_close(vp, type, td, NULL);
 		}
 	}
 
+#ifdef notnow
 	if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) {
 		vrecycle(vp);
 	}
+#endif
 	return 0;
 }
 
@@ -1125,6 +1155,7 @@ fuse_vnop_open(struct vop_open_args *ap)
 	struct fuse_vnode_data *fvdat;
 
 	int error, isdir = 0;
+	int32_t fuse_open_flags;
 
 	FS_DEBUG2G("inode=%ju mode=0x%x\n", (uintmax_t)VTOI(vp), mode);
 
@@ -1136,14 +1167,19 @@ fuse_vnop_open(struct vop_open_args *ap)
 	if (vnode_isdir(vp)) {
 		isdir = 1;
 	}
+	fuse_open_flags = 0;
 	if (isdir) {
 		fufh_type = FUFH_RDONLY;
 	} else {
 		fufh_type = fuse_filehandle_xlate_from_fflags(mode);
+		if (fufh_type == FUFH_WRONLY ||
+		    (fvdat->flag & FN_DIRECTIO) != 0 ||
+		    fuse_force_directio > 0)
+			fuse_open_flags = FOPEN_DIRECT_IO;
 	}
 
 	if (fuse_filehandle_valid(vp, fufh_type)) {
-		fuse_vnode_open(vp, 0, td);
+		fuse_vnode_open(vp, fuse_open_flags, td);
 		return 0;
 	}
 	error = fuse_filehandle_open(vp, fufh_type, NULL, td, cred);
@@ -1165,20 +1201,53 @@ fuse_vnop_read(struct vop_read_args *ap)
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	int ioflag = ap->a_ioflag;
-	struct ucred *cred = ap->a_cred;
+	struct ucred *cred = NULL;
+	int err, freecred;
+	gid_t gid;
 
 	FS_DEBUG2G("inode=%ju offset=%jd resid=%zd\n",
 	    (uintmax_t)VTOI(vp), uio->uio_offset, uio->uio_resid);
 
-	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
-	}
+	if (fuse_isdeadfs(vp))
+		return (ENXIO);
 
-	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
-	}
 
-	return fuse_io_dispatch(vp, uio, ioflag, cred);
+	err = 0;
+	freecred = 0;
+	if ((vnode_mount(vp)->mnt_flag & MNT_EXPORTED) != 0 &&
+	    fuse_filehandle_validrw(vp, FUFH_RDONLY) == FUFH_INVALID) {
+		FS_DEBUG("doing open() before read");
+		/*
+		 * This should only happen when VOP_READ() is done by an
+		 * nfsd thread.  Since the nfsd thread has already done
+		 * permission checks, I believe it is safe to open the
+		 * file as root.
+		 */
+		cred = crget();
+		cred->cr_uid = cred->cr_ruid = cred->cr_svuid = 0;
+		gid = 0;
+		crsetgroups(cred, 1, &gid);
+		cred->cr_rgid = cred->cr_svgid = cred->cr_groups[0];
+		cred->cr_prison = &prison0;
+		prison_hold(cred->cr_prison);
+		freecred = 1;
+		err = fuse_filehandle_open(vp, FUFH_RDONLY, NULL, curthread,
+		    cred);
+	} else
+		cred = ap->a_cred;
+
+	if (err == 0)
+		err = fuse_io_dispatch(vp, uio, ioflag, cred);
+
+#ifdef notdef
+	if (freefufh != 0)
+		fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred);
+#endif
+	if (freecred != 0)
+		crfree(cred);
+	return (err);
 }
 
 /*
@@ -1187,7 +1256,7 @@ fuse_vnop_read(struct vop_read_args *ap)
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
-	int *ncookies;
+	int *a_ncookies;
 	u_long **a_cookies;
     };
 */
@@ -1203,10 +1272,14 @@ fuse_vnop_readdir(struct vop_readdir_arg
 	struct fuse_iov cookediov;
 
 	int err = 0;
-	int freefufh = 0;
+	int ncookies;
+	u_long *cookies;
+	off_t startoff;
+	ssize_t tresid;
 
 	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
 
+	*ap->a_eofflag = 0;
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
@@ -1216,25 +1289,61 @@ fuse_vnop_readdir(struct vop_readdir_arg
 	}
 	fvdat = VTOFUD(vp);
 
+	tresid = uio->uio_resid;
+	startoff = uio->uio_offset;
 	if (!fuse_filehandle_valid(vp, FUFH_RDONLY)) {
 		FS_DEBUG("calling readdir() before open()");
 		err = fuse_filehandle_open(vp, FUFH_RDONLY, &fufh, NULL, cred);
-		freefufh = 1;
+		if (err == 0) {
+			/*
+			 * When a directory is opened, it must be read from
+			 * the beginning.  Hopefully, the "startoff" still
+			 * exists as an offset cookie for the directory.
+			 * If not, it will read the entire directory without
+			 * returning any entries and just return eof.
+			 */
+			uio->uio_offset = 0;
+		}
 	} else {
 		err = fuse_filehandle_get(vp, FUFH_RDONLY, &fufh);
 	}
 	if (err) {
 		return (err);
 	}
+
+	if (ap->a_ncookies != NULL) {
+		ncookies = uio->uio_resid;
+		ncookies = ncookies / (offsetof(struct dirent, d_name) + 4) + 1;
+		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
+		*ap->a_ncookies = ncookies;
+		*ap->a_cookies = cookies;
+	} else {
+		ncookies = 0;
+		cookies = NULL;
+	}
 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
 	fiov_init(&cookediov, DIRCOOKEDSIZE);
 
-	err = fuse_internal_readdir(vp, uio, fufh, &cookediov);
+	err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
+	    &ncookies, cookies);
 
 	fiov_teardown(&cookediov);
+#ifdef notdef
 	if (freefufh) {
 		fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred);
 	}
+#endif
+	if (ap->a_ncookies != NULL) {
+		if (err == 0) {
+			*ap->a_ncookies -= ncookies;
+		} else {
+			free(*ap->a_cookies, M_TEMP);
+			*ap->a_ncookies = 0;
+			*ap->a_cookies = NULL;
+		}
+	}
+	if (err == 0 && tresid == uio->uio_resid)
+		*ap->a_eofflag = 1;
 	return err;
 }
 
@@ -1307,7 +1416,7 @@ fuse_vnop_reclaim(struct vop_reclaim_arg
 	for (type = 0; type < FUFH_MAXTYPE; type++) {
 		fufh = &(fvdat->fufh[type]);
 		if (FUFH_IS_VALID(fufh)) {
-			printf("FUSE: vnode being reclaimed but fufh (type=%d) is valid",
+			FS_DEBUG("FUSE: vnode being reclaimed but fufh (type=%d) is valid",
 			    type);
 			fuse_filehandle_close(vp, type, td, NULL);
 		}
@@ -1708,20 +1817,56 @@ fuse_vnop_write(struct vop_write_args *a
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	int ioflag = ap->a_ioflag;
-	struct ucred *cred = ap->a_cred;
+	struct ucred *cred = NULL;
+	int err, freecred;
+	gid_t gid;
 
 	fuse_trace_printf_vnop();
 
-	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
-	}
-	fuse_vnode_refreshsize(vp, cred);
+	if (fuse_isdeadfs(vp))
+		return (ENXIO);
+	fuse_vnode_refreshsize(vp, ap->a_cred);
 
-	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
-	}
 
-	return fuse_io_dispatch(vp, uio, ioflag, cred);
+	err = 0;
+	freecred = 0;
+	if ((vnode_mount(vp)->mnt_flag & MNT_EXPORTED) != 0 &&
+	    !fuse_filehandle_valid(vp, FUFH_RDWR)) {
+		FS_DEBUG("doing open() before write");
+		/*
+		 * This should only happen when VOP_WRITE() is done by an
+		 * nfsd thread.  Since the nfsd thread has already done
+		 * permission checks, I believe it is safe to open the
+		 * file as root.
+		 * The open is done FUFH_RDWR and not FUFH_WRONLY because
+		 * a write of a partial buffer cache block will require
+		 * that the block be read in first.
+		 */
+		cred = crget();
+		cred->cr_uid = cred->cr_ruid = cred->cr_svuid = 0;
+		gid = 0;
+		crsetgroups(cred, 1, &gid);
+		cred->cr_rgid = cred->cr_svgid = cred->cr_groups[0];
+		cred->cr_prison = &prison0;
+		prison_hold(cred->cr_prison);
+		freecred = 1;
+		err = fuse_filehandle_open(vp, FUFH_RDWR, NULL, curthread,
+		    cred);
+	} else
+		cred = ap->a_cred;
+
+	if (err == 0)
+		err = fuse_io_dispatch(vp, uio, ioflag, cred);
+
+#ifdef notdef
+	if (freefufh != 0)
+		fuse_filehandle_close(vp, FUFH_RDWR, NULL, cred);
+#endif
+	if (freecred != 0)
+		crfree(cred);
+	return (err);
 }
 
 /*
@@ -1752,24 +1897,35 @@ fuse_vnop_getpages(struct vop_getpages_a
 	td = curthread;			/* XXX */
 	cred = curthread->td_ucred;	/* XXX */
 	pages = ap->a_m;
-	npages = ap->a_count;
+	count = ap->a_count;
 
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		FS_DEBUG("called on non-cacheable vnode??\n");
 		return (VM_PAGER_ERROR);
 	}
+	npages = btoc(count);
 
 	/*
-	 * If the last page is partially valid, just return it and allow
-	 * the pager to zero-out the blanks.  Partially valid pages can
-	 * only occur at the file EOF.
-	 *
-	 * XXXGL: is that true for FUSE, which is a local filesystem,
-	 * but still somewhat disconnected from the kernel?
+	 * If the requested page is partially valid, just return it and
+	 * allow the pager to zero-out the blanks.  Partially valid pages
+	 * can only occur at the file EOF.
 	 */
+
 	VM_OBJECT_WLOCK(vp->v_object);
-	if (pages[npages - 1]->valid != 0 && --npages == 0)
-		goto out;
+	fuse_vm_page_lock_queues();
+	if (pages[ap->a_reqpage]->valid != 0) {
+		for (i = 0; i < npages; ++i) {
+			if (i != ap->a_reqpage) {
+				fuse_vm_page_lock(pages[i]);
+				vm_page_free(pages[i]);
+				fuse_vm_page_unlock(pages[i]);
+			}
+		}
+		fuse_vm_page_unlock_queues();
+		VM_OBJECT_WUNLOCK(vp->v_object);
+		return 0;
+	}
+	fuse_vm_page_unlock_queues();
 	VM_OBJECT_WUNLOCK(vp->v_object);
 
 	/*
@@ -1783,7 +1939,6 @@ fuse_vnop_getpages(struct vop_getpages_a
 	PCPU_INC(cnt.v_vnodein);
 	PCPU_ADD(cnt.v_vnodepgsin, npages);
 
-	count = npages << PAGE_SHIFT;
 	iov.iov_base = (caddr_t)kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
@@ -1801,6 +1956,17 @@ fuse_vnop_getpages(struct vop_getpages_a
 
 	if (error && (uio.uio_resid == count)) {
 		FS_DEBUG("error %d\n", error);
+		VM_OBJECT_WLOCK(vp->v_object);
+		fuse_vm_page_lock_queues();
+		for (i = 0; i < npages; ++i) {
+			if (i != ap->a_reqpage) {
+				fuse_vm_page_lock(pages[i]);
+				vm_page_free(pages[i]);
+				fuse_vm_page_unlock(pages[i]);
+			}
+		}
+		fuse_vm_page_unlock_queues();
+		VM_OBJECT_WUNLOCK(vp->v_object);
 		return VM_PAGER_ERROR;
 	}
 	/*
@@ -1841,15 +2007,12 @@ fuse_vnop_getpages(struct vop_getpages_a
 			 */
 			;
 		}
+		if (i != ap->a_reqpage)
+			vm_page_readahead_finish(m);
 	}
 	fuse_vm_page_unlock_queues();
-out:
 	VM_OBJECT_WUNLOCK(vp->v_object);
-	if (ap->a_rbehind)
-		*ap->a_rbehind = 0;
-	if (ap->a_rahead)
-		*ap->a_rahead = 0;
-	return (VM_PAGER_OK);
+	return 0;
 }
 
 /*
@@ -1962,3 +2125,411 @@ fuse_vnop_print(struct vop_print_args *a
 
 	return 0;
 }
+
+/*
+    struct vop_getextattr_args {
+	struct vnode *a_vp;
+	int a_attrnamespace;
+	const char *a_name;
+	struct uio *a_uio;
+	size_t *a_size;
+	struct ucred *a_cred;
+	struct thread *a_td;
+    };

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201604122258.u3CMweir079752>