Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Aug 2017 05:38:31 +0000 (UTC)
From:      Dmitry Chagin <dchagin@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r322340 - in stable/11: share/man/man4 share/man/man5 sys/fs/fdescfs sys/kern sys/sys
Message-ID:  <201708100538.v7A5cVsi013346@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: dchagin
Date: Thu Aug 10 05:38:31 2017
New Revision: 322340
URL: https://svnweb.freebsd.org/changeset/base/322340

Log:
  MFC r321839:
  
  Implement proper Linux /dev/fd and /proc/self/fd behavior by adding
  Linux specific things to the native fdescfs file system.
  
  Unlike FreeBSD, the Linux fdescfs is a directory containing symbolic
  links to the actual files which the process has open.
  A readlink(2) call on such a link returns the full path in the case of a
  regular file, or a string in a special format (type:[inode],
  anon_inode:<file-type>, etc.).
  As in FreeBSD, opening a file in the Linux fdescfs directory is
  equivalent to duplicating the corresponding file descriptor.
  
  Here we have mutually exclusive requirements:
  - in the case of a readlink(2) call, the fdescfs lookup() method should
  return a VLNK vnode, otherwise our kern_readlinkat() fails with EINVAL;
  - in all other calls, the fdescfs lookup() method should return a non-VLNK vnode.
  
  To satisfy both, a new vnode v_vflag, VV_READLINK, was added, which is set if
  fdescfs has been mounted with the linrdlnk option, and kern_readlinkat() was
  modified to handle it properly.
  
  For now, for Linux ABI compatibility, mount the fdescfs volume with the linrdlnk option:
  
    mount -t fdescfs -o linrdlnk null /compat/linux/dev/fd
  
  Relnotes:	yes
  Differential Revision:	https://reviews.freebsd.org/D11452

Modified:
  stable/11/share/man/man4/linux.4
  stable/11/share/man/man5/fdescfs.5
  stable/11/sys/fs/fdescfs/fdesc.h
  stable/11/sys/fs/fdescfs/fdesc_vfsops.c
  stable/11/sys/fs/fdescfs/fdesc_vnops.c
  stable/11/sys/kern/vfs_syscalls.c
  stable/11/sys/sys/vnode.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/share/man/man4/linux.4
==============================================================================
--- stable/11/share/man/man4/linux.4	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/share/man/man4/linux.4	Thu Aug 10 05:38:31 2017	(r322340)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 8, 2010
+.Dd August 1, 2017
 .Dt LINUX 4
 .Os
 .Sh NAME
@@ -127,9 +127,11 @@ regardless of whether the
 module is statically linked into the kernel
 or loaded as a module.
 .Sh FILES
-.Bl -tag -width /compat/linux/proc -compact
+.Bl -tag -width /compat/linux/dev/fd -compact
 .It Pa /compat/linux
 minimal Linux run-time environment
+.It Pa /compat/linux/dev/fd
+limited Linux file-descriptor file system
 .It Pa /compat/linux/proc
 limited Linux process file system
 .It Pa /compat/linux/sys
@@ -138,6 +140,7 @@ limited Linux system file system
 .Sh SEE ALSO
 .Xr brandelf 1 ,
 .Xr elf 5 ,
+.Xr fdescfs 5 ,
 .Xr linprocfs 5 ,
 .Xr linsysfs 5
 .Sh HISTORY

Modified: stable/11/share/man/man5/fdescfs.5
==============================================================================
--- stable/11/share/man/man5/fdescfs.5	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/share/man/man5/fdescfs.5	Thu Aug 10 05:38:31 2017	(r322340)
@@ -34,7 +34,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 18, 2010
+.Dd August 1, 2017
 .Dt FDESCFS 5
 .Os
 .Sh NAME
@@ -92,6 +92,14 @@ and
 files are created by default when devfs alone is mounted.
 .Nm
 creates entries for all file descriptors opened by the process.
+.Pp
+For
+.Xr linux 4
+ABI compatibility mount
+.Nm
+volume with
+.Cm linrdlnk
+option.
 .Sh FILES
 .Bl -tag -width /dev/stderr -compact
 .It Pa /dev/fd/#
@@ -103,6 +111,12 @@ volume located on
 .Pa /dev/fd :
 .Pp
 .Dl "mount -t fdescfs null /dev/fd"
+.Pp
+For
+.Xr linux 4
+ABI compatibility:
+.Pp
+.Dl "mount -t fdescfs -o linrdlnk null /compat/linux/dev/fd"
 .Sh SEE ALSO
 .Xr devfs 5 ,
 .Xr mount 8

Modified: stable/11/sys/fs/fdescfs/fdesc.h
==============================================================================
--- stable/11/sys/fs/fdescfs/fdesc.h	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/sys/fs/fdescfs/fdesc.h	Thu Aug 10 05:38:31 2017	(r322340)
@@ -38,7 +38,9 @@
 #define _FS_FDESC_H_
 
 /* Private mount flags for fdescfs. */
-#define FMNT_UNMOUNTF 0x01
+#define FMNT_UNMOUNTF	0x01
+#define FMNT_LINRDLNKF	0x02
+
 struct fdescmount {
 	struct vnode	*f_root;	/* Root node */
 	int flags;

Modified: stable/11/sys/fs/fdescfs/fdesc_vfsops.c
==============================================================================
--- stable/11/sys/fs/fdescfs/fdesc_vfsops.c	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/sys/fs/fdescfs/fdesc_vfsops.c	Thu Aug 10 05:38:31 2017	(r322340)
@@ -101,6 +101,8 @@ fdesc_mount(struct mount *mp)
 	 */
 	mp->mnt_data = fmp;
 	fmp->flags = 0;
+	if (vfs_getopt(mp->mnt_optnew, "linrdlnk", NULL, NULL) == 0)
+		fmp->flags |= FMNT_LINRDLNKF;
 	error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp);
 	if (error) {
 		free(fmp, M_FDESCMNT);

Modified: stable/11/sys/fs/fdescfs/fdesc_vnops.c
==============================================================================
--- stable/11/sys/fs/fdescfs/fdesc_vnops.c	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/sys/fs/fdescfs/fdesc_vnops.c	Thu Aug 10 05:38:31 2017	(r322340)
@@ -69,6 +69,7 @@ static vop_getattr_t	fdesc_getattr;
 static vop_lookup_t	fdesc_lookup;
 static vop_open_t	fdesc_open;
 static vop_readdir_t	fdesc_readdir;
+static vop_readlink_t	fdesc_readlink;
 static vop_reclaim_t	fdesc_reclaim;
 static vop_setattr_t	fdesc_setattr;
 
@@ -81,6 +82,7 @@ static struct vop_vector fdesc_vnodeops = {
 	.vop_open =		fdesc_open,
 	.vop_pathconf =		vop_stdpathconf,
 	.vop_readdir =		fdesc_readdir,
+	.vop_readlink =		fdesc_readlink,
 	.vop_reclaim =		fdesc_reclaim,
 	.vop_setattr =		fdesc_setattr,
 };
@@ -195,6 +197,8 @@ loop:
 	fd->fd_type = ftype;
 	fd->fd_fd = fd_fd;
 	fd->fd_ix = ix;
+	if (ftype == Fdesc && fmp->flags & FMNT_LINRDLNKF)
+		vp->v_vflag |= VV_READLINK;
 	error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL);
 	if (error != 0) {
 		*vpp = NULLVP;
@@ -420,7 +424,7 @@ fdesc_getattr(struct vop_getattr_args *ap)
 		break;
 
 	case Fdesc:
-		vap->va_type = VCHR;
+		vap->va_type = (vp->v_vflag & VV_READLINK) == 0 ? VCHR : VLNK;
 		vap->va_nlink = 1;
 		vap->va_size = 0;
 		vap->va_rdev = makedev(0, vap->va_fileid);
@@ -490,6 +494,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
 static int
 fdesc_readdir(struct vop_readdir_args *ap)
 {
+	struct fdescmount *fmp;
 	struct uio *uio = ap->a_uio;
 	struct filedesc *fdp;
 	struct dirent d;
@@ -499,6 +504,7 @@ fdesc_readdir(struct vop_readdir_args *ap)
 	if (VTOFDESC(ap->a_vp)->fd_type != Froot)
 		panic("fdesc_readdir: not dir");
 
+	fmp = VFSTOFDESC(ap->a_vp->v_mount);
 	if (ap->a_ncookies != NULL)
 		*ap->a_ncookies = 0;
 
@@ -530,7 +536,8 @@ fdesc_readdir(struct vop_readdir_args *ap)
 				break;
 			dp->d_namlen = sprintf(dp->d_name, "%d", fcnt);
 			dp->d_reclen = UIO_MX;
-			dp->d_type = DT_CHR;
+			dp->d_type = (fmp->flags & FMNT_LINRDLNKF) == 0 ?
+			    DT_CHR : DT_LNK;
 			dp->d_fileno = i + FD_DESC;
 			break;
 		}
@@ -566,4 +573,53 @@ fdesc_reclaim(struct vop_reclaim_args *ap)
 	free(vp->v_data, M_TEMP);
 	vp->v_data = NULL;
 	return (0);
+}
+
+static int
+fdesc_readlink(struct vop_readlink_args *va)
+{
+	struct vnode *vp, *vn;
+	cap_rights_t rights;
+	struct thread *td;
+	struct uio *uio;
+	struct file *fp;
+	char *freepath, *fullpath;
+	size_t pathlen;
+	int lockflags, fd_fd;
+	int error;
+
+	freepath = NULL;
+	vn = va->a_vp;
+	if (VTOFDESC(vn)->fd_type != Fdesc)
+		panic("fdesc_readlink: not fdescfs link");
+	fd_fd = ((struct fdescnode *)vn->v_data)->fd_fd;
+	lockflags = VOP_ISLOCKED(vn);
+	VOP_UNLOCK(vn, 0);
+
+	td = curthread;
+	error = fget_cap(td, fd_fd, cap_rights_init(&rights), &fp, NULL);
+	if (error != 0)
+		goto out;
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		vp = fp->f_vnode;
+		error = vn_fullpath(td, vp, &fullpath, &freepath);
+		break;
+	default:
+		fullpath = "anon_inode:[unknown]";
+		break;
+	}
+	if (error == 0) {
+		uio = va->a_uio;
+		pathlen = strlen(fullpath);
+		error = uiomove(fullpath, pathlen, uio);
+	}
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	fdrop(fp, td);
+
+out:
+	vn_lock(vn, lockflags | LK_RETRY);
+	return (error);
 }

Modified: stable/11/sys/kern/vfs_syscalls.c
==============================================================================
--- stable/11/sys/kern/vfs_syscalls.c	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/sys/kern/vfs_syscalls.c	Thu Aug 10 05:38:31 2017	(r322340)
@@ -2409,7 +2409,7 @@ kern_readlinkat(struct thread *td, int fd, char *path,
 		return (error);
 	}
 #endif
-	if (vp->v_type != VLNK)
+	if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
 		error = EINVAL;
 	else {
 		aiov.iov_base = buf;

Modified: stable/11/sys/sys/vnode.h
==============================================================================
--- stable/11/sys/sys/vnode.h	Thu Aug 10 05:35:45 2017	(r322339)
+++ stable/11/sys/sys/vnode.h	Thu Aug 10 05:38:31 2017	(r322340)
@@ -253,6 +253,7 @@ struct xvnode {
 #define	VV_DELETED	0x0400	/* should be removed */
 #define	VV_MD		0x0800	/* vnode backs the md device */
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
+#define	VV_READLINK	0x2000	/* fdescfs linux vnode */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708100538.v7A5cVsi013346>