Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 3 Mar 2003 18:22:40 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bzeeb+freebsd@zabbadoz.net>
To:        freebsd-hackers@FreeBSD.ORG
Cc:        Christian Kratzer <ck@cksoft.de>, Oliver Fromme <olli@secnetix.de>
Subject:   jail statfs patch
Message-ID:  <Pine.BSF.4.53.0303031811160.738@e0-0.zab2.int.zabbadoz.net>

next in thread | raw e-mail | index | archive | help
Hi,

attached is a patch for 5.0/HEAD that adds a fine grained option to
control what fs stats can be seen from within jails.

I know that there is also a kernel module available, but as I had
already started to work on this I finished it for those people who
prefer it this way.

--- description ---
		The patch is derived from a private patch done by
		Christian Kratzer for RELENG_4 and the public patches
		by Oliver Fromme (see kern/47586).

		It adds the following sysctl option:

		     security.jail.statfs_restricted
			  This fine-grained option lets you control what and how filesystem
			  statistics are seen from within jails:

				security.jail.statfs_restricted=0

			  this is the old behaviour where you could see everything from the
			  whole host.

				security.jail.statfs_restricted=1

			  this is the default for now. It shows only partitions related to the
			  jail.  If there is no root partition, or the jail is on a shared
			  partition, a ``fake'' root with the correct values but a stripped
			  f_mntonname will be shown.

				security.jail.statfs_restricted=2

			  this is almost the same as 1 but it will show a ``full fake'' for a
			  shared root mount. It will zero out almost all values and write
			  jail-specific ``fakes'' to the others.

				security.jail.statfs_restricted=3

			  this is almost the same as 1 but it will not show a shared root at
			  all.

				security.jail.statfs_restricted>=4

			  this will not show anything but procfs, devfs, etc. within the jail.
			  Be warned that this renders the jail almost unusable.
--- /description ---

For some sample output or to download the diff please have a look at
	http://sources.zabbadoz.net/freebsd/jail.html


PS: I am really happy about all the other jail patches that people have
recently announced. Could you also please update the manpage(s)? ;-)

-- 
Greetings

Bjoern A. Zeeb				bzeeb at Zabbadoz dot NeT
56 69 73 69 74				http://www.zabbadoz.net/



--- ./sys/kern/kern_jail.c.orig	Mon Feb  3 12:57:06 2003
+++ ./sys/kern/kern_jail.c	Tue Feb  4 18:54:55 2003
@@ -49,6 +49,11 @@
     &jail_sysvipc_allowed, 0,
     "Processes in jail can use System V IPC primitives");

+int	jail_statfs_restricted = 1;
+SYSCTL_INT(_security_jail, OID_AUTO, statfs_restricted, CTLFLAG_RW,
+    &jail_statfs_restricted, 0,
+    "Processes in jail may not see all currently mounted file systems");
+
 /*
  * MPSAFE
  */
@@ -76,6 +81,9 @@
 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
 	pr->pr_securelevel = securelevel;
 	error = copyinstr(j.hostname, &pr->pr_host, sizeof pr->pr_host, 0);
+	if (error)
+		goto bail;
+	error = copyinstr(j.path, &pr->pr_path, sizeof pr->pr_path, 0);
 	if (error)
 		goto bail;
 	ca.path = j.path;
--- ./sys/kern/vfs_syscalls.c.orig	Mon Feb  3 13:12:26 2003
+++ ./sys/kern/vfs_syscalls.c	Sun Mar  2 19:31:38 2003
@@ -227,6 +227,10 @@
 	int error;
 	struct nameidata nd;
 	struct statfs sb;
+	int notsu, jrlen;
+
+	if (jail_statfs_restricted >= 4 && jailed(td->td_ucred))
+		return (ENOENT);

 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
 	if ((error = namei(&nd)) != 0)
@@ -244,9 +248,47 @@
 	if (error)
 		return (error);
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
-	if (suser(td)) {
+	notsu = suser(td);
+	if (notsu || (jail_statfs_restricted && jailed(td->td_ucred))) {
 		bcopy(sp, &sb, sizeof(sb));
-		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+		if (notsu)
+			sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+
+		if (jail_statfs_restricted && jailed(td->td_ucred)) {
+			jrlen = strlen(td->td_ucred->cr_prison->pr_path);
+
+			if (strlen(mp->mnt_stat.f_mntonname) < jrlen) {
+				switch (jail_statfs_restricted) {
+					case 1:
+						bzero(sb.f_mntonname,
+							sizeof(sb.f_mntonname));
+						*sb.f_mntonname = '/';
+						break;
+					case 2:
+						bzero(&sb, sizeof(sb));
+						strcpy(sb.f_fstypename,
+								"jailfs");
+						strcpy(sb.f_mntfromname,
+								"jailroot");
+						sb.f_flags |= MNT_LOCAL;
+						*sb.f_mntonname = '/';
+						break;
+					case 3:
+					default:
+						return (ENOENT);
+				}
+
+			} else {
+				/* strip jail root (jr) path */
+				bzero(sb.f_mntonname, sizeof(sb.f_mntonname));
+				strcpy(sb.f_mntonname, sp->f_mntonname + jrlen);
+				/* hack for jail root filesystem */
+				if ( ! *sb.f_mntonname ) {
+					*sb.f_mntonname='/';
+				}
+			}
+		}
+
 		sp = &sb;
 	}
 	return (copyout(sp, uap->buf, sizeof(*sp)));
@@ -275,6 +317,10 @@
 	register struct statfs *sp;
 	int error;
 	struct statfs sb;
+	int notsu, jrlen;
+
+	if (jail_statfs_restricted >= 4 && jailed(td->td_ucred))
+		return (EBADF);

 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
 		return (error);
@@ -292,9 +338,47 @@
 	if (error)
 		return (error);
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
-	if (suser(td)) {
+	notsu = suser(td);
+	if (notsu || (jail_statfs_restricted && jailed(td->td_ucred))) {
 		bcopy(sp, &sb, sizeof(sb));
-		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+		if (notsu)
+			sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+
+		if (jail_statfs_restricted && jailed(td->td_ucred)) {
+			jrlen = strlen(td->td_ucred->cr_prison->pr_path);
+
+			if (strlen(mp->mnt_stat.f_mntonname) < jrlen) {
+				switch (jail_statfs_restricted) {
+					case 1:
+						bzero(sb.f_mntonname,
+							sizeof(sb.f_mntonname));
+						*sb.f_mntonname = '/';
+						break;
+					case 2:
+						bzero(&sb, sizeof(sb));
+						strcpy(sb.f_fstypename,
+								"jailfs");
+						strcpy(sb.f_mntfromname,
+								"jailroot");
+						sb.f_flags |= MNT_LOCAL;
+						*sb.f_mntonname = '/';
+						break;
+					case 3:
+					default:
+						return (ENOENT);
+				}
+
+			} else {
+				/* strip jail root (jr) path */
+				bzero(sb.f_mntonname, sizeof(sb.f_mntonname));
+				strcpy(sb.f_mntonname, sp->f_mntonname + jrlen);
+				/* hack for jail root filesystem */
+				if ( ! *sb.f_mntonname ) {
+					*sb.f_mntonname='/';
+				}
+			}
+		}
+
 		sp = &sb;
 	}
 	return (copyout(sp, uap->buf, sizeof(*sp)));
@@ -323,10 +407,18 @@
 	register struct statfs *sp;
 	caddr_t sfsp;
 	long count, maxcount, error;
+	struct statfs js, jss;
+	int had_jail_root, base_path_len, mntlen;
+
+	if (jail_statfs_restricted >= 4 && jailed(td->td_ucred))
+		return (ENOENT);
+	if (jail_statfs_restricted == 1 && jailed(td->td_ucred))
+		bzero(&jss, sizeof(jss));

 	maxcount = uap->bufsize / sizeof(struct statfs);
 	sfsp = (caddr_t)uap->buf;
 	count = 0;
+	base_path_len = had_jail_root = 0;
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 #ifdef MAC
@@ -339,6 +431,36 @@
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
+		if (jail_statfs_restricted && jailed(td->td_ucred)) {
+			/*
+			 * If process is jailed skip files systems that are
+			 * not at or below the prison chroot path.
+			 */
+			if (strncmp(td->td_ucred->cr_prison->pr_path,
+				mp->mnt_stat.f_mntonname,
+				strlen(td->td_ucred->cr_prison->pr_path))) {
+
+				/*
+				 * remember for fake root if appropriate
+				 */
+				if (jail_statfs_restricted == 1) {
+				    mntlen = strlen(mp->mnt_stat.f_mntonname);
+				    if (!strncmp(mp->mnt_stat.f_mntonname,
+					    td->td_ucred->cr_prison->pr_path,
+					    mntlen) && mntlen > base_path_len) {
+
+						base_path_len = mntlen;
+						bcopy(&mp->mnt_stat, &jss,
+								sizeof(jss));
+				    }
+				}
+
+				mtx_lock(&mountlist_mtx);
+				nmp = TAILQ_NEXT(mp, mnt_list);
+				vfs_unbusy(mp, td);
+				continue;
+			}
+		}
 		if (sfsp && count < maxcount) {
 			sp = &mp->mnt_stat;
 			/*
@@ -355,6 +477,25 @@
 				continue;
 			}
 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+			if (jail_statfs_restricted && jailed(td->td_ucred)) {
+				/*
+				 * If this process is jailed we strip away
+				 * the chroot path from the f_mntonname
+				 * and copy to user space.
+				 */
+				bcopy(sp, &js, sizeof(js));
+				bzero(js.f_mntonname, sizeof(js.f_mntonname));
+				strcpy(js.f_mntonname, sp->f_mntonname +
+					strlen(td->td_ucred->cr_prison->pr_path)
+					);
+				/* hack for jail root filesystem */
+				if ( ! *js.f_mntonname ) {
+					*js.f_mntonname='/';
+					++had_jail_root;
+				}
+
+				sp = &js;
+			}
 			error = copyout(sp, sfsp, sizeof(*sp));
 			if (error) {
 				vfs_unbusy(mp, td);
@@ -368,6 +509,28 @@
 		vfs_unbusy(mp, td);
 	}
 	mtx_unlock(&mountlist_mtx);
+	if (jail_statfs_restricted && jailed(td->td_ucred) && !had_jail_root &&
+		sfsp && count < maxcount) {
+
+		if (jail_statfs_restricted==1 || jail_statfs_restricted==2) {
+			if (jail_statfs_restricted == 1) {
+				bzero(jss.f_mntonname, sizeof(jss.f_mntonname));
+			}
+			if (jail_statfs_restricted == 2) {
+				bzero(&jss, sizeof(jss));
+				strcpy(jss.f_fstypename, "jailfs");
+				strcpy(jss.f_mntfromname, "jailroot");
+				jss.f_flags |= MNT_LOCAL;
+			}
+			*jss.f_mntonname='/';
+
+			error = copyout(&jss, sfsp, sizeof(jss));
+			if (error)
+				return (error);
+			sfsp += sizeof(js);
+			count++;
+		}
+	}
 	if (sfsp && count > maxcount)
 		td->td_retval[0] = maxcount;
 	else
@@ -3748,6 +3911,7 @@
 	struct statfs sb;
 	fhandle_t fh;
 	int error;
+	int notsu, jrlen;

 	/*
 	 * Must be super user
@@ -3756,6 +3920,9 @@
 	if (error)
 		return (error);

+	if (jail_statfs_restricted >= 4 && jailed(td->td_ucred))
+		return (ESTALE);
+
 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
 		return (error);

@@ -3774,11 +3941,50 @@
 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
 		return (error);
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
-	if (suser(td)) {
+	notsu = suser(td);
+	if (notsu || (jail_statfs_restricted && jailed(td->td_ucred))) {
 		bcopy(sp, &sb, sizeof(sb));
-		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+		if (notsu)
+			sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
+
+		if (jail_statfs_restricted && jailed(td->td_ucred)) {
+			jrlen = strlen(td->td_ucred->cr_prison->pr_path);
+
+			if (strlen(mp->mnt_stat.f_mntonname) < jrlen) {
+				switch (jail_statfs_restricted) {
+					case 1:
+						bzero(sb.f_mntonname,
+							sizeof(sb.f_mntonname));
+						*sb.f_mntonname = '/';
+						break;
+					case 2:
+						bzero(&sb, sizeof(sb));
+						strcpy(sb.f_fstypename,
+								"jailfs");
+						strcpy(sb.f_mntfromname,
+								"jailroot");
+						sb.f_flags |= MNT_LOCAL;
+						*sb.f_mntonname = '/';
+						break;
+					case 3:
+					default:
+						return (ENOENT);
+				}
+
+			} else {
+				/* strip jail root (jr) path */
+				bzero(sb.f_mntonname, sizeof(sb.f_mntonname));
+				strcpy(sb.f_mntonname, sp->f_mntonname + jrlen);
+				/* hack for jail root filesystem */
+				if ( ! *sb.f_mntonname ) {
+					*sb.f_mntonname='/';
+				}
+			}
+		}
+
 		sp = &sb;
 	}
+
 	return (copyout(sp, uap->buf, sizeof(*sp)));
 }

--- ./sys/sys/jail.h.orig	Mon Feb  3 12:41:38 2003
+++ ./sys/sys/jail.h	Thu Feb  6 18:20:12 2003
@@ -48,6 +48,7 @@
 struct prison {
 	int		 pr_ref;			/* (p) refcount */
 	char 		 pr_host[MAXHOSTNAMELEN];	/* (p) jail hostname */
+	char		 pr_path[MAXPATHLEN];		/* (c) chroot base */
 	u_int32_t	 pr_ip;				/* (c) ip addr host */
 	void		*pr_linux;			/* (p) linux abi */
 	int		 pr_securelevel;		/* (p) securelevel */
@@ -62,6 +63,7 @@
 extern int	jail_set_hostname_allowed;
 extern int	jail_socket_unixiproute_only;
 extern int	jail_sysvipc_allowed;
+extern int	jail_statfs_restricted;

 /*
  * Kernel support functions for jail().
--- ./usr.sbin/jail/jail.8.orig	Sat Mar  1 20:33:18 2003
+++ ./usr.sbin/jail/jail.8	Sun Mar  2 21:02:15 2003
@@ -370,6 +370,36 @@
 with) processes outside of the jail, and in other jails.
 As such, this functionality is disabled by default, but can be enabled
 by setting this MIB entry to 1.
+.It Va security.jail.statfs_restricted
+This fine grained option lets you control what and how filesystem statistics
+are seen from within jails:
+.Pp
+.Dl security.jail.statfs_restricted=0
+.Pp
+this is the old behaviour where you could see everything from the whole host.
+.Pp
+.Dl security.jail.statfs_restricted=1
+.Pp
+this is the default for now. It shows only partitions related to the jail.
+If there is no root partition, or the jail is on a shared partition, a ``fake''
+root with the correct values but a stripped
+.Dv f_mntonname
+will be shown.
+.Pp
+.Dl security.jail.statfs_restricted=2
+.Pp
+this is almost the same as 1 but it will show a ``full fake'' for a shared root
+mount. It will zero out almost all values and write jail-specific ``fakes'' to
+the others.
+.Pp
+.Dl security.jail.statfs_restricted=3
+.Pp
+this is almost the same as 1 but it will not show a shared root at all.
+.Pp
+.Dl security.jail.statfs_restricted>=4
+.Pp
+this will not show anything but procfs, devfs, etc. within the jail. Be warned
+that this renders the jail to be almost unusable.
 .El
 .Sh SEE ALSO
 .Xr newaliases 1 ,

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.4.53.0303031811160.738>