From owner-svn-src-all@FreeBSD.ORG Sat Aug 13 09:21:17 2011 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 12F15106566C; Sat, 13 Aug 2011 09:21:17 +0000 (UTC) (envelope-from jonathan@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 002CD8FC13; Sat, 13 Aug 2011 09:21:16 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p7D9LGPx005251; Sat, 13 Aug 2011 09:21:16 GMT (envelope-from jonathan@svn.freebsd.org) Received: (from jonathan@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p7D9LGIK005244; Sat, 13 Aug 2011 09:21:16 GMT (envelope-from jonathan@svn.freebsd.org) Message-Id: <201108130921.p7D9LGIK005244@svn.freebsd.org> From: Jonathan Anderson Date: Sat, 13 Aug 2011 09:21:16 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r224810 - in head/sys: kern sys X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 13 Aug 2011 09:21:17 -0000 Author: jonathan Date: Sat Aug 13 09:21:16 2011 New Revision: 224810 URL: http://svn.freebsd.org/changeset/base/224810 Log: Allow Capsicum capabilities to delegate constrained access to file system subtrees to sandboxed processes. - Use of absolute paths and '..' is limited in capability mode. - Use of absolute paths and '..' is limited when looking up relative to a capability. 
- When a name lookup is performed, identify what operation is to be performed (such as CAP_MKDIR) as well as check for CAP_LOOKUP. With these constraints, openat() and friends are now safe in capability mode, and can then be used by code such as the capability-mode runtime linker. Approved by: re (bz), mentor (rwatson) Sponsored by: Google Inc Modified: head/sys/kern/kern_descrip.c head/sys/kern/sys_capability.c head/sys/kern/vfs_lookup.c head/sys/kern/vfs_syscalls.c head/sys/sys/capability.h head/sys/sys/namei.h Modified: head/sys/kern/kern_descrip.c ============================================================================== --- head/sys/kern/kern_descrip.c Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/kern/kern_descrip.c Sat Aug 13 09:21:16 2011 (r224810) @@ -2336,6 +2336,16 @@ _fget(struct thread *td, int fd, struct #ifdef CAPABILITIES /* + * If this is a capability, what rights does it have? + */ + if (haverightsp != NULL) { + if (fp->f_type == DTYPE_CAPABILITY) + *haverightsp = cap_rights(fp); + else + *haverightsp = CAP_MASK_VALID; + } + + /* * If a capability has been requested, return the capability directly. * Otherwise, check capability rights, extract the underlying object, * and check its access flags. 
Modified: head/sys/kern/sys_capability.c ============================================================================== --- head/sys/kern/sys_capability.c Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/kern/sys_capability.c Sat Aug 13 09:21:16 2011 (r224810) @@ -220,7 +220,7 @@ cap_new(struct thread *td, struct cap_ne { int error, capfd; int fd = uap->fd; - struct file *fp, *fcapp; + struct file *fp; cap_rights_t rights = uap->rights; AUDIT_ARG_FD(fd); @@ -229,7 +229,7 @@ cap_new(struct thread *td, struct cap_ne if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &fcapp, &capfd); + error = kern_capwrap(td, fp, rights, &capfd); if (error) return (error); @@ -267,10 +267,10 @@ cap_getrights(struct thread *td, struct */ int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - struct file **fcappp, int *capfdp) + int *capfdp) { struct capability *cp, *cp_old; - struct file *fp_object; + struct file *fp_object, *fcapp; int error; if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) @@ -290,7 +290,7 @@ kern_capwrap(struct thread *td, struct f /* * Allocate a new file descriptor to hang the capability off of. */ - error = falloc(td, fcappp, capfdp, fp->f_flag); + error = falloc(td, &fcapp, capfdp, fp->f_flag); if (error) return (error); @@ -309,18 +309,18 @@ kern_capwrap(struct thread *td, struct f cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); cp->cap_rights = rights; cp->cap_object = fp_object; - cp->cap_file = *fcappp; + cp->cap_file = fcapp; if (fp->f_flag & DFLAG_PASSABLE) - finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp, + finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, &capability_ops); else - finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp, + finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, &capability_ops_unpassable); /* * Release our private reference (the proc filedesc still has one). 
*/ - fdrop(*fcappp, td); + fdrop(fcapp, td); return (0); } Modified: head/sys/kern/vfs_lookup.c ============================================================================== --- head/sys/kern/vfs_lookup.c Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/kern/vfs_lookup.c Sat Aug 13 09:21:16 2011 (r224810) @@ -180,6 +180,18 @@ namei(struct nameidata *ndp) if (!error && *cnp->cn_pnbuf == '\0') error = ENOENT; +#ifdef CAPABILITY_MODE + /* + * In capability mode, lookups must be "strictly relative" (i.e. + * not an absolute path, and not containing '..' components) to + * a real file descriptor, not the pseudo-descriptor AT_FDCWD. + */ + if (IN_CAPABILITY_MODE(td)) { + ndp->ni_strictrelative = 1; + if (ndp->ni_dirfd == AT_FDCWD) + error = ECAPMODE; + } +#endif if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC @@ -214,12 +226,20 @@ namei(struct nameidata *ndp) AUDIT_ARG_ATFD1(ndp->ni_dirfd); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); -#ifdef CAPABILITY_MODE - KASSERT(!IN_CAPABILITY_MODE(td), - ("%s: reached %s:%d in capability mode", - __func__, __FILE__, __LINE__)); + error = fgetvp_rights(td, ndp->ni_dirfd, + ndp->ni_rightsneeded | CAP_LOOKUP, + &(ndp->ni_baserights), &dp); +#ifdef CAPABILITIES + /* + * Lookups relative to a capability must also be + * strictly relative. + * + * Note that a capability with rights CAP_MASK_VALID + * is treated exactly like a regular file descriptor. + */ + if (ndp->ni_baserights != CAP_MASK_VALID) + ndp->ni_strictrelative = 1; #endif - error = fgetvp(td, ndp->ni_dirfd, 0, &dp); } if (error != 0 || dp != NULL) { FILEDESC_SUNLOCK(fdp); @@ -261,6 +281,8 @@ namei(struct nameidata *ndp) if (*(cnp->cn_nameptr) == '/') { vrele(dp); VFS_UNLOCK_GIANT(vfslocked); + if (ndp->ni_strictrelative != 0) + return (ENOTCAPABLE); while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; @@ -604,7 +626,10 @@ dirloop: } /* - * Handle "..": four special cases. + * Handle "..": five special cases. 
+ * 0. If doing a capability lookup, return ENOTCAPABLE (this is a + * fairly conservative design choice, but it's the only one that we + * are satisfied guarantees the property we're looking for). * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) @@ -618,6 +643,10 @@ dirloop: * the jail or chroot, don't let them out. */ if (cnp->cn_flags & ISDOTDOT) { + if (ndp->ni_strictrelative != 0) { + error = ENOTCAPABLE; + goto bad; + } if ((cnp->cn_flags & ISLASTCN) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; Modified: head/sys/kern/vfs_syscalls.c ============================================================================== --- head/sys/kern/vfs_syscalls.c Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/kern/vfs_syscalls.c Sat Aug 13 09:21:16 2011 (r224810) @@ -993,6 +993,41 @@ change_root(vp, td) return (0); } +static __inline cap_rights_t +flags_to_rights(int flags) +{ + cap_rights_t rights = 0; + + switch ((flags & O_ACCMODE)) { + case O_RDONLY: + rights |= CAP_READ; + break; + + case O_RDWR: + rights |= CAP_READ; + /* fall through */ + + case O_WRONLY: + rights |= CAP_WRITE; + break; + + case O_EXEC: + rights |= CAP_FEXECVE; + break; + } + + if (flags & O_CREAT) + rights |= CAP_CREATE; + + if (flags & O_TRUNC) + rights |= CAP_FTRUNCATE; + + if ((flags & O_EXLOCK) || (flags & O_SHLOCK)) + rights |= CAP_FLOCK; + + return (rights); +} + /* * Check permissions, allocate an open file structure, and call the device * open routine if any. @@ -1055,10 +1090,12 @@ kern_openat(struct thread *td, int fd, c struct flock lf; struct nameidata nd; int vfslocked; + cap_rights_t rights_needed = CAP_LOOKUP; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); /* XXX: audit dirfd */ + rights_needed |= flags_to_rights(flags); /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags * may be specified. 
@@ -1082,8 +1119,8 @@ kern_openat(struct thread *td, int fd, c /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; - NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd, - td); + NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, + path, fd, rights_needed, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, fp); if (error) { @@ -1092,18 +1129,20 @@ kern_openat(struct thread *td, int fd, c * wonderous happened deep below and we just pass it up * pretending we know what we do. */ - if (error == ENXIO && fp->f_ops != &badfileops) { - fdrop(fp, td); - td->td_retval[0] = indx; - return (0); - } + if (error == ENXIO && fp->f_ops != &badfileops) + goto success; /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. + * + * Don't do this for relative (capability) lookups; we don't + * understand exactly what would happen, and we don't think + * that it ever should. */ - if ((error == ENODEV || error == ENXIO) && + if ((nd.ni_strictrelative == 0) && + (error == ENODEV || error == ENXIO) && (td->td_dupfd >= 0)) { /* XXX from fdopen */ if ((error = finstall(td, fp, &indx, flags)) != 0) @@ -1172,9 +1211,22 @@ success: /* * If we haven't already installed the FD (for dupfdopen), do so now. */ - if (indx == -1) - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad_unlocked; + if (indx == -1) { +#ifdef CAPABILITIES + if (nd.ni_strictrelative == 1) { + /* + * We are doing a strict relative lookup; wrap the + * result in a capability. 
+ */ + if ((error = kern_capwrap(td, fp, nd.ni_baserights, + &indx)) != 0) + goto bad_unlocked; + } else +#endif + if ((error = finstall(td, fp, &indx, flags)) != 0) + goto bad_unlocked; + + } /* * Release our private reference, leaving the one associated with @@ -1301,8 +1353,9 @@ kern_mknodat(struct thread *td, int fd, return (error); restart: bwillwrite(); - NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, - pathseg, path, fd, td); + NDINIT_ATRIGHTS(&nd, CREATE, + LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd, + CAP_MKFIFO, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); @@ -2153,8 +2206,8 @@ kern_accessat(struct thread *td, int fd, } else cred = tmpcred = td->td_ucred; AUDIT_ARG_VALUE(mode); - NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | - AUDITVNODE1, pathseg, path, fd, td); + NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | + AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td); if ((error = namei(&nd)) != 0) goto out1; vfslocked = NDHASGIANT(&nd); @@ -2363,9 +2416,9 @@ kern_statat_vnhook(struct thread *td, in if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); - NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : + NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg, - path, fd, td); + path, fd, CAP_FSTAT, td); if ((error = namei(&nd)) != 0) return (error); @@ -2920,8 +2973,8 @@ kern_fchmodat(struct thread *td, int fd, AUDIT_ARG_MODE(mode); follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; - NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path, - fd, td); + NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, + path, fd, CAP_FCHMOD, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); @@ -3063,8 +3116,8 @@ kern_fchownat(struct thread *td, int fd, AUDIT_ARG_OWNER(uid, gid); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; - NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path, - fd, td); + NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, + path, fd, CAP_FCHOWN, td); if ((error = namei(&nd)) != 0) return (error); @@ -3279,8 +3332,8 @@ kern_utimesat(struct thread *td, int fd, if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); - NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, - fd, td); + NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, + path, fd, CAP_FUTIMES, td); if ((error = namei(&nd)) != 0) return (error); @@ -3610,11 +3663,11 @@ kern_renameat(struct thread *td, int old bwillwrite(); #ifdef MAC - NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE | - AUDITVNODE1, pathseg, old, oldfd, td); + NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | + MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); #else - NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE | - AUDITVNODE1, pathseg, old, oldfd, td); + NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE | + AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); #endif if ((error = namei(&fromnd)) != 0) @@ -3637,8 +3690,9 @@ kern_renameat(struct thread *td, int old vrele(fvp); goto out1; } - NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | - MPSAFE | AUDITVNODE2, pathseg, new, newfd, td); + NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | + SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, + td); if 
(fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { @@ -3764,8 +3818,8 @@ kern_mkdirat(struct thread *td, int fd, AUDIT_ARG_MODE(mode); restart: bwillwrite(); - NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, - segflg, path, fd, td); + NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | + AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); @@ -3853,8 +3907,8 @@ kern_rmdirat(struct thread *td, int fd, restart: bwillwrite(); - NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1, - pathseg, path, fd, td); + NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | + AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); Modified: head/sys/sys/capability.h ============================================================================== --- head/sys/sys/capability.h Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/sys/capability.h Sat Aug 13 09:21:16 2011 (r224810) @@ -142,7 +142,7 @@ * Create a capability to wrap a file object. */ int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - struct file **cap, int *capfd); + int *capfd); /* * Unwrap a capability if its rights mask is a superset of 'rights'. Modified: head/sys/sys/namei.h ============================================================================== --- head/sys/sys/namei.h Sat Aug 13 00:56:42 2011 (r224809) +++ head/sys/sys/namei.h Sat Aug 13 09:21:16 2011 (r224810) @@ -63,6 +63,7 @@ struct nameidata { */ const char *ni_dirp; /* pathname pointer */ enum uio_seg ni_segflg; /* location of pathname */ + cap_rights_t ni_rightsneeded; /* rights required to look up vnode */ /* * Arguments to lookup. 
*/ @@ -70,6 +71,11 @@ struct nameidata { struct vnode *ni_rootdir; /* logical root directory */ struct vnode *ni_topdir; /* logical top directory */ int ni_dirfd; /* starting directory for *at functions */ + int ni_strictrelative; /* relative lookup only; no '..' */ + /* + * Results: returned from namei + */ + cap_rights_t ni_baserights; /* rights the *at base has (or -1) */ /* * Results: returned from/manipulated by lookup */ @@ -151,11 +157,13 @@ struct nameidata { * Initialization of a nameidata structure. */ #define NDINIT(ndp, op, flags, segflg, namep, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td) + NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, 0, td) #define NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td) + NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, 0, td) +#define NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rights, td) \ + NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rights, td) #define NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td) + NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, 0, td) static __inline void NDINIT_ALL(struct nameidata *ndp, @@ -164,6 +172,7 @@ NDINIT_ALL(struct nameidata *ndp, const char *namep, int dirfd, struct vnode *startdir, + cap_rights_t rights, struct thread *td) { ndp->ni_cnd.cn_nameiop = op; @@ -172,6 +181,9 @@ NDINIT_ALL(struct nameidata *ndp, ndp->ni_dirp = namep; ndp->ni_dirfd = dirfd; ndp->ni_startdir = startdir; + ndp->ni_strictrelative = 0; + ndp->ni_rightsneeded = rights; + ndp->ni_baserights = 0; ndp->ni_cnd.cn_thread = td; }