From owner-svn-src-all@freebsd.org Wed Nov 16 16:14:02 2016 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id EFC0DC4587C; Wed, 16 Nov 2016 16:14:02 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id C4DA7CE2; Wed, 16 Nov 2016 16:14:02 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id uAGGE1E2023388; Wed, 16 Nov 2016 16:14:01 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id uAGGE16c023384; Wed, 16 Nov 2016 16:14:01 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201611161614.uAGGE16c023384@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Wed, 16 Nov 2016 16:14:01 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r308732 - in stable/11/sys: fs/nfsserver kern sys X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 16 Nov 2016 16:14:03 -0000 Author: kib Date: Wed Nov 16 16:14:01 2016 New Revision: 308732 URL: https://svnweb.freebsd.org/changeset/base/308732 Log: MFC r308212: Allow some dotdot lookups in capability mode. Modified: stable/11/sys/fs/nfsserver/nfs_nfsdport.c stable/11/sys/kern/vfs_lookup.c stable/11/sys/kern/vfs_syscalls.c stable/11/sys/sys/namei.h Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/fs/nfsserver/nfs_nfsdport.c ============================================================================== --- stable/11/sys/fs/nfsserver/nfs_nfsdport.c Wed Nov 16 15:21:32 2016 (r308731) +++ stable/11/sys/fs/nfsserver/nfs_nfsdport.c Wed Nov 16 16:14:01 2016 (r308732) @@ -350,7 +350,7 @@ nfsvno_namei(struct nfsrv_descript *nd, *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; - ndp->ni_strictrelative = 0; + ndp->ni_lcf = 0; /* * Extract and set starting directory. */ Modified: stable/11/sys/kern/vfs_lookup.c ============================================================================== --- stable/11/sys/kern/vfs_lookup.c Wed Nov 16 15:21:32 2016 (r308731) +++ stable/11/sys/kern/vfs_lookup.c Wed Nov 16 16:14:01 2016 (r308732) @@ -79,12 +79,22 @@ uma_zone_t namei_zone; /* Placeholder vnode for mp traversal. */ static struct vnode *vp_crossmp; +struct nameicap_tracker { + struct vnode *dp; + TAILQ_ENTRY(nameicap_tracker) nm_link; +}; + +/* Zone for cap mode tracker elements used for dotdot capability checks. */ +static uma_zone_t nt_zone; + static void nameiinit(void *dummy __unused) { namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + nt_zone = uma_zcreate("rentr", sizeof(struct nameicap_tracker), + NULL, NULL, NULL, NULL, sizeof(void *), 0); getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); vn_lock(vp_crossmp, LK_EXCLUSIVE); VN_LOCK_ASHARE(vp_crossmp); @@ -96,6 +106,76 @@ static int lookup_shared = 1; SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RWTUN, &lookup_shared, 0, "enables shared locks for path name translation"); +/* + * Intent is that lookup_cap_dotdot becomes unconditionally enabled, + * but it defaults to the disabled state until verification efforts + * are complete. + */ +static int lookup_cap_dotdot = 0; +SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN, + &lookup_cap_dotdot, 0, + "enables \"..\" components in path lookup in capability mode"); +static int lookup_cap_dotdot_nonlocal = 0; +SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, + &lookup_cap_dotdot_nonlocal, 0, + "enables \"..\" components in path lookup in capability mode " + "on non-local mount"); + +static void +nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) +{ + struct nameicap_tracker *nt; + + if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) + return; + nt = uma_zalloc(nt_zone, M_WAITOK); + vhold(dp); + nt->dp = dp; + TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); +} + +static void +nameicap_cleanup(struct nameidata *ndp) +{ + struct nameicap_tracker *nt, *nt1; + + KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || + (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); + TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); + vdrop(nt->dp); + uma_zfree(nt_zone, nt); + } +} + +/* + * For dotdot lookups in capability mode, only allow the component + * lookup to succeed if the resulting directory was already traversed + * during the operation. Also fail dotdot lookups for non-local + * filesystems, where external agents might assist local lookups to + * escape the compartment. + */ +static int +nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) +{ + struct nameicap_tracker *nt; + struct mount *mp; + + if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp == NULL || + dp->v_type != VDIR) + return (0); + mp = dp->v_mount; + if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && + (mp->mnt_flag & MNT_LOCAL) == 0) + return (ENOTCAPABLE); + TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, + nm_link) { + if (dp == nt->dp) + return (0); + } + return (ENOTCAPABLE); +} + static void namei_cleanup_cnp(struct componentname *cnp) { @@ -113,7 +193,7 @@ namei_handle_root(struct nameidata *ndp, struct componentname *cnp; cnp = &ndp->ni_cnd; - if (ndp->ni_strictrelative != 0) { + if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); @@ -177,6 +257,8 @@ namei(struct nameidata *ndp) if (!lookup_shared) cnp->cn_flags &= ~LOCKSHARED; fdp = p->p_fd; + TAILQ_INIT(&ndp->ni_cap_tracker); + ndp->ni_lcf = 0; /* We will set this ourselves if we need it. */ cnp->cn_flags &= ~TRAILINGSLASH; @@ -202,13 +284,21 @@ namei(struct nameidata *ndp) #ifdef CAPABILITY_MODE /* - * In capability mode, lookups must be "strictly relative" (i.e. - * not an absolute path, and not containing '..' components) to - * a real file descriptor, not the pseudo-descriptor AT_FDCWD. + * In capability mode, lookups must be restricted to happen in + * the subtree with the root specified by the file descriptor: + * - The root must be real file descriptor, not the pseudo-descriptor + * AT_FDCWD. + * - The passed path must be relative and not absolute. + * - If lookup_cap_dotdot is disabled, path must not contain the + * '..' components. + * - If lookup_cap_dotdot is enabled, we verify that all '..' + * components lookups result in the directories which were + * previously walked by us, which prevents an escape from + * the relative root. */ if (error == 0 && IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { - ndp->ni_strictrelative = 1; + ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; if (ndp->ni_dirfd == AT_FDCWD) { #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) @@ -282,7 +372,7 @@ namei(struct nameidata *ndp) &rights) || ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || ndp->ni_filecaps.fc_nioctls != -1) { - ndp->ni_strictrelative = 1; + ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; } #endif } @@ -299,6 +389,9 @@ namei(struct nameidata *ndp) namei_cleanup_cnp(cnp); return (error); } + if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && + lookup_cap_dotdot != 0) + ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, cnp->cn_flags); for (;;) { @@ -319,7 +412,7 @@ namei(struct nameidata *ndp) namei_cleanup_cnp(cnp); } else cnp->cn_flags |= HASBUF; - + nameicap_cleanup(ndp); SDT_PROBE2(vfs, namei, lookup, return, 0, ndp->ni_vp); return (0); } @@ -395,6 +488,7 @@ namei(struct nameidata *ndp) vput(ndp->ni_vp); ndp->ni_vp = NULL; vrele(ndp->ni_dvp); + nameicap_cleanup(ndp); SDT_PROBE2(vfs, namei, lookup, return, error, NULL); return (error); } @@ -591,6 +685,8 @@ dirloop: goto bad; } + nameicap_tracker_add(ndp, dp); + /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, @@ -626,9 +722,8 @@ dirloop: /* * Handle "..": five special cases. - * 0. If doing a capability lookup, return ENOTCAPABLE (this is a - * fairly conservative design choice, but it's the only one that we - * are satisfied guarantees the property we're looking for). + * 0. If doing a capability lookup and lookup_cap_dotdot is + * disabled, return ENOTCAPABLE. * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) @@ -640,9 +735,15 @@ dirloop: * .. in the other filesystem. * 4. If the vnode is the top directory of * the jail or chroot, don't let them out. + * 5. If doing a capability lookup and lookup_cap_dotdot is + * enabled, return ENOTCAPABLE if the lookup would escape + * from the initial file descriptor directory. Checks are + * done by ensuring that namei() already traversed the + * result of dotdot lookup. */ if (cnp->cn_flags & ISDOTDOT) { - if (ndp->ni_strictrelative != 0) { + if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT)) + == NI_LCF_STRICTRELATIVE) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); @@ -684,6 +785,14 @@ dirloop: vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, ISDOTDOT)); + error = nameicap_check_dotdot(ndp, dp); + if (error != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); +#endif + goto bad; + } } } @@ -743,6 +852,7 @@ unionlookup: vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, cnp->cn_flags)); + nameicap_tracker_add(ndp, dp); goto unionlookup; } @@ -863,6 +973,16 @@ nextname: vrele(ndp->ni_dvp); goto dirloop; } + if (cnp->cn_flags & ISDOTDOT) { + error = nameicap_check_dotdot(ndp, ndp->ni_vp); + if (error != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); +#endif + goto bad2; + } + } if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next; while (*cnp->cn_nameptr == '/') { @@ -1089,7 +1209,6 @@ NDINIT_ALL(struct nameidata *ndp, u_long ndp->ni_dirp = namep; ndp->ni_dirfd = dirfd; ndp->ni_startdir = startdir; - ndp->ni_strictrelative = 0; if (rightsp != NULL) ndp->ni_rightsneeded = *rightsp; else Modified: stable/11/sys/kern/vfs_syscalls.c ============================================================================== --- stable/11/sys/kern/vfs_syscalls.c Wed Nov 16 15:21:32 2016 (r308731) +++ stable/11/sys/kern/vfs_syscalls.c Wed Nov 16 16:14:01 2016 (r308732) @@ -1012,7 +1012,7 @@ kern_openat(struct thread *td, int fd, c * understand exactly what would happen, and we don't think * that it ever should. */ - if (nd.ni_strictrelative == 0 && + if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && (error == ENODEV || error == ENXIO) && td->td_dupfd >= 0) { error = dupfdopen(td, fdp, td->td_dupfd, flags, error, @@ -1058,7 +1058,7 @@ success: struct filecaps *fcaps; #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) + if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) fcaps = &nd.ni_filecaps; else #endif Modified: stable/11/sys/sys/namei.h ============================================================================== --- stable/11/sys/sys/namei.h Wed Nov 16 15:21:32 2016 (r308731) +++ stable/11/sys/sys/namei.h Wed Nov 16 16:14:01 2016 (r308732) @@ -55,6 +55,9 @@ struct componentname { long cn_namelen; /* length of looked up component */ }; +struct nameicap_tracker; +TAILQ_HEAD(nameicap_tracker_head, nameicap_tracker); + /* * Encapsulation of namei parameters. */ @@ -72,7 +75,7 @@ struct nameidata { struct vnode *ni_rootdir; /* logical root directory */ struct vnode *ni_topdir; /* logical top directory */ int ni_dirfd; /* starting directory for *at functions */ - int ni_strictrelative; /* relative lookup only; no '..' */ + int ni_lcf; /* local call flags */ /* * Results: returned from namei */ @@ -94,6 +97,7 @@ struct nameidata { * through the VOP interface. */ struct componentname ni_cnd; + struct nameicap_tracker_head ni_cap_tracker; }; #ifdef _KERNEL @@ -152,6 +156,12 @@ struct nameidata { #define PARAMASK 0x3ffffe00 /* mask of parameter descriptors */ /* + * Flags in ni_lcf, valid for the duration of the namei call. + */ +#define NI_LCF_STRICTRELATIVE 0x0001 /* relative lookup only */ +#define NI_LCF_CAP_DOTDOT 0x0002 /* ".." in strictrelative case */ + +/* * Initialization of a nameidata structure. */ #define NDINIT(ndp, op, flags, segflg, namep, td) \