From owner-freebsd-current@FreeBSD.ORG  Fri May  7 02:22:39 2004
Return-Path: <owner-freebsd-current@FreeBSD.ORG>
Delivered-To: freebsd-current@www.freebsd.org
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id DC2A916A4CE
	for <freebsd-current@www.freebsd.org>;
	Fri,  7 May 2004 02:22:39 -0700 (PDT)
Received: from mx2.freebsd.org (mx2.freebsd.org [216.136.204.119])
	by mx1.FreeBSD.org (Postfix) with ESMTP id 2C8D043D3F
	for <freebsd-current@www.freebsd.org>;
	Fri,  7 May 2004 02:22:39 -0700 (PDT)
	(envelope-from marcolz@stack.nl)
Received: from hub.freebsd.org (hub.freebsd.org [216.136.204.18])
	by mx2.freebsd.org (Postfix) with ESMTP id 1B6B855E8A
	for <freebsd-current@lists.freebsd.org>;
	Fri,  7 May 2004 02:22:39 -0700 (PDT)
	(envelope-from marcolz@stack.nl)
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id 151DC16A4CF
	for <freebsd-current@lists.freebsd.org>;
	Fri,  7 May 2004 02:22:39 -0700 (PDT)
Received: from hexagon.stack.nl (hexagon.stack.nl [131.155.140.144])
	by mx1.FreeBSD.org (Postfix) with ESMTP id AAB1D43D49
	for <freebsd-current@lists.freebsd.org>;
	Fri,  7 May 2004 02:22:35 -0700 (PDT)
	(envelope-from marcolz@stack.nl)
Received: from hammer.stack.nl (hammer.stack.nl
	[IPv6:2001:610:1108:5010::153])
	by hexagon.stack.nl (Postfix) with ESMTP id A24424CA9;
	Fri,  7 May 2004 11:22:34 +0200 (CEST)
Received: by hammer.stack.nl (Postfix, from userid 333)
	id 456B565F4; Fri,  7 May 2004 11:22:35 +0200 (CEST)
Date: Fri, 7 May 2004 11:22:35 +0200
From: Marc Olzheim <marcolz@stack.nl>
To: Bruce M Simpson <bms@spc.org>
Message-ID: <20040507092235.GA61837@stack.nl>
Mime-Version: 1.0
Content-Type: multipart/signed; micalg=pgp-sha1;
	protocol="application/pgp-signature"; boundary="K8nIJk4ghYZn606h"
Content-Disposition: inline
X-Operating-System: FreeBSD hammer.stack.nl 5.2-CURRENT FreeBSD 5.2-CURRENT
X-URL: http://www.stack.nl/~marcolz/
User-Agent: Mutt/1.5.6i
cc: Marc Olzheim <marcolz@stack.nl>
cc: Poul-Henning Kamp <phk@phk.freebsd.dk>
cc: freebsd-current@lists.freebsd.org
Subject: Re: Unified getcwd() implementation
X-BeenThere: freebsd-current@freebsd.org
X-Mailman-Version: 2.1.1
Precedence: list
List-Id: Discussions about the use of FreeBSD-current
	<freebsd-current.freebsd.org>
List-Unsubscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-current>,
	<mailto:freebsd-current-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/freebsd-current>
List-Post: <mailto:freebsd-current@freebsd.org>
List-Help: <mailto:freebsd-current-request@freebsd.org?subject=help>
List-Subscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-current>,
	<mailto:freebsd-current-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Fri, 07 May 2004 09:22:40 -0000


--K8nIJk4ghYZn606h
Content-Type: multipart/mixed; boundary="17pEHd4RhPHOinZp"
Content-Disposition: inline


--17pEHd4RhPHOinZp
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi,

(Re: http://lists.freebsd.org/pipermail/freebsd-arch/2003-August/001152.html)

> Yes, it's quite an old patch, and much has happened since it was written.

Mostly some fine-grained locking was introduced.

I hope I got everything covered. Here is Bruce's patch, reworked so that it
works for me (even over NFS ;-)).

Anyone care to share their view on it?

Marc

--17pEHd4RhPHOinZp
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="getcwd.patch.txt"
Content-Transfer-Encoding: quoted-printable

--- /usr/src/sys/kern/vfs_cache.c	Mon Apr  5 23:03:36 2004
+++ /usr/src/sys/kern/vfs_cache.c	Fri May  7 10:39:48 2004
@@ -35,6 +35,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/kern/vfs_cache.c,v 1.87 2004/04/05 21:03:36 im=
p Exp $");
=20
+#include "opt_mac.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -51,8 +53,11 @@
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
=20
-#include <vm/uma.h>
-
+#include <sys/file.h>
+#include <sys/uio.h>
+#include <sys/mac.h>
+#include <sys/dirent.h>
+#include <ufs/ufs/dir.h>	/* XXX only for DIRBLKSIZ */
 /*
  * This structure describes the elements in the cache of recent
  * names looked up by namei.
@@ -69,6 +74,8 @@
 	char	nc_name[0];		/* segment name */
 };
=20
+#include <vm/uma.h>
+
 /*
  * Name caching works as follows:
  *
@@ -778,6 +785,13 @@
 };
 #endif
=20
+static int getcwd_impl(struct vnode *lvp, struct vnode *rvp, char **bpp,
+	char *bufp, struct thread *td);
+static int getcwd_scandir(struct vnode **, struct vnode **, char **, char =
*,
+	struct thread *);
+
+#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
+
 /*
  * XXX All of these sysctls would probably be more productive dead.
  */
@@ -791,6 +805,7 @@
 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
+static u_long numcwdfail5; STATNODE(CTLFLAG_RD, numcwdfail5, &numcwdfail5);
 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
=20
 /* Implementation of the getcwd syscall */
@@ -803,14 +818,20 @@
 	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
 }
=20
+/*
+ * Find pathname of process's current directory.
+ *
+ * Use vfs vnode-to-name reverse cache; if that fails, fall back
+ * to reading directory contents.
+ */
 int
 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int b=
uflen)
 {
-	char *bp, *tmpbuf;
-	int error, i, slash_prefixed;
+	char *bp, *bufp;
+	int error;
 	struct filedesc *fdp;
-	struct namecache *ncp;
-	struct vnode *vp;
+	struct vnode *rvp;
+	struct vnode *lvp;
=20
 	numcwdcalls++;
 	if (disablecwd)
@@ -819,82 +840,422 @@
 		return (EINVAL);
 	if (buflen > MAXPATHLEN)
 		buflen =3D MAXPATHLEN;
+
+	buflen *=3D 4; /* XXX */
+
 	error =3D 0;
-	tmpbuf =3D bp =3D malloc(buflen, M_TEMP, M_WAITOK);
+	bufp =3D bp =3D malloc(buflen, M_TEMP, M_WAITOK);
 	bp +=3D buflen - 1;
 	*bp =3D '\0';
+
 	fdp =3D td->td_proc->p_fd;
-	slash_prefixed =3D 0;
+
 	FILEDESC_LOCK(fdp);
-	for (vp =3D fdp->fd_cdir; vp !=3D fdp->fd_rdir && vp !=3D rootvnode;) {
-		if (vp->v_vflag & VV_ROOT) {
-			if (vp->v_mount =3D=3D NULL) {	/* forced unmount */
-				FILEDESC_UNLOCK(fdp);
-				free(tmpbuf, M_TEMP);
-				return (EBADF);
-			}
-			vp =3D vp->v_mount->mnt_vnodecovered;
-			continue;
+
+	rvp =3D fdp->fd_rdir;
+	if (rvp =3D=3D NULL)
+		rvp =3D rootvnode;
+	VREF(rvp);
+
+	lvp =3D fdp->fd_cdir;
+
+	/* We have all the info we need from fdp. */
+	FILEDESC_UNLOCK(fdp);
+
+	error =3D getcwd_impl(lvp, rvp, &bp, bufp, td);
+	if (!error) {
+		numcwdfound++;
+		if (bufseg =3D=3D UIO_SYSSPACE)
+			bcopy(bp, buf, strlen(bp) + 1);
+		else
+			error =3D copyout(bp, buf, strlen(bp) + 1);
+	} else {
+#if DIAGNOSTIC
+		printf("getcwd: error %d\n", error);
+#endif
+	}
+
+	vrele(rvp);
+	free(bufp, M_TEMP);
+	return (error);
+}
+
+/*
+ * Iteratively advance up the directory hierarchy from lvp to rvp.
+ * Look in the namecache first for each path component; if it cannot
+ * be found, use scandir to find it. Copy the result into the buffer
+ * pointed to by bufp from *bpp onwards.
+ */
+
+static int
+getcwd_impl(lvp, rvp, bpp, bufp, td)
+	struct vnode *lvp;
+	struct vnode *rvp;
+	char **bpp;
+	char *bufp;
+	struct thread *td;
+{
+	int i, error, slash_prefixed;
+	struct namecache *ncp;
+	struct vnode *uvp;
+	struct vnode *tvp;
+	char *bp;
+
+	error =3D slash_prefixed =3D 0;
+	uvp =3D NULL;
+
+	VREF(lvp);
+	error =3D vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
+	if (error) {
+		numcwdfail5++;
+		vrele(lvp);
+		lvp =3D NULL;
+		goto out;
+	}
+	if (bufp)
+		bp =3D *bpp;
+
+	while (lvp !=3D rvp) {
+#if 1
+		if (lvp->v_type !=3D VDIR) {
+			numcwdfail1++;
+			error =3D ENOTDIR;
+			goto out;
 		}
-		if (vp->v_dd->v_id !=3D vp->v_ddid) {
-			FILEDESC_UNLOCK(fdp);
+#else
+		if (lvp->v_dd->v_id !=3D lvp->v_ddid) {
 			numcwdfail1++;
-			free(tmpbuf, M_TEMP);
-			return (ENOTDIR);
+			error =3D ENOTDIR;
+			goto out;
 		}
-		CACHE_LOCK();
-		ncp =3D TAILQ_FIRST(&vp->v_cache_dst);
-		if (!ncp) {
-			numcwdfail2++;
-			CACHE_UNLOCK();
-			FILEDESC_UNLOCK(fdp);
-			free(tmpbuf, M_TEMP);
-			return (ENOENT);
+#endif
+		/*
+		 * step up if we're a covered vnode.
+		 */
+		while (lvp->v_vflag & VV_ROOT) {
+			if (lvp =3D=3D rvp)
+				goto out;
+			tvp =3D lvp;
+			lvp =3D lvp->v_mount->mnt_vnodecovered;
+			vput(tvp);
+			if (lvp =3D=3D NULL) {
+				numcwdfail3++;
+				error =3D ENOENT;
+				goto out;
+			}
+			VREF(lvp);
+			error =3D vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
+			if (error !=3D 0) {
+				numcwdfail5++;
+				vrele(lvp);
+				lvp =3D NULL;
+				goto out;
+			}
 		}
-		if (ncp->nc_dvp !=3D vp->v_dd) {
-			numcwdfail3++;
-			CACHE_UNLOCK();
-			FILEDESC_UNLOCK(fdp);
-			free(tmpbuf, M_TEMP);
-			return (EBADF);
-		}
-		for (i =3D ncp->nc_nlen - 1; i >=3D 0; i--) {
-			if (bp =3D=3D tmpbuf) {
-				numcwdfail4++;
+		/*
+		 * look in the cache first
+		 */
+		CACHE_LOCK();
+		ncp =3D TAILQ_FIRST(&lvp->v_cache_dst);
+#if DIAGNOSTIC
+		/* XXX simulate cache failure every 10 lookups */
+		if ((numcwdcalls % 10) =3D=3D 0)
+			ncp =3D NULL;
+#endif
+		if (ncp) {
+			if (ncp->nc_dvp !=3D lvp->v_dd) {
 				CACHE_UNLOCK();
-				FILEDESC_UNLOCK(fdp);
-				free(tmpbuf, M_TEMP);
-				return (ENOMEM);
+				numcwdfail3++;
+				error =3D EBADF;
+				goto out;
+			}
+			for (i =3D ncp->nc_nlen - 1; i >=3D 0; i--) {
+				if (bp =3D=3D bufp) {
+					CACHE_UNLOCK();
+					numcwdfail4++;
+					error =3D ENOMEM;
+					goto out;
+				}
+				*--bp =3D ncp->nc_name[i];
 			}
-			*--bp =3D ncp->nc_name[i];
-		}
-		if (bp =3D=3D tmpbuf) {
-			numcwdfail4++;
 			CACHE_UNLOCK();
-			FILEDESC_UNLOCK(fdp);
-			free(tmpbuf, M_TEMP);
-			return (ENOMEM);
+			/*
+			 * must ensure lvp is always locked and ref'd
+			 */
+			tvp =3D lvp;		=09
+			lvp =3D lvp->v_dd;
+			vput(tvp);
+			if (lvp =3D=3D NULL) {
+				numcwdfail3++;
+				error =3D ENOENT;
+				goto out;
+			}
+			VREF(lvp);
+			error =3D vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
+			if (error !=3D 0) {
+				numcwdfail5++;
+				vrele(lvp);
+				lvp =3D NULL;
+				goto out;
+			}
+		} else {
+			CACHE_UNLOCK();
+			numcwdfail2++;
+#if DIAGNOSTIC
+			printf("getcwd: using scandir\n");
+#endif
+			error =3D getcwd_scandir(&lvp, &uvp, &bp, bufp, td);
+			if (error) {
+#if DIAGNOSTIC
+				printf("getcwd_scandir returned %d\n", error);
+#endif
+				goto out;
+			}
+#if DIAGNOSTIC
+			if (lvp !=3D NULL)
+				panic("getcwd: oops, forgot to null lvp");
+#endif
+			lvp =3D uvp;
+			uvp =3D NULL;
 		}
+#if DIAGNOSTIC
+		if (bufp && (bp <=3D bufp))
+			panic("getcwd: oops, went back too far");
+#endif
 		*--bp =3D '/';
 		slash_prefixed =3D 1;
-		vp =3D vp->v_dd;
-		CACHE_UNLOCK();
 	}
-	FILEDESC_UNLOCK(fdp);
+
 	if (!slash_prefixed) {
-		if (bp =3D=3D tmpbuf) {
+		if (bp =3D=3D bufp) {
 			numcwdfail4++;
-			free(tmpbuf, M_TEMP);
-			return (ENOMEM);
+			error =3D ENOMEM;
+			goto out;
 		}
 		*--bp =3D '/';
 	}
-	numcwdfound++;
-	if (bufseg =3D=3D UIO_SYSSPACE)
-		bcopy(bp, buf, strlen(bp) + 1);
-	else
-		error =3D copyout(bp, buf, strlen(bp) + 1);
-	free(tmpbuf, M_TEMP);
+out:
+	if (bpp)
+		*bpp =3D bp;
+	if (uvp)
+		vput(uvp);
+	if (lvp)
+		vput(lvp);
+	return (error);
+}
+
+/*
+ * Vnode variable naming conventions in this file:
+ *
+ * rvp: the current root we're aiming towards.
+ * lvp, *lvpp: the "lower" vnode
+ * uvp, *uvpp: the "upper" vnode.
+ *
+ * Since all the vnodes we're dealing with are directories, and the
+ * lookups are going *up* in the filesystem rather than *down*, the
+ * usual "pvp" (parent) or "dvp" (directory) naming conventions are
+ * too confusing.
+ */
+
+/*
+ * XXX Will infinite loop in certain cases if a directory read reliably
+ *	returns EINVAL on last block.
+ * XXX is EINVAL the right thing to return if a directory is malformed?
+ */
+
+/*
+ * XXX Untested vs. mount -o union; probably does the wrong thing.
+ */
+
+/*
+ * Find parent vnode of *lvpp, return in *uvpp
+ *
+ * If we care about the name, scan it looking for name of directory
+ * entry pointing at lvp.
+ *
+ * Place the name in the buffer which starts at bufp, immediately
+ * before *bpp, and move bpp backwards to point at the start of it.
+ *
+ * On entry, *lvpp is a locked vnode reference; on exit, it is vput and NU=
LL'ed
+ * On exit, *uvpp is either NULL or is a locked vnode reference.
+ */
+
+static int
+getcwd_scandir(lvpp, uvpp, bpp, bufp, td)
+	struct vnode **lvpp;
+	struct vnode **uvpp;
+	char **bpp;
+	char *bufp;
+	struct thread *td;
+{
+	int	error =3D 0;
+	int	eofflag;
+	off_t	off;
+	int	tries;
+	struct uio uio;
+	struct iovec iov;
+	char	*dirbuf =3D NULL;
+	int	dirbuflen;
+	ino_t   fileno;
+	struct vattr va;
+	struct vnode *uvp =3D NULL;
+	struct vnode *lvp =3D *lvpp;
+	struct componentname cn;
+	int len, reclen;
+	tries =3D 0;
+
+	/*
+	 * If we want the filename, get some info we need while the
+	 * current directory is still locked.
+	 */
+	if (bufp !=3D NULL) {
+		error =3D VOP_GETATTR(lvp, &va, td->td_ucred, td);
+		if (error) {
+			vput(lvp);
+			*lvpp =3D NULL;
+			*uvpp =3D NULL;
+#if DIAGNOSTICS
+			printf("VOP_GETATTR returned %d", error);
+#endif
+			return error;
+		}
+	}
+
+	/*
+	 * Ok, we have to do it the hard way..
+	 * Next, get parent vnode using lookup of ..
+	 */
+	cn.cn_nameiop =3D LOOKUP;
+	cn.cn_flags =3D ISLASTCN | ISDOTDOT | RDONLY;
+	cn.cn_thread =3D td;
+	cn.cn_cred =3D td->td_ucred;
+	cn.cn_pnbuf =3D NULL;
+	cn.cn_nameptr =3D "..";
+	cn.cn_namelen =3D 2;
+	cn.cn_consume =3D 0;
+
+	/*
+	 * At this point, lvp is locked and will be unlocked by the lookup.
+	 * On successful return, *uvpp will be locked
+	 */
+	error =3D VOP_LOOKUP(lvp, uvpp, &cn);
+	if (error) {
+		vput(lvp);
+		*lvpp =3D NULL;
+		*uvpp =3D NULL;
+		return error;
+	}
+	uvp =3D *uvpp;
+
+	/* If we don't care about the pathname, we're done */
+	if (bufp =3D=3D NULL) {
+		vrele(lvp);
+		*lvpp =3D NULL;
+		return 0;
+	}
+
+	fileno =3D va.va_fileid;
+
+	dirbuflen =3D DIRBLKSIZ;
+	if (dirbuflen < va.va_blocksize)
+		dirbuflen =3D va.va_blocksize;
+	dirbuf =3D (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
+
+#if 0
+unionread:
+#endif
+	off =3D 0;
+	do {
+		/* call VOP_READDIR of parent */
+		iov.iov_base =3D dirbuf;
+		iov.iov_len =3D dirbuflen;
+
+		uio.uio_iov =3D &iov;
+		uio.uio_iovcnt =3D 1;
+		uio.uio_offset =3D off;
+		uio.uio_resid =3D dirbuflen;
+		uio.uio_segflg =3D UIO_SYSSPACE;
+		uio.uio_rw =3D UIO_READ;
+		uio.uio_td =3D td;
+
+		eofflag =3D 0;
+
+#ifdef MAC
+		error =3D mac_check_vnode_readdir(td->td_ucred, uvp);
+		if (error =3D=3D 0)
+#endif /* MAC */
+			error =3D VOP_READDIR(uvp, &uio, td->td_ucred, &eofflag,
+			    0, 0);
+
+		off =3D uio.uio_offset;
+
+		/*
+		 * Try again if NFS tosses its cookies.
+		 * XXX this can still loop forever if the directory is busted
+		 * such that the second or subsequent page of it always
+		 * returns EINVAL
+		 */
+		if ((error =3D=3D EINVAL) && (tries < 3)) {
+			off =3D 0;
+			tries++;
+			continue;	/* once more, with feeling */
+		}
+
+		if (!error) {
+			char   *cpos;
+			struct dirent *dp;
+		=09
+			cpos =3D dirbuf;
+			tries =3D 0;
+			=09
+			/* scan directory page looking for matching vnode */=20
+			for (len =3D (dirbuflen - uio.uio_resid);
+			     len > 0;
+			     len -=3D reclen)
+			{
+				dp =3D (struct dirent *) cpos;
+				reclen =3D dp->d_reclen;
+
+				/* check for malformed directory.. */
+				if (reclen < DIRENT_MINSIZE) {
+					error =3D EINVAL;
+					goto out;
+				}
+				/*
+				 * XXX should perhaps do VOP_LOOKUP to
+				 * check that we got back to the right place,
+				 * but getting the locking games for that
+				 * right would be heinous.
+				 */
+				if ((dp->d_type !=3D DT_WHT) &&
+				    (dp->d_fileno =3D=3D fileno)) {
+					char *bp =3D *bpp;
+					bp -=3D dp->d_namlen;
+#if DIAGNOSTIC
+					printf("bp: %p bufp: %p dp->d_name: %s"
+					       " (%d)\n",
+					       bp, bufp,
+					       dp->d_name, dp->d_namlen);
+#endif
+					if (bp <=3D bufp) {
+						error =3D ERANGE;
+						goto out;
+					}
+					bcopy(dp->d_name, bp, dp->d_namlen);
+					error =3D 0;
+					*bpp =3D bp;
+					goto out;
+				}
+				cpos +=3D reclen;
+			}
+		}
+	} while (!eofflag);
+	error =3D ENOENT;
+
+out:
+	vrele(lvp);
+	*lvpp =3D NULL;
+	free(dirbuf, M_TEMP);
 	return (error);
 }
=20
--- /usr/src/sys/compat/linux/linux_getcwd.c	Mon Nov 17 19:57:20 2003
+++ /usr/src/sys/compat/linux/linux_getcwd.c	Fri May  7 11:12:28 2004
@@ -40,435 +40,27 @@
 __FBSDID("$FreeBSD: src/sys/compat/linux/linux_getcwd.c,v 1.14 2003/11/17 =
18:57:20 rwatson Exp $");
=20
 #include "opt_compat.h"
-#include "opt_mac.h"
=20
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/syscallsubr.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
 #include <sys/proc.h>
-#include <sys/uio.h>
-#include <sys/mac.h>
-#include <sys/malloc.h>
-#include <sys/dirent.h>
-#include <ufs/ufs/dir.h>	/* XXX only for DIRBLKSIZ */
+#include <sys/syscallsubr.h>
=20
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #include <compat/linux/linux_util.h>
=20
-static int
-linux_getcwd_scandir(struct vnode **, struct vnode **,
-    char **, char *, struct thread *);
-static int
-linux_getcwd_common(struct vnode *, struct vnode *,
-		   char **, char *, int, int, struct thread *);
-
-#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
-
-/*
- * Vnode variable naming conventions in this file:
- *
- * rvp: the current root we're aiming towards.
- * lvp, *lvpp: the "lower" vnode
- * uvp, *uvpp: the "upper" vnode.
- *
- * Since all the vnodes we're dealing with are directories, and the
- * lookups are going *up* in the filesystem rather than *down*, the
- * usual "pvp" (parent) or "dvp" (directory) naming conventions are
- * too confusing.
- */
-
-/*
- * XXX Will infinite loop in certain cases if a directory read reliably
- *	returns EINVAL on last block.
- * XXX is EINVAL the right thing to return if a directory is malformed?
- */
-
-/*
- * XXX Untested vs. mount -o union; probably does the wrong thing.
- */
-
-/*
- * Find parent vnode of *lvpp, return in *uvpp
- *
- * If we care about the name, scan it looking for name of directory
- * entry pointing at lvp.
- *
- * Place the name in the buffer which starts at bufp, immediately
- * before *bpp, and move bpp backwards to point at the start of it.
- *
- * On entry, *lvpp is a locked vnode reference; on exit, it is vput and NU=
LL'ed
- * On exit, *uvpp is either NULL or is a locked vnode reference.
- */
-static int
-linux_getcwd_scandir(lvpp, uvpp, bpp, bufp, td)
-	struct vnode **lvpp;
-	struct vnode **uvpp;
-	char **bpp;
-	char *bufp;
-	struct thread *td;
-{
-	int     error =3D 0;
-	int     eofflag;
-	off_t   off;
-	int     tries;
-	struct uio uio;
-	struct iovec iov;
-	char   *dirbuf =3D NULL;
-	int	dirbuflen;
-	ino_t   fileno;
-	struct vattr va;
-	struct vnode *uvp =3D NULL;
-	struct vnode *lvp =3D *lvpp;=09
-	struct componentname cn;
-	int len, reclen;
-	tries =3D 0;
-
-	/*
-	 * If we want the filename, get some info we need while the
-	 * current directory is still locked.
-	 */
-	if (bufp !=3D NULL) {
-		error =3D VOP_GETATTR(lvp, &va, td->td_ucred, td);
-		if (error) {
-			vput(lvp);
-			*lvpp =3D NULL;
-			*uvpp =3D NULL;
-			return error;
-		}
-	}
-
-	/*
-	 * Ok, we have to do it the hard way..
-	 * Next, get parent vnode using lookup of ..
-	 */
-	cn.cn_nameiop =3D LOOKUP;
-	cn.cn_flags =3D ISLASTCN | ISDOTDOT | RDONLY;
-	cn.cn_thread =3D td;
-	cn.cn_cred =3D td->td_ucred;
-	cn.cn_pnbuf =3D NULL;
-	cn.cn_nameptr =3D "..";
-	cn.cn_namelen =3D 2;
-	cn.cn_consume =3D 0;
-=09
-	/*
-	 * At this point, lvp is locked and will be unlocked by the lookup.
-	 * On successful return, *uvpp will be locked
-	 */
-#ifdef MAC
-	error =3D mac_check_vnode_lookup(td->td_ucred, lvp, &cn);
-	if (error =3D=3D 0)
-#endif
-		error =3D VOP_LOOKUP(lvp, uvpp, &cn);
-	if (error) {
-		vput(lvp);
-		*lvpp =3D NULL;
-		*uvpp =3D NULL;
-		return error;
-	}
-	uvp =3D *uvpp;
-
-	/* If we don't care about the pathname, we're done */
-	if (bufp =3D=3D NULL) {
-		vrele(lvp);
-		*lvpp =3D NULL;
-		return 0;
-	}
-=09
-	fileno =3D va.va_fileid;
-
-	dirbuflen =3D DIRBLKSIZ;
-	if (dirbuflen < va.va_blocksize)
-		dirbuflen =3D va.va_blocksize;
-	dirbuf =3D (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
-
-#if 0
-unionread:
-#endif
-	off =3D 0;
-	do {
-		/* call VOP_READDIR of parent */
-		iov.iov_base =3D dirbuf;
-		iov.iov_len =3D dirbuflen;
-
-		uio.uio_iov =3D &iov;
-		uio.uio_iovcnt =3D 1;
-		uio.uio_offset =3D off;
-		uio.uio_resid =3D dirbuflen;
-		uio.uio_segflg =3D UIO_SYSSPACE;
-		uio.uio_rw =3D UIO_READ;
-		uio.uio_td =3D td;
-
-		eofflag =3D 0;
-
-#ifdef MAC
-		error =3D mac_check_vnode_readdir(td->td_ucred, uvp);
-		if (error =3D=3D 0)
-#endif /* MAC */
-			error =3D VOP_READDIR(uvp, &uio, td->td_ucred, &eofflag,
-			    0, 0);
-
-		off =3D uio.uio_offset;
-
-		/*
-		 * Try again if NFS tosses its cookies.
-		 * XXX this can still loop forever if the directory is busted
-		 * such that the second or subsequent page of it always
-		 * returns EINVAL
-		 */
-		if ((error =3D=3D EINVAL) && (tries < 3)) {
-			off =3D 0;
-			tries++;
-			continue;	/* once more, with feeling */
-		}
-
-		if (!error) {
-			char   *cpos;
-			struct dirent *dp;
-		=09
-			cpos =3D dirbuf;
-			tries =3D 0;
-			=09
-			/* scan directory page looking for matching vnode */=20
-			for (len =3D (dirbuflen - uio.uio_resid); len > 0; len -=3D reclen) {
-				dp =3D (struct dirent *) cpos;
-				reclen =3D dp->d_reclen;
-
-				/* check for malformed directory.. */
-				if (reclen < DIRENT_MINSIZE) {
-					error =3D EINVAL;
-					goto out;
-				}
-				/*
-				 * XXX should perhaps do VOP_LOOKUP to
-				 * check that we got back to the right place,
-				 * but getting the locking games for that
-				 * right would be heinous.
-				 */
-				if ((dp->d_type !=3D DT_WHT) &&
-				    (dp->d_fileno =3D=3D fileno)) {
-					char *bp =3D *bpp;
-					bp -=3D dp->d_namlen;
-				=09
-					if (bp <=3D bufp) {
-						error =3D ERANGE;
-						goto out;
-					}
-					bcopy(dp->d_name, bp, dp->d_namlen);
-					error =3D 0;
-					*bpp =3D bp;
-					goto out;
-				}
-				cpos +=3D reclen;
-			}
-		}
-	} while (!eofflag);
-	error =3D ENOENT;
-	=09
-out:
-	vrele(lvp);
-	*lvpp =3D NULL;
-	free(dirbuf, M_TEMP);
-	return error;
-}
-
-
-/*
- * common routine shared by sys___getcwd() and linux_vn_isunder()
- */
-
-#define GETCWD_CHECK_ACCESS 0x0001
-
-static int
-linux_getcwd_common (lvp, rvp, bpp, bufp, limit, flags, td)
-	struct vnode *lvp;
-	struct vnode *rvp;
-	char **bpp;
-	char *bufp;
-	int limit;
-	int flags;
-	struct thread *td;
-{
-	struct filedesc *fdp =3D td->td_proc->p_fd;
-	struct vnode *uvp =3D NULL;
-	char *bp =3D NULL;
-	int error;
-	int perms =3D VEXEC;
-
-	if (rvp =3D=3D NULL) {
-		rvp =3D fdp->fd_rdir;
-		if (rvp =3D=3D NULL)
-			rvp =3D rootvnode;
-	}
-=09
-	VREF(rvp);
-	VREF(lvp);
-
-	/*
-	 * Error handling invariant:
-	 * Before a `goto out':
-	 *	lvp is either NULL, or locked and held.
-	 *	uvp is either NULL, or locked and held.
-	 */
-
-	error =3D vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
-	if (error) {
-		vrele(lvp);
-		lvp =3D NULL;
-		goto out;
-	}
-	if (bufp)
-		bp =3D *bpp;
-	/*
-	 * this loop will terminate when one of the following happens:
-	 *	- we hit the root
-	 *	- getdirentries or lookup fails
-	 *	- we run out of space in the buffer.
-	 */
-	if (lvp =3D=3D rvp) {
-		if (bp)
-			*(--bp) =3D '/';
-		goto out;
-	}
-	do {
-		if (lvp->v_type !=3D VDIR) {
-			error =3D ENOTDIR;
-			goto out;
-		}
-	=09
-		/*
-		 * access check here is optional, depending on
-		 * whether or not caller cares.
-		 */
-		if (flags & GETCWD_CHECK_ACCESS) {
-			error =3D VOP_ACCESS(lvp, perms, td->td_ucred, td);
-			if (error)
-				goto out;
-			perms =3D VEXEC|VREAD;
-		}
-	=09
-		/*
-		 * step up if we're a covered vnode..
-		 */
-		while (lvp->v_vflag & VV_ROOT) {
-			struct vnode *tvp;
-
-			if (lvp =3D=3D rvp)
-				goto out;
-		=09
-			tvp =3D lvp;
-			lvp =3D lvp->v_mount->mnt_vnodecovered;
-			vput(tvp);
-			/*
-			 * hodie natus est radici frater
-			 */
-			if (lvp =3D=3D NULL) {
-				error =3D ENOENT;
-				goto out;
-			}
-			VREF(lvp);
-			error =3D vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
-			if (error !=3D 0) {
-				vrele(lvp);
-				lvp =3D NULL;
-				goto out;
-			}
-		}
-		error =3D linux_getcwd_scandir(&lvp, &uvp, &bp, bufp, td);
-		if (error)
-			goto out;
-#if DIAGNOSTIC	=09
-		if (lvp !=3D NULL)
-			panic("getcwd: oops, forgot to null lvp");
-		if (bufp && (bp <=3D bufp)) {
-			panic("getcwd: oops, went back too far");
-		}
-#endif	=09
-		if (bp)=20
-			*(--bp) =3D '/';
-		lvp =3D uvp;
-		uvp =3D NULL;
-		limit--;
-	} while ((lvp !=3D rvp) && (limit > 0));=20
-
-out:
-	if (bpp)
-		*bpp =3D bp;
-	if (uvp)
-		vput(uvp);
-	if (lvp)
-		vput(lvp);
-	vrele(rvp);
-	return error;
-}
-
-
 /*
  * Find pathname of process's current directory.
- *
- * Use vfs vnode-to-name reverse cache; if that fails, fall back
- * to reading directory contents.
+ * Simply use the new, complete getcwd() implementation.
  */
=20
 int
 linux_getcwd(struct thread *td, struct linux_getcwd_args *args)
 {
-	caddr_t bp, bend, path;
-	int error, len, lenused;
-
 #ifdef DEBUG
 	printf("Linux-emul(%ld): getcwd(%p, %ld)\n", (long)td->td_proc->p_pid,
 	       args->buf, (long)args->bufsize);
 #endif
-
-	len =3D args->bufsize;
-
-	if (len > MAXPATHLEN*4)
-		len =3D MAXPATHLEN*4;
-	else if (len < 2)
-		return ERANGE;
-
-	path =3D (char *)malloc(len, M_TEMP, M_WAITOK);
-
-	error =3D kern___getcwd(td, path, UIO_SYSSPACE, len);
-	if (!error) {
-		lenused =3D strlen(path) + 1;
-		if (lenused <=3D args->bufsize) {
-			td->td_retval[0] =3D lenused;
-			error =3D copyout(path, args->buf, lenused);
-		}
-		else
-			error =3D ERANGE;
-	} else {
-		bp =3D &path[len];
-		bend =3D bp;
-		*(--bp) =3D '\0';
-
-		/*
-		 * 5th argument here is "max number of vnodes to traverse".
-		 * Since each entry takes up at least 2 bytes in the output buffer,
-		 * limit it to N/2 vnodes for an N byte buffer.
-		 */
-
-		error =3D linux_getcwd_common (td->td_proc->p_fd->fd_cdir, NULL,
-		    &bp, path, len/2, GETCWD_CHECK_ACCESS, td);
-
-		if (error)
-			goto out;
-		lenused =3D bend - bp;
-		td->td_retval[0] =3D lenused;
-		/* put the result into user buffer */
-		error =3D copyout(bp, args->buf, lenused);
-	}
-out:
-	free(path, M_TEMP);
-	return (error);
+	return (kern___getcwd(td, args->buf, UIO_USERSPACE, args->bufsize));
 }
-

--17pEHd4RhPHOinZp--

--K8nIJk4ghYZn606h
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (FreeBSD)

iD8DBQFAm1VbezjnobFOgrERAg1aAJwIyNFytGIsdVEV/pqDVbepuH4vEACeNrkP
nWe9bieUeUTGqejixBBtPgY=
=ZVqL
-----END PGP SIGNATURE-----

--K8nIJk4ghYZn606h--