Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 27 Sep 2010 23:22:27 -0700
From:      Marcel Moolenaar <xcllnt@mac.com>
To:        "freebsd-arch@FreeBSD.org Arch" <freebsd-arch@freebsd.org>
Subject:   [patch] functional prototype of root mount enhancement
Message-ID:  <CD1BDE8F-29BE-4A82-B0D9-8849FF3C1A1F@mac.com>

next in thread | raw e-mail | index | archive | help

--Boundary_(ID_e2iysUHX7Ge1qa8HV4CNIg)
Content-type: text/plain; charset=us-ascii
Content-transfer-encoding: 7BIT

All,

I prototyped the root mount enhancement previously discussed.
I would appreciate feedback and suggestions and bug reports
of course.

See:
http://docs.freebsd.org/cgi/getmsg.cgi?fetch=5942+0+current/freebsd-arch
http://docs.freebsd.org/cgi/getmsg.cgi?fetch=120899+0+archive/2010/freebsd-arch/20100829.freebsd-arch

The prototype supports all boot options that affect the root
mount. Those are: -a, -C, -r
When present, the initial root mount directives get adjusted
accordingly.

The prototype adds better support for mount options. Both the
interactive, as well as the compiled-in root mount option
(i.e. ROOTDEVNAME) can contain mount options.

Not implemented yet is the .onfail handling, as well as the
.timeout handling (previously called .wait). Also, the .init
directive is not implemented.

There's 1 bug under investigation: when a 2nd (non-devfs)
file system is mounted as root, the 1st (non-devfs) gets
moved under /.mount or /mnt under the new (=2nd) file
system. However, trying to access the file system results in
a WITNESS panic caused by a syscall leaving with the ufs
lock held.

The code has some debug output still, which is helpful to
see what's going on internally. From a boot (with a
/.mount.conf present on ufs:/dev/ad0s1a):

	:
WARNING: WITNESS option enabled, expect reduced performance.
Root mount waiting for: usbus1
Root mount waiting for: usbus1
uhub1: 6 ports with 6 removable, self powered
Root mount waiting for: usbus1
Root mount waiting for: usbus1
ugen1.2: <Apple Inc.> at usbus1
========
.onfail panic
.timeout 1
ufs:/dev/ad0s1a rw
.ask
========
Trying to mount root from ufs:/dev/ad0s1a [rw]...
XXX: vfs_mountroot_parse: error = 0, mpdevfs=0xc3fa3000, mp=0xc3fa2c94
========
.onfail continue
#ufs:/dev/da0a
.ask
========

Loader variables:
  vfs.root.mountfrom=ufs:/dev/ad0s1a
  vfs.root.mountfrom.options=rw

Manual root filesystem specification:
  <fstype>:<device> [options]
      Mount <device> using filesystem <fstype>
      and with the specified (optional) option list.

    eg. ufs:/dev/da0s1a
        cd9660:/dev/acd0 ro
          (which is equivalent to: mount -t cd9660 -o ro /dev/acd0 /

  ?                  List valid disk boot devices
  <empty line>       Abort manual input

mountroot> 
XXX: vfs_mountroot_parse: error = -1, mpdevfs=0xc3fa3000, mp=0
	:

In case the attachment gets eaten:
	http://www.xcllnt.net/~marcel/rootmount.diff

-- 
Marcel Moolenaar
xcllnt@mac.com



--Boundary_(ID_e2iysUHX7Ge1qa8HV4CNIg)
Content-type: application/octet-stream; name=rootmount.diff
Content-transfer-encoding: 7bit
Content-disposition: attachment; filename=rootmount.diff

Index: conf/files
===================================================================
--- conf/files	(revision 41)
+++ conf/files	(revision 49)
@@ -2216,6 +2216,7 @@
 kern/vfs_init.c			standard
 kern/vfs_lookup.c		standard
 kern/vfs_mount.c		standard
+kern/vfs_mountroot.c		standard
 kern/vfs_subr.c			standard
 kern/vfs_syscalls.c		standard
 kern/vfs_vnops.c		standard
Index: kern/vfs_mountroot.c
===================================================================
--- kern/vfs_mountroot.c	(revision 0)
+++ kern/vfs_mountroot.c	(revision 49)
@@ -0,0 +1,985 @@
+/*-
+ * Copyright (c) 1999-2004 Poul-Henning Kamp
+ * Copyright (c) 1999 Michael Smith
+ * Copyright (c) 1989, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_rootdevname.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/mdioctl.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+
+#include <geom/geom.h>
+
+/*
+ * The root filesystem is detailed in the kernel environment variable
+ * vfs.root.mountfrom, which is expected to be in the general format
+ *
+ * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
+ * vfsname   := the name of a VFS known to the kernel and capable
+ *              of being mounted as root
+ * path      := disk device name or other data used by the filesystem
+ *              to locate its physical store
+ *
+ * If the environment variable vfs.root.mountfrom is a space separated list,
+ * each list element is tried in turn and the root filesystem will be mounted
+ * from the first one that succeeds.
+ *
+ * The environment variable vfs.root.mountfrom.options is a comma delimited
+ * set of string mount options.  These mount options must be parseable
+ * by nmount() in the kernel.
+ */
+
+static int parse_mount(char **);
+static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
+
+/*
+ * The vnode of the system's root (/ in the filesystem, without chroot
+ * active.)
+ */
+struct vnode *rootvnode;
+
+char *rootdevnames[2] = {NULL, NULL};
+
+struct root_hold_token {
+	const char			*who;
+	LIST_ENTRY(root_hold_token)	list;
+};
+
+static LIST_HEAD(, root_hold_token)	root_holds =
+    LIST_HEAD_INITIALIZER(root_holds);
+
+enum action {
+	A_PANIC,
+	A_CONTINUE,
+	A_REBOOT,
+	A_RETRY
+};
+
+static enum action root_mount_action;
+
+static int root_mount_mddev;
+static int root_mount_complete;
+
+/* By default wait up to 1 second for devices to appear. */
+static int root_mount_timeout = 1;
+
+struct root_hold_token *
+root_mount_hold(const char *identifier)
+{
+	struct root_hold_token *h;
+
+	if (root_mounted())
+		return (NULL);
+
+	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
+	h->who = identifier;
+	mtx_lock(&mountlist_mtx);
+	LIST_INSERT_HEAD(&root_holds, h, list);
+	mtx_unlock(&mountlist_mtx);
+	return (h);
+}
+
+void
+root_mount_rel(struct root_hold_token *h)
+{
+
+	if (h == NULL)
+		return;
+	mtx_lock(&mountlist_mtx);
+	LIST_REMOVE(h, list);
+	wakeup(&root_holds);
+	mtx_unlock(&mountlist_mtx);
+	free(h, M_DEVBUF);
+}
+
+int
+root_mounted(void)
+{
+
+	/* No mutex is acquired here because int stores are atomic. */
+	return (root_mount_complete);
+}
+
+void
+root_mount_wait(void)
+{
+
+	/*
+	 * Panic on an obvious deadlock - the function can't be called from
+	 * a thread which is doing the whole SYSINIT stuff.
+	 */
+	KASSERT(curthread->td_proc->p_pid != 0,
+	    ("root_mount_wait: cannot be called from the swapper thread"));
+	mtx_lock(&mountlist_mtx);
+	while (!root_mount_complete) {
+		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
+		    hz);
+	}
+	mtx_unlock(&mountlist_mtx);
+}
+
+static void
+set_rootvnode(void)
+{
+	struct proc *p;
+
+	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
+		panic("Cannot find root vnode");
+
+	VOP_UNLOCK(rootvnode, 0);
+
+	p = curthread->td_proc;
+	FILEDESC_XLOCK(p->p_fd);
+
+	if (p->p_fd->fd_cdir != NULL)
+		vrele(p->p_fd->fd_cdir);
+	p->p_fd->fd_cdir = rootvnode;
+	VREF(rootvnode);
+
+	if (p->p_fd->fd_rdir != NULL)
+		vrele(p->p_fd->fd_rdir);
+	p->p_fd->fd_rdir = rootvnode;
+	VREF(rootvnode);
+
+	FILEDESC_XUNLOCK(p->p_fd);
+
+	EVENTHANDLER_INVOKE(mountroot);
+}
+
+static int
+vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
+{
+	struct vfsoptlist *opts;
+	struct vfsconf *vfsp;
+	struct mount *mp;
+	int error;
+
+	*mpp = NULL;
+
+	vfsp = vfs_byname("devfs");
+	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
+	if (vfsp == NULL)
+		return (ENOENT);
+
+	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
+
+	error = VFS_MOUNT(mp);
+	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
+	if (error)
+		return (error);
+
+	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
+	TAILQ_INIT(opts);
+	mp->mnt_opt = opts;
+
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+
+	*mpp = mp;
+	set_rootvnode();
+
+	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
+	if (error)
+		printf("kern_symlink /dev -> / returns %d\n", error);
+
+	return (error);
+}
+
+static int
+vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
+{
+	struct nameidata nd;
+	struct mount *mporoot, *mpnroot;
+	struct vnode *vp, *vporoot, *vpdevfs;
+	char *fspath;
+	int error;
+
+	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
+
+	/* Shuffle the mountlist. */
+	mtx_lock(&mountlist_mtx);
+	mporoot = TAILQ_FIRST(&mountlist);
+	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
+	if (mporoot != mpdevfs) {
+		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
+		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
+	}
+	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+
+	cache_purgevfs(mporoot);
+	if (mporoot != mpdevfs)
+		cache_purgevfs(mpdevfs);
+
+	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);
+
+	VI_LOCK(vporoot);
+	vporoot->v_iflag &= ~VI_MOUNT;
+	VI_UNLOCK(vporoot);
+	vporoot->v_vflag &= ~VV_ROOT;
+	vporoot->v_mountedhere = NULL;
+	mporoot->mnt_vnodecovered = NULL;
+	vput(vporoot);
+
+	/* Set up the new rootvnode, and purge the cache */
+	mpnroot->mnt_vnodecovered = NULL;
+	set_rootvnode();
+	cache_purgevfs(rootvnode->v_mount);
+
+	if (mporoot != mpdevfs) {
+		/* Remount old root under /.mount or /mnt */
+		fspath = "/.mount";
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+		    fspath, td);
+		error = namei(&nd);
+		if (error) {
+			NDFREE(&nd, NDF_ONLY_PNBUF);
+			fspath = "/mnt";
+			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+			    fspath, td);
+			error = namei(&nd);
+		}
+		if (!error) {
+			vp = nd.ni_vp;
+			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
+			if (!error)
+				error = vinvalbuf(vp, V_SAVE, 0, 0);
+			if (!error) {
+				cache_purge(vp);
+				mporoot->mnt_vnodecovered = vp;
+				vp->v_mountedhere = mporoot;
+				strlcpy(mporoot->mnt_stat.f_mntonname,
+				    fspath, MNAMELEN);
+				VOP_UNLOCK(vp, 0);
+			} else
+				vput(vp);
+		}
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+
+		if (mporoot->mnt_vnodecovered == NULL)
+			printf("mountroot: unable to remount previous root.\n");
+	}
+
+	/* Remount devfs under /dev */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
+
+	error = namei(&nd);
+	if (!error) {
+		vp = nd.ni_vp;
+		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
+		if (!error)
+			error = vinvalbuf(vp, V_SAVE, 0, 0);
+		if (!error) {
+			vpdevfs = mpdevfs->mnt_vnodecovered;
+			if (vpdevfs != NULL) {
+				cache_purge(vpdevfs);
+				vpdevfs->v_mountedhere = NULL;
+				vrele(vpdevfs);
+			}
+			mpdevfs->mnt_vnodecovered = vp;
+			vp->v_mountedhere = mpdevfs;
+			VOP_UNLOCK(vp, 0);
+		} else
+			vput(vp);
+	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+
+	if (mporoot == mpdevfs) {
+		vfs_unbusy(mpdevfs);
+		/* Unlink the no longer needed /dev/dev -> / symlink */
+		kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
+	}
+
+	return (0);
+}
+
+/*
+ * Configuration parser.
+ */
+
+/* Parser character classes. */
+#define	CC_WHITESPACE		-1
+#define	CC_NONWHITESPACE	-2
+
+/* Parse errors. */
+#define	PE_EOF			-1
+#define	PE_EOL			-2
+
+static __inline int
+parse_peek(char **conf)
+{
+
+	return (**conf);
+}
+
+static __inline void
+parse_poke(char **conf, int c)
+{
+
+	**conf = c;
+}
+
+static __inline void
+parse_advance(char **conf)
+{
+
+	(*conf)++;
+}
+
+static __inline int
+parse_isspace(int c)
+{
+
+	return ((c == ' ' || c == '\t' || c == '\n') ? 1 : 0);
+}
+
+/* Skip to the next char matching mc; PE_EOF or PE_EOL when there is none. */
+static int
+parse_skipto(char **conf, int mc)
+{
+	int c, match;
+
+	while (1) {
+		c = parse_peek(conf);
+		if (c == 0)
+			return (PE_EOF);
+		switch (mc) {
+		case CC_WHITESPACE:
+			match = parse_isspace(c);
+			break;
+		case CC_NONWHITESPACE:
+			if (c == '\n')
+				return (PE_EOL);
+			match = !parse_isspace(c);
+			break;
+		default:
+			match = (c == mc) ? 1 : 0;
+			break;
+		}
+		if (match)
+			return (0);
+		parse_advance(conf);
+	}
+}
+
+static int
+parse_token(char **conf, char **tok)
+{
+	char *p;
+	size_t len;
+	int error;
+
+	*tok = NULL;
+	error = parse_skipto(conf, CC_NONWHITESPACE);
+	if (error)
+		return (error);
+	p = *conf;
+	error = parse_skipto(conf, CC_WHITESPACE);
+	len = *conf - p;
+	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
+	bcopy(p, *tok, len);
+	return (0);
+}
+
+static void
+parse_dir_ask_printenv(const char *var)
+{
+	char *val;
+
+	val = getenv(var);
+	if (val != NULL) {
+		printf("  %s=%s\n", var, val);
+		freeenv(val);
+	}
+}
+
+/* .ask: prompt for a root mount; re-prompts on '?' or unparsable input. */
+static int
+parse_dir_ask(char **conf)
+{
+	char name[80];
+	char *mnt;
+	int error;
+
+	printf("\nLoader variables:\n");
+	parse_dir_ask_printenv("vfs.root.mountfrom");
+	parse_dir_ask_printenv("vfs.root.mountfrom.options");
+	printf("\nManual root filesystem specification:\n");
+	printf("  <fstype>:<device> [options]\n");
+	printf("      Mount <device> using filesystem <fstype>\n");
+	printf("      and with the specified (optional) option list.\n");
+	printf("\n");
+	printf("    eg. ufs:/dev/da0s1a\n");
+	printf("        cd9660:/dev/acd0 ro\n");
+	printf("          (which is equivalent to: ");
+	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
+	printf("\n");
+	printf("  ?                  List valid disk boot devices\n");
+	printf("  <empty line>       Abort manual input\n");
+
+ again:
+	printf("\nmountroot> ");
+	gets(name, sizeof(name), 1);
+	if (name[0] == '\0')
+		return (0);
+	if (name[0] == '?') {
+		printf("\nList of GEOM managed disk devices:\n  ");
+		g_dev_print();
+		goto again;
+	}
+	mnt = name;
+	error = parse_mount(&mnt);
+	if (error < 0) {
+		printf("Invalid specification.\n");
+		goto again;
+	}
+	return (error);
+}
+
+/* Handle ".md <file>": attach <file> read-only as a vnode-backed md unit. */
+static int
+parse_dir_md(char **conf)
+{
+	struct stat sb;
+	struct thread *td;
+	struct md_ioctl *mdio;
+	char *path, *tok;
+	int cerror, error, fd, len;
+
+	td = curthread;
+	error = parse_token(conf, &tok);
+	if (error)
+		return (error);
+
+	len = strlen(tok);
+	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
+	path = (void *)(mdio + 1);
+	bcopy(tok, path, len);
+	free(tok, M_TEMP);
+
+	/* Get file status. */
+	error = kern_stat(td, path, UIO_SYSSPACE, &sb);
+	if (error)
+		goto out;
+
+	/* Open /dev/mdctl so that we can attach/detach. */
+	error = kern_open(td, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0);
+	if (error)
+		goto out;
+
+	fd = td->td_retval[0];
+	mdio->md_version = MDIOVERSION;
+	mdio->md_type = MD_VNODE;
+
+	if (root_mount_mddev != -1) {
+		mdio->md_unit = root_mount_mddev;
+		DROP_GIANT();
+		(void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
+		PICKUP_GIANT();
+		root_mount_mddev = -1;
+	}
+
+	mdio->md_file = (void *)(mdio + 1);
+	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
+	mdio->md_mediasize = sb.st_size;
+	mdio->md_unit = 0;
+	DROP_GIANT();
+	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
+	PICKUP_GIANT();
+	if (error)
+		goto out_close;
+
+	if (mdio->md_unit > 9) {
+		printf("rootmount: too many md units\n");
+		mdio->md_file = NULL;
+		mdio->md_options = 0;
+		mdio->md_mediasize = 0;
+		DROP_GIANT();
+		(void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
+		PICKUP_GIANT();
+		error = ERANGE;
+		goto out_close;
+	}
+
+	root_mount_mddev = mdio->md_unit;
+	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
+	/* Close mdctl on every path past kern_open; keep the first error. */
+ out_close:
+	cerror = kern_close(td, fd);
+	if (error == 0)
+		error = cerror;
+ out:
+	free(mdio, M_TEMP);
+	return (error);
+}
+
+/* Handle ".onfail <action>"; returns EINVAL if the action is unknown. */
+static int
+parse_dir_onfail(char **conf)
+{
+	char *action;
+	int error;
+
+	error = parse_token(conf, &action);
+	if (error)
+		return (error);
+
+	if (!strcmp(action, "continue"))
+		root_mount_action = A_CONTINUE;
+	else if (!strcmp(action, "panic"))
+		root_mount_action = A_PANIC;
+	else if (!strcmp(action, "reboot"))
+		root_mount_action = A_REBOOT;
+	else if (!strcmp(action, "retry"))
+		root_mount_action = A_RETRY;
+	else {
+		printf("rootmount: %s: unknown action\n", action);
+		error = EINVAL;
+	}
+	free(action, M_TEMP);
+	return (error);
+}
+
+static int
+parse_dir_timeout(char **conf)
+{
+	char *tok, *endtok;
+	long secs;
+	int error;
+
+	error = parse_token(conf, &tok);
+	if (error)
+		return (error);
+
+	secs = strtol(tok, &endtok, 0);
+	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
+	if (!error)
+		root_mount_timeout = secs;
+	free(tok, M_TEMP);
+	return (error);
+}
+
+static int
+parse_directive(char **conf)
+{
+	char *dir;
+	int error;
+
+	error = parse_token(conf, &dir);
+	if (error)
+		return (error);
+
+	if (strcmp(dir, ".ask") == 0)
+		error = parse_dir_ask(conf);
+	else if (strcmp(dir, ".md") == 0)
+		error = parse_dir_md(conf);
+	else if (strcmp(dir, ".onfail") == 0)
+		error = parse_dir_onfail(conf);
+	else if (strcmp(dir, ".timeout") == 0)
+		error = parse_dir_timeout(conf);
+	else {
+		printf("mountroot: invalid directive `%s'\n", dir);
+		/* Ignore the rest of the line. */
+		(void)parse_skipto(conf, '\n');
+		error = EINVAL;
+	}
+	free(dir, M_TEMP);
+	return (error);
+}
+
+static int
+parse_mount(char **conf)
+{
+	char errmsg[255];
+	struct mntarg *ma;
+	char *dev, *fs, *opts, *tok;
+	int error;
+
+	error = parse_token(conf, &tok);
+	if (error)
+		return (error);
+	fs = tok;
+	error = parse_skipto(&tok, ':');
+	if (error) {
+		free(fs, M_TEMP);
+		return (error);
+	}
+	parse_poke(&tok, '\0');
+	parse_advance(&tok);
+	dev = tok;
+
+	if (root_mount_mddev != -1) {
+		/* Handle substitution for the md unit number. */
+		tok = strstr(dev, "md#");
+		if (tok != NULL)
+			tok[2] = '0' + root_mount_mddev;
+	}
+
+	/* Parse options. */
+	error = parse_token(conf, &tok);
+	opts = (error == 0) ? tok : NULL;
+
+	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
+	    (opts != NULL) ? opts : "");
+
+	bzero(errmsg, sizeof(errmsg));
+
+	if (vfs_byname(fs) == NULL) {
+		strlcpy(errmsg, "unknown file system", sizeof(errmsg));
+		error = ENOENT;
+		goto out;
+	}
+
+	if (dev[0] != '\0') {
+		/* XXX wait N seconds for the device to appear. */
+	}
+
+	ma = NULL;
+	ma = mount_arg(ma, "fstype", fs, -1);
+	ma = mount_arg(ma, "fspath", "/", -1);
+	ma = mount_arg(ma, "from", dev, -1);
+	ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
+	ma = mount_arg(ma, "ro", NULL, 0);
+	ma = parse_mountroot_options(ma, opts);
+	error = kernel_mount(ma, MNT_ROOTFS);
+
+ out:
+	if (error) {
+		printf("Mounting from %s:%s failed with error %d",
+		    fs, dev, error);
+		if (errmsg[0] != '\0')
+			printf(": %s", errmsg);
+		printf(".\n");
+	}
+	free(fs, M_TEMP);
+	if (opts != NULL)
+		free(opts, M_TEMP);
+	/* kernel_mount can return -1 on error. */
+	return ((error < 0) ? EDOOFUS : error);
+}
+
+static int
+vfs_mountroot_parse(char **conf, struct mount *mpdevfs)
+{
+	struct mount *mp;
+	int error;
+
+	mp = TAILQ_NEXT(mpdevfs, mnt_list);
+	error = (mp == NULL) ? 0 : EDOOFUS;
+	root_mount_mddev = -1;
+	root_mount_action = A_CONTINUE;
+	while (mp == NULL) {
+		error = parse_skipto(conf, CC_NONWHITESPACE);
+		if (error == PE_EOL) {
+			parse_advance(conf);
+			continue;
+		}
+		if (error < 0)
+			break;
+		switch (parse_peek(conf)) {
+		case '#':
+			error = parse_skipto(conf, '\n');
+			break;
+		case '.':
+			error = parse_directive(conf);
+			break;
+		default:
+			error = parse_mount(conf);
+			break;
+		}
+		if (error < 0)
+			break;
+		/* Ignore any trailing garbage on the line. */
+		if (parse_peek(conf) != '\n') {
+			printf("mountroot: advancing to next directive...\n");
+			(void)parse_skipto(conf, '\n');
+		}
+		mp = TAILQ_NEXT(mpdevfs, mnt_list);
+	}
+
+	printf("XXX: %s: error = %d, mpdevfs=%p, mp=%p\n", __func__,
+	    error, mpdevfs, mp);
+
+	return (error);
+}
+
+static void
+vfs_mountroot_conf0(struct sbuf *sb)
+{
+	char *s, *tok, *mnt, *opt;
+	int error;
+
+	sbuf_printf(sb, ".onfail panic\n");
+	sbuf_printf(sb, ".timeout 1\n");
+	if (boothowto & RB_ASKNAME)
+		sbuf_printf(sb, ".ask\n");
+#ifdef ROOTDEVNAME
+	if (boothowto & RB_DFLTROOT)
+		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
+#endif
+	if (boothowto & RB_CDROM) {
+		sbuf_printf(sb, "cd9660:cd0\n");
+		sbuf_printf(sb, ".timeout 0\n");
+		sbuf_printf(sb, "cd9660:acd0\n");
+		sbuf_printf(sb, ".timeout 1\n");
+	}
+	s = getenv("vfs.root.mountfrom");
+	if (s != NULL) {
+		opt = getenv("vfs.root.mountfrom.options");
+		tok = s;
+		error = parse_token(&tok, &mnt);
+		while (!error) {
+			sbuf_printf(sb, "%s %s\n", mnt,
+			    (opt != NULL) ? opt : "");
+			free(mnt, M_TEMP);
+			error = parse_token(&tok, &mnt);
+		}
+		if (opt != NULL)
+			freeenv(opt);
+		freeenv(s);
+	}
+	if (rootdevnames[0] != NULL)
+		sbuf_printf(sb, "%s\n", rootdevnames[0]);
+	if (rootdevnames[1] != NULL)
+		sbuf_printf(sb, "%s\n", rootdevnames[1]);
+#ifdef ROOTDEVNAME
+	if (!(boothowto & RB_DFLTROOT))
+		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
+#endif
+	if (!(boothowto & RB_ASKNAME))
+		sbuf_printf(sb, ".ask\n");
+}
+
+static int
+vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
+{
+	static char buf[128];
+	struct nameidata nd;
+	off_t ofs;
+	int error, flags;
+	int len, resid;
+	int vfslocked;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE,
+	    "/.mount.conf", td);
+	flags = FREAD;
+	error = vn_open(&nd, &flags, 0, NULL);
+	if (error)
+		return (error);
+
+	vfslocked = NDHASGIANT(&nd);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	ofs = 0;
+	len = sizeof(buf) - 1;
+	while (1) {
+		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
+		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
+		    NOCRED, &resid, td);
+		if (error)
+			break;
+		if (resid == len)
+			break;
+		buf[len - resid] = 0;
+		sbuf_printf(sb, "%s", buf);
+		ofs += len - resid;
+	}
+
+	VOP_UNLOCK(nd.ni_vp, 0);
+	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
+
+static void
+vfs_mountroot_wait(void)
+{
+	struct root_hold_token *h;
+	struct timeval lastfail;
+	int curfail;
+
+	curfail = 0;
+	while (1) {
+		DROP_GIANT();
+		g_waitidle();
+		PICKUP_GIANT();
+		mtx_lock(&mountlist_mtx);
+		if (LIST_EMPTY(&root_holds)) {
+			mtx_unlock(&mountlist_mtx);
+			break;
+		}
+		if (ppsratecheck(&lastfail, &curfail, 1)) {
+			printf("Root mount waiting for:");
+			LIST_FOREACH(h, &root_holds, list)
+				printf(" %s", h->who);
+			printf("\n");
+		}
+		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
+		    hz);
+	}
+}
+
+void
+vfs_mountroot(void)
+{
+	struct mount *mp;
+	struct sbuf *sb;
+	struct thread *td;
+	char *conf;
+	time_t timebase;
+	int error;
+
+	td = curthread;
+
+	vfs_mountroot_wait();
+
+	sb = sbuf_new_auto();
+	vfs_mountroot_conf0(sb);
+	sbuf_finish(sb);
+
+	error = vfs_mountroot_devfs(td, &mp);
+	while (!error) {
+		conf = sbuf_data(sb);
+		printf("========\n%s========\n", conf);
+		error = vfs_mountroot_parse(&conf, mp);
+		if (!error) {
+			error = vfs_mountroot_shuffle(td, mp);
+			if (!error) {
+				sbuf_clear(sb);
+				error = vfs_mountroot_readconf(td, sb);
+				sbuf_finish(sb);
+			}
+		}
+	}
+
+	sbuf_delete(sb);
+
+	/*
+	 * Iterate over all currently mounted file systems and use
+	 * the time stamp found to check and/or initialize the RTC.
+	 * Call inittodr() only once and pass it the largest of the
+	 * timestamps we encounter.
+	 */
+	timebase = 0;
+	mtx_lock(&mountlist_mtx);
+	mp = TAILQ_FIRST(&mountlist);
+	while (mp != NULL) {
+		if (mp->mnt_time > timebase)
+			timebase = mp->mnt_time;
+		mp = TAILQ_NEXT(mp, mnt_list);
+	}
+	mtx_unlock(&mountlist_mtx);
+	inittodr(timebase);
+
+	/* Keep prison0's root in sync with the global rootvnode. */
+	mtx_lock(&prison0.pr_mtx);
+	prison0.pr_root = rootvnode;
+	vref(prison0.pr_root);
+	mtx_unlock(&prison0.pr_mtx);
+
+	mtx_lock(&mountlist_mtx);
+	atomic_store_rel_int(&root_mount_complete, 1);
+	wakeup(&root_mount_complete);
+	mtx_unlock(&mountlist_mtx);
+}
+
+static struct mntarg *
+parse_mountroot_options(struct mntarg *ma, const char *options)
+{
+	char *p;
+	char *name, *name_arg;
+	char *val, *val_arg;
+	char *opts;
+
+	if (options == NULL || options[0] == '\0')
+		return (ma);
+
+	p = opts = strdup(options, M_MOUNT);
+	if (opts == NULL) {
+		return (ma);
+	}
+
+	while((name = strsep(&p, ",")) != NULL) {
+		if (name[0] == '\0')
+			break;
+
+		val = strchr(name, '=');
+		if (val != NULL) {
+			*val = '\0';
+			++val;
+		}
+		if( strcmp(name, "rw") == 0 ||
+		    strcmp(name, "noro") == 0) {
+			/*
+			 * The first time we mount the root file system,
+			 * we need to mount 'ro', so We need to ignore
+			 * 'rw' and 'noro' mount options.
+			 */
+			continue;
+		}
+		name_arg = strdup(name, M_MOUNT);
+		val_arg = NULL;
+		if (val != NULL)
+			val_arg = strdup(val, M_MOUNT);
+
+		ma = mount_arg(ma, name_arg, val_arg,
+		    (val_arg != NULL ? -1 : 0));
+	}
+	free(opts, M_MOUNT);
+	return (ma);
+}
Index: kern/vfs_mount.c
===================================================================
--- kern/vfs_mount.c	(revision 41)
+++ kern/vfs_mount.c	(revision 49)
@@ -67,16 +67,10 @@
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
-#include "opt_rootdevname.h"
-
-#define	ROOTNAME		"root_device"
 #define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
 
-static void	set_rootvnode(void);
 static int	vfs_domount(struct thread *td, const char *fstype,
 		    char *fspath, int fsflags, void *fsdata);
-static int	vfs_mountroot_ask(void);
-static int	vfs_mountroot_try(const char *mountfrom, const char *options);
 static void	free_mntarg(struct mntarg *ma);
 
 static int	usermount = 0;
@@ -95,31 +89,6 @@
 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
 
 /*
- * The vnode of the system's root (/ in the filesystem, without chroot
- * active.)
- */
-struct vnode	*rootvnode;
-
-/*
- * The root filesystem is detailed in the kernel environment variable
- * vfs.root.mountfrom, which is expected to be in the general format
- *
- * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
- * vfsname   := the name of a VFS known to the kernel and capable
- *              of being mounted as root
- * path      := disk device name or other data used by the filesystem
- *              to locate its physical store
- *
- * If the environment variable vfs.root.mountfrom is a space separated list,
- * each list element is tried in turn and the root filesystem will be mounted
- * from the first one that suceeds.
- *
- * The environment variable vfs.root.mountfrom.options is a comma delimited
- * set of string mount options.  These mount options must be parseable
- * by nmount() in the kernel.
- */
-
-/*
  * Global opts, taken by all filesystems
  */
 static const char *global_opts[] = {
@@ -133,22 +102,36 @@
 	NULL
 };
 
-/*
- * The root specifiers we will try if RB_CDROM is specified.
- */
-static char *cdrom_rootdevnames[] = {
-	"cd9660:cd0",
-	"cd9660:acd0",
-	NULL
-};
+static int
+mount_init(void *mem, int size, int flags)
+{
+	struct mount *mp;
 
-/* legacy find-root code */
-char		*rootdevnames[2] = {NULL, NULL};
-#ifndef ROOTDEVNAME
-#  define ROOTDEVNAME NULL
-#endif
-static const char	*ctrootdevname = ROOTDEVNAME;
+	mp = (struct mount *)mem;
+	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
+	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
+	return (0);
+}
 
+static void
+mount_fini(void *mem, int size)
+{
+	struct mount *mp;
+
+	mp = (struct mount *)mem;
+	lockdestroy(&mp->mnt_explock);
+	mtx_destroy(&mp->mnt_mtx);
+}
+
+static void
+vfs_mount_init(void *dummy __unused)
+{
+
+	mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
+	    NULL, mount_init, mount_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+}
+SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
+
 /*
  * ---------------------------------------------------------------------
  * Functions for building and sanitizing the mount options
@@ -452,27 +435,6 @@
 	MNT_IUNLOCK(mp);
 }
 
-static int
-mount_init(void *mem, int size, int flags)
-{
-	struct mount *mp;
-
-	mp = (struct mount *)mem;
-	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
-	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
-	return (0);
-}
-
-static void
-mount_fini(void *mem, int size)
-{
-	struct mount *mp;
-
-	mp = (struct mount *)mem;
-	lockdestroy(&mp->mnt_explock);
-	mtx_destroy(&mp->mnt_mtx);
-}
-
 /*
  * Allocate and initialize the mount point struct.
  */
@@ -1343,269 +1305,6 @@
 }
 
 /*
- * ---------------------------------------------------------------------
- * Mounting of root filesystem
- *
- */
-
-struct root_hold_token {
-	const char			*who;
-	LIST_ENTRY(root_hold_token)	list;
-};
-
-static LIST_HEAD(, root_hold_token)	root_holds =
-    LIST_HEAD_INITIALIZER(root_holds);
-
-static int root_mount_complete;
-
-/*
- * Hold root mount.
- */
-struct root_hold_token *
-root_mount_hold(const char *identifier)
-{
-	struct root_hold_token *h;
-
-	if (root_mounted())
-		return (NULL);
-
-	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
-	h->who = identifier;
-	mtx_lock(&mountlist_mtx);
-	LIST_INSERT_HEAD(&root_holds, h, list);
-	mtx_unlock(&mountlist_mtx);
-	return (h);
-}
-
-/*
- * Release root mount.
- */
-void
-root_mount_rel(struct root_hold_token *h)
-{
-
-	if (h == NULL)
-		return;
-	mtx_lock(&mountlist_mtx);
-	LIST_REMOVE(h, list);
-	wakeup(&root_holds);
-	mtx_unlock(&mountlist_mtx);
-	free(h, M_DEVBUF);
-}
-
-/*
- * Wait for all subsystems to release root mount.
- */
-static void
-root_mount_prepare(void)
-{
-	struct root_hold_token *h;
-	struct timeval lastfail;
-	int curfail = 0;
-
-	for (;;) {
-		DROP_GIANT();
-		g_waitidle();
-		PICKUP_GIANT();
-		mtx_lock(&mountlist_mtx);
-		if (LIST_EMPTY(&root_holds)) {
-			mtx_unlock(&mountlist_mtx);
-			break;
-		}
-		if (ppsratecheck(&lastfail, &curfail, 1)) {
-			printf("Root mount waiting for:");
-			LIST_FOREACH(h, &root_holds, list)
-				printf(" %s", h->who);
-			printf("\n");
-		}
-		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
-		    hz);
-	}
-}
-
-/*
- * Root was mounted, share the good news.
- */
-static void
-root_mount_done(void)
-{
-
-	/* Keep prison0's root in sync with the global rootvnode. */
-	mtx_lock(&prison0.pr_mtx);
-	prison0.pr_root = rootvnode;
-	vref(prison0.pr_root);
-	mtx_unlock(&prison0.pr_mtx);
-	/*
-	 * Use a mutex to prevent the wakeup being missed and waiting for
-	 * an extra 1 second sleep.
-	 */
-	mtx_lock(&mountlist_mtx);
-	root_mount_complete = 1;
-	wakeup(&root_mount_complete);
-	mtx_unlock(&mountlist_mtx);
-}
-
-/*
- * Return true if root is already mounted.
- */
-int
-root_mounted(void)
-{
-
-	/* No mutex is acquired here because int stores are atomic. */
-	return (root_mount_complete);
-}
-
-/*
- * Wait until root is mounted.
- */
-void
-root_mount_wait(void)
-{
-
-	/*
-	 * Panic on an obvious deadlock - the function can't be called from
-	 * a thread which is doing the whole SYSINIT stuff.
-	 */
-	KASSERT(curthread->td_proc->p_pid != 0,
-	    ("root_mount_wait: cannot be called from the swapper thread"));
-	mtx_lock(&mountlist_mtx);
-	while (!root_mount_complete) {
-		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
-		    hz);
-	}
-	mtx_unlock(&mountlist_mtx);
-}
-
-static void
-set_rootvnode()
-{
-	struct proc *p;
-
-	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
-		panic("Cannot find root vnode");
-
-	VOP_UNLOCK(rootvnode, 0);
-
-	p = curthread->td_proc;
-	FILEDESC_XLOCK(p->p_fd);
-
-	if (p->p_fd->fd_cdir != NULL)
-		vrele(p->p_fd->fd_cdir);
-	p->p_fd->fd_cdir = rootvnode;
-	VREF(rootvnode);
-
-	if (p->p_fd->fd_rdir != NULL)
-		vrele(p->p_fd->fd_rdir);
-	p->p_fd->fd_rdir = rootvnode;
-	VREF(rootvnode);
-
-	FILEDESC_XUNLOCK(p->p_fd);
-
-	EVENTHANDLER_INVOKE(mountroot);
-}
-
-/*
- * Mount /devfs as our root filesystem, but do not put it on the mountlist
- * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
- */
-
-static void
-devfs_first(void)
-{
-	struct thread *td = curthread;
-	struct vfsoptlist *opts;
-	struct vfsconf *vfsp;
-	struct mount *mp = NULL;
-	int error;
-
-	vfsp = vfs_byname("devfs");
-	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
-	if (vfsp == NULL)
-		return;
-
-	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
-
-	error = VFS_MOUNT(mp);
-	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
-	if (error)
-		return;
-
-	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
-	TAILQ_INIT(opts);
-	mp->mnt_opt = opts;
-
-	mtx_lock(&mountlist_mtx);
-	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
-	mtx_unlock(&mountlist_mtx);
-
-	set_rootvnode();
-
-	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
-	if (error)
-		printf("kern_symlink /dev -> / returns %d\n", error);
-}
-
-/*
- * Surgically move our devfs to be mounted on /dev.
- */
-
-static void
-devfs_fixup(struct thread *td)
-{
-	struct nameidata nd;
-	int error;
-	struct vnode *vp, *dvp;
-	struct mount *mp;
-
-	/* Remove our devfs mount from the mountlist and purge the cache */
-	mtx_lock(&mountlist_mtx);
-	mp = TAILQ_FIRST(&mountlist);
-	TAILQ_REMOVE(&mountlist, mp, mnt_list);
-	mtx_unlock(&mountlist_mtx);
-	cache_purgevfs(mp);
-
-	VFS_ROOT(mp, LK_EXCLUSIVE, &dvp);
-	VI_LOCK(dvp);
-	dvp->v_iflag &= ~VI_MOUNT;
-	VI_UNLOCK(dvp);
-	dvp->v_mountedhere = NULL;
-
-	/* Set up the real rootvnode, and purge the cache */
-	TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
-	set_rootvnode();
-	cache_purgevfs(rootvnode->v_mount);
-
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
-	error = namei(&nd);
-	if (error) {
-		printf("Lookup of /dev for devfs, error: %d\n", error);
-		return;
-	}
-	NDFREE(&nd, NDF_ONLY_PNBUF);
-	vp = nd.ni_vp;
-	if (vp->v_type != VDIR) {
-		vput(vp);
-	}
-	error = vinvalbuf(vp, V_SAVE, 0, 0);
-	if (error) {
-		vput(vp);
-	}
-	cache_purge(vp);
-	mp->mnt_vnodecovered = vp;
-	vp->v_mountedhere = mp;
-	mtx_lock(&mountlist_mtx);
-	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	mtx_unlock(&mountlist_mtx);
-	VOP_UNLOCK(vp, 0);
-	vput(dvp);
-	vfs_unbusy(mp);
-
-	/* Unlink the no longer needed /dev/dev -> / symlink */
-	kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
-}
-
-/*
  * Report errors during filesystem mounting.
  */
 void
@@ -1642,288 +1341,7 @@
 }
 
 /*
- * Find and mount the root filesystem
- */
-void
-vfs_mountroot(void)
-{
-	char *cp, *cpt, *options, *tmpdev;
-	int error, i, asked = 0;
-
-	options = NULL;
-
-	root_mount_prepare();
-
-	mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
-	    NULL, NULL, mount_init, mount_fini,
-	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-	devfs_first();
-
-	/*
-	 * We are booted with instructions to prompt for the root filesystem.
-	 */
-	if (boothowto & RB_ASKNAME) {
-		if (!vfs_mountroot_ask())
-			goto mounted;
-		asked = 1;
-	}
-
-	options = getenv("vfs.root.mountfrom.options");
-
-	/*
-	 * The root filesystem information is compiled in, and we are
-	 * booted with instructions to use it.
-	 */
-	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
-		if (!vfs_mountroot_try(ctrootdevname, options))
-			goto mounted;
-		ctrootdevname = NULL;
-	}
-
-	/*
-	 * We've been given the generic "use CDROM as root" flag.  This is
-	 * necessary because one media may be used in many different
-	 * devices, so we need to search for them.
-	 */
-	if (boothowto & RB_CDROM) {
-		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
-			if (!vfs_mountroot_try(cdrom_rootdevnames[i], options))
-				goto mounted;
-		}
-	}
-
-	/*
-	 * Try to use the value read by the loader from /etc/fstab, or
-	 * supplied via some other means.  This is the preferred
-	 * mechanism.
-	 */
-	cp = getenv("vfs.root.mountfrom");
-	if (cp != NULL) {
-		cpt = cp;
-		while ((tmpdev = strsep(&cpt, " \t")) != NULL) {
-			error = vfs_mountroot_try(tmpdev, options);
-			if (error == 0) {
-				freeenv(cp);
-				goto mounted;
-			}
-		}
-		freeenv(cp);
-	}
-
-	/*
-	 * Try values that may have been computed by code during boot
-	 */
-	if (!vfs_mountroot_try(rootdevnames[0], options))
-		goto mounted;
-	if (!vfs_mountroot_try(rootdevnames[1], options))
-		goto mounted;
-
-	/*
-	 * If we (still) have a compiled-in default, try it.
-	 */
-	if (ctrootdevname != NULL)
-		if (!vfs_mountroot_try(ctrootdevname, options))
-			goto mounted;
-	/*
-	 * Everything so far has failed, prompt on the console if we haven't
-	 * already tried that.
-	 */
-	if (!asked)
-		if (!vfs_mountroot_ask())
-			goto mounted;
-
-	panic("Root mount failed, startup aborted.");
-
-mounted:
-	root_mount_done();
-	freeenv(options);
-}
-
-static struct mntarg *
-parse_mountroot_options(struct mntarg *ma, const char *options)
-{
-	char *p;
-	char *name, *name_arg;
-	char *val, *val_arg;
-	char *opts;
-
-	if (options == NULL || options[0] == '\0')
-		return (ma);
-
-	p = opts = strdup(options, M_MOUNT);
-	if (opts == NULL) {
-		return (ma);
-	} 
-
-	while((name = strsep(&p, ",")) != NULL) {
-		if (name[0] == '\0')
-			break;
-
-		val = strchr(name, '=');
-		if (val != NULL) {
-			*val = '\0';
-			++val;
-		}
-		if( strcmp(name, "rw") == 0 ||
-		    strcmp(name, "noro") == 0) {
-			/*
-			 * The first time we mount the root file system,
-			 * we need to mount 'ro', so We need to ignore
-			 * 'rw' and 'noro' mount options.
-			 */
-			continue;
-		}
-		name_arg = strdup(name, M_MOUNT);
-		val_arg = NULL;
-		if (val != NULL) 
-			val_arg = strdup(val, M_MOUNT);
-
-		ma = mount_arg(ma, name_arg, val_arg,
-		    (val_arg != NULL ? -1 : 0));
-	}
-	free(opts, M_MOUNT);
-	return (ma);
-}
-
-/*
- * Mount (mountfrom) as the root filesystem.
- */
-static int
-vfs_mountroot_try(const char *mountfrom, const char *options)
-{
-	struct mount	*mp;
-	struct mntarg	*ma;
-	char		*vfsname, *path;
-	time_t		timebase;
-	int		error;
-	char		patt[32];
-	char		errmsg[255];
-
-	vfsname = NULL;
-	path    = NULL;
-	mp      = NULL;
-	ma	= NULL;
-	error   = EINVAL;
-	bzero(errmsg, sizeof(errmsg));
-
-	if (mountfrom == NULL)
-		return (error);		/* don't complain */
-	printf("Trying to mount root from %s\n", mountfrom);
-
-	/* parse vfs name and path */
-	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
-	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
-	vfsname[0] = path[0] = 0;
-	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
-	if (sscanf(mountfrom, patt, vfsname, path) < 1)
-		goto out;
-
-	if (path[0] == '\0')
-		strcpy(path, ROOTNAME);
-
-	ma = mount_arg(ma, "fstype", vfsname, -1);
-	ma = mount_arg(ma, "fspath", "/", -1);
-	ma = mount_arg(ma, "from", path, -1);
-	ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
-	ma = mount_arg(ma, "ro", NULL, 0);
-	ma = parse_mountroot_options(ma, options);
-	error = kernel_mount(ma, MNT_ROOTFS);
-
-	if (error == 0) {
-		/*
-		 * We mount devfs prior to mounting the / FS, so the first
-		 * entry will typically be devfs.
-		 */
-		mp = TAILQ_FIRST(&mountlist);
-		KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
-
-		/*
-		 * Iterate over all currently mounted file systems and use
-		 * the time stamp found to check and/or initialize the RTC.
-		 * Typically devfs has no time stamp and the only other FS
-		 * is the actual / FS.
-		 * Call inittodr() only once and pass it the largest of the
-		 * timestamps we encounter.
-		 */
-		timebase = 0;
-		do {
-			if (mp->mnt_time > timebase)
-				timebase = mp->mnt_time;
-			mp = TAILQ_NEXT(mp, mnt_list);
-		} while (mp != NULL);
-		inittodr(timebase);
-
-		devfs_fixup(curthread);
-	}
-
-	if (error != 0 ) {
-		printf("ROOT MOUNT ERROR: %s\n", errmsg);
-		printf("If you have invalid mount options, reboot, and ");
-		printf("first try the following from\n");
-		printf("the loader prompt:\n\n");
-		printf("     set vfs.root.mountfrom.options=rw\n\n");
-		printf("and then remove invalid mount options from ");
-		printf("/etc/fstab.\n\n");
-	}
-out:
-	free(path, M_MOUNT);
-	free(vfsname, M_MOUNT);
-	return (error);
-}
-
-/*
  * ---------------------------------------------------------------------
- * Interactive root filesystem selection code.
- */
-
-static int
-vfs_mountroot_ask(void)
-{
-	char name[128];
-	char *mountfrom;
-	char *options;
-
-	for(;;) {
-		printf("Loader variables:\n");
-		printf("vfs.root.mountfrom=");
-		mountfrom = getenv("vfs.root.mountfrom");
-		if (mountfrom != NULL) {
-			printf("%s", mountfrom);
-		}
-		printf("\n");
-		printf("vfs.root.mountfrom.options=");
-		options = getenv("vfs.root.mountfrom.options");
-		if (options != NULL) {
-			printf("%s", options);
-		}
-		printf("\n");
-		freeenv(mountfrom);
-		freeenv(options);
-		printf("\nManual root filesystem specification:\n");
-		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
-		printf("                       eg. ufs:/dev/da0s1a\n");
-		printf("                       eg. cd9660:/dev/acd0\n");
-		printf("                       This is equivalent to: ");
-		printf("mount -t cd9660 /dev/acd0 /\n"); 
-		printf("\n");
-		printf("  ?                  List valid disk boot devices\n");
-		printf("  <empty line>       Abort manual input\n");
-		printf("\nmountroot> ");
-		gets(name, sizeof(name), 1);
-		if (name[0] == '\0')
-			return (1);
-		if (name[0] == '?') {
-			printf("\nList of GEOM managed disk devices:\n  ");
-			g_dev_print();
-			continue;
-		}
-		if (!vfs_mountroot_try(name, NULL))
-			return (0);
-	}
-}
-
-/*
- * ---------------------------------------------------------------------
  * Functions for querying mount options/arguments from filesystems.
  */
 
@@ -1965,15 +1383,17 @@
 			continue;
 		snprintf(errmsg, sizeof(errmsg),
 		    "mount option <%s> is unknown", p);
-		printf("%s\n", errmsg);
 		ret = EINVAL;
 	}
 	if (ret != 0) {
 		TAILQ_FOREACH(opt, opts, link) {
 			if (strcmp(opt->name, "errmsg") == 0) {
 				strncpy((char *)opt->value, errmsg, opt->len);
+				break;
 			}
 		}
+		if (opt == NULL)
+			printf("%s\n", errmsg);
 	}
 	return (ret);
 }
Index: dev/md/md.c
===================================================================
--- dev/md/md.c	(revision 41)
+++ dev/md/md.c	(revision 49)
@@ -911,18 +911,26 @@
 {
 	struct vattr vattr;
 	struct nameidata nd;
+	char *fname;
 	int error, flags, vfslocked;
 
-	error = copyinstr(mdio->md_file, sc->file, sizeof(sc->file), NULL);
-	if (error != 0)
-		return (error);
-	flags = FREAD|FWRITE;
 	/*
-	 * If the user specified that this is a read only device, unset the
-	 * FWRITE mask before trying to open the backing store.
+	 * Kernel-originated requests must have the filename appended
+	 * to the mdio structure to protect against malicious software.
 	 */
-	if ((mdio->md_options & MD_READONLY) != 0)
-		flags &= ~FWRITE;
+	fname = mdio->md_file;
+	if ((void *)fname != (void *)(mdio + 1)) {
+		error = copyinstr(fname, sc->file, sizeof(sc->file), NULL);
+		if (error != 0)
+			return (error);
+	} else
+		strlcpy(sc->file, fname, sizeof(sc->file));
+
+	/*
+	 * If the user specified that this is a read only device, don't
+	 * set the FWRITE mask before trying to open the backing store.
+	 */
+	flags = FREAD | ((mdio->md_options & MD_READONLY) ? 0 : FWRITE);
 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, sc->file, td);
 	error = vn_open(&nd, &flags, 0, NULL);
 	if (error != 0)

--Boundary_(ID_e2iysUHX7Ge1qa8HV4CNIg)--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CD1BDE8F-29BE-4A82-B0D9-8849FF3C1A1F>