Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 8 Nov 2015 17:33:48 +0000 (UTC)
From:      Edward Tomasz Napierala <trasz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r290548 - in head/sbin: init reboot
Message-ID:  <201511081733.tA8HXm26041362@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: trasz
Date: Sun Nov  8 17:33:48 2015
New Revision: 290548
URL: https://svnweb.freebsd.org/changeset/base/290548

Log:
  Userspace part of reroot support. This makes it possible to change
  the root filesystem without full reboot, using "reboot -r". This can
  be used to, e.g., boot from a temporary md_image preloaded by loader(8),
  setup an iSCSI session, and continue booting from rootfs mounted over
  iSCSI.
  
  Reviewed by:	kib@, bapt@
  MFC after:	1 month
  Relnotes:	yes
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D3693

Modified:
  head/sbin/init/Makefile
  head/sbin/init/init.c
  head/sbin/init/pathnames.h
  head/sbin/reboot/reboot.8
  head/sbin/reboot/reboot.c

Modified: head/sbin/init/Makefile
==============================================================================
--- head/sbin/init/Makefile	Sun Nov  8 14:26:50 2015	(r290547)
+++ head/sbin/init/Makefile	Sun Nov  8 17:33:48 2015	(r290548)
@@ -2,12 +2,18 @@
 # $FreeBSD$
 
 PROG=	init
+SRCS=	init.c getmntopts.c
 MAN=	init.8
 PRECIOUSPROG=
 INSTALLFLAGS=-b -B.bak
 CFLAGS+=-DDEBUGSHELL -DSECURE -DLOGIN_CAP -DCOMPAT_SYSV_INIT
 LIBADD=	util crypt
 
+# Needed for getmntopts.c
+MOUNT=	${.CURDIR}/../../sbin/mount
+CFLAGS+=-I${MOUNT}
+.PATH:	${MOUNT}
+
 NO_SHARED?=	YES
 
 .include <bsd.prog.mk>

Modified: head/sbin/init/init.c
==============================================================================
--- head/sbin/init/init.c	Sun Nov  8 14:26:50 2015	(r290547)
+++ head/sbin/init/init.c	Sun Nov  8 17:33:48 2015	(r290548)
@@ -46,6 +46,7 @@ static const char rcsid[] =
 
 #include <sys/param.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/sysctl.h>
 #include <sys/wait.h>
@@ -79,6 +80,7 @@ static const char rcsid[] =
 #include <login_cap.h>
 #endif
 
+#include "mntopts.h"
 #include "pathnames.h"
 
 /*
@@ -103,6 +105,7 @@ static void warning(const char *, ...) _
 static void emergency(const char *, ...) __printflike(1, 2);
 static void disaster(int);
 static void badsys(int);
+static void revoke_ttys(void);
 static int  runshutdown(void);
 static char *strk(char *);
 
@@ -122,6 +125,8 @@ static state_func_t clean_ttys(void);
 static state_func_t catatonia(void);
 static state_func_t death(void);
 static state_func_t death_single(void);
+static state_func_t reroot(void);
+static state_func_t reroot_phase_two(void);
 
 static state_func_t run_script(const char *);
 
@@ -193,7 +198,7 @@ main(int argc, char *argv[])
 {
 	state_t initial_transition = runcom;
 	char kenv_value[PATH_MAX];
-	int c;
+	int c, error;
 	struct sigaction sa;
 	sigset_t mask;
 
@@ -226,6 +231,9 @@ main(int argc, char *argv[])
 				case 'q': /* rescan /etc/ttys */
 					sig = SIGHUP;
 					break;
+				case 'r': /* remount root */
+					sig = SIGEMT;
+					break;
 				default:
 					goto invalid;
 				}
@@ -247,7 +255,7 @@ invalid:
 	/*
 	 * Create an initial session.
 	 */
-	if (setsid() < 0)
+	if (setsid() < 0 && (errno != EPERM || getsid(0) != 1))
 		warning("initial setsid() failed: %m");
 
 	/*
@@ -261,7 +269,7 @@ invalid:
 	 * This code assumes that we always get arguments through flags,
 	 * never through bits set in some random machine register.
 	 */
-	while ((c = getopt(argc, argv, "dsf")) != -1)
+	while ((c = getopt(argc, argv, "dsfr")) != -1)
 		switch (c) {
 		case 'd':
 			devfs = 1;
@@ -272,6 +280,9 @@ invalid:
 		case 'f':
 			runcom_mode = FASTBOOT;
 			break;
+		case 'r':
+			initial_transition = reroot_phase_two;
+			break;
 		default:
 			warning("unrecognized flag '-%c'", c);
 			break;
@@ -287,13 +298,13 @@ invalid:
 	handle(badsys, SIGSYS, 0);
 	handle(disaster, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGXCPU,
 	    SIGXFSZ, 0);
-	handle(transition_handler, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGUSR1,
-	    SIGUSR2, 0);
+	handle(transition_handler, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP,
+	    SIGUSR1, SIGUSR2, 0);
 	handle(alrm_handler, SIGALRM, 0);
 	sigfillset(&mask);
 	delset(&mask, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGSYS,
-	    SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGALRM,
-	    SIGUSR1, SIGUSR2, 0);
+	    SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP,
+	    SIGALRM, SIGUSR1, SIGUSR2, 0);
 	sigprocmask(SIG_SETMASK, &mask, (sigset_t *) 0);
 	sigemptyset(&sa.sa_mask);
 	sa.sa_flags = 0;
@@ -373,6 +384,16 @@ invalid:
 			free(s);
 	}
 
+	if (initial_transition != reroot_phase_two) {
+		/*
+		 * Unmount reroot leftovers.  This runs after init(8)
+		 * gets reexecuted after reroot_phase_two() is done.
+		 */
+		error = unmount(_PATH_REROOT, MNT_FORCE);
+		if (error != 0 && errno != EINVAL)
+			warning("Cannot unmount %s: %m", _PATH_REROOT);
+	}
+
 	/*
 	 * Start the state machine.
 	 */
@@ -620,6 +641,228 @@ write_stderr(const char *message)
 	write(STDERR_FILENO, message, strlen(message));
 }
 
+/*
+ * Read the whole of "path" into a malloc(3)ed buffer.  On success return 0
+ * and hand the buffer and its size to the caller via bufp/bufsizep; the
+ * caller owns the buffer.  On failure log, release everything, return -1.
+ */
+static int
+read_file(const char *path, void **bufp, size_t *bufsizep)
+{
+	struct stat sb;
+	size_t bufsize;
+	void *buf = NULL;
+	ssize_t nbytes;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		emergency("%s: %s", path, strerror(errno));
+		return (-1);
+	}
+	if (fstat(fd, &sb) != 0) {
+		emergency("fstat: %s", strerror(errno));
+		goto fail;
+	}
+	bufsize = sb.st_size;
+	buf = malloc(bufsize);
+	if (buf == NULL) {
+		emergency("malloc: %s", strerror(errno));
+		goto fail;
+	}
+	nbytes = read(fd, buf, bufsize);
+	if (nbytes != (ssize_t)bufsize) {
+		emergency("read: %s", strerror(errno));
+		goto fail;
+	}
+	if (close(fd) != 0) {
+		emergency("close: %s", strerror(errno));
+		free(buf);
+		return (-1);
+	}
+	*bufp = buf;
+	*bufsizep = bufsize;
+	return (0);
+fail:
+	free(buf);
+	(void)close(fd);
+	return (-1);
+}
+
+/*
+ * Create "path" (must not exist, mode 0700) holding bufsize bytes from buf.
+ * The caller keeps ownership of buf.  Return 0 on success, -1 after logging.
+ */
+static int
+create_file(const char *path, void *buf, size_t bufsize)
+{
+	ssize_t nbytes;
+	int fd;
+
+	fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0700);
+	if (fd < 0) {
+		emergency("%s: %s", path, strerror(errno));
+		return (-1);
+	}
+	nbytes = write(fd, buf, bufsize);
+	if (nbytes != (ssize_t)bufsize) {
+		emergency("write: %s", strerror(errno));
+		(void)close(fd);	/* do not leak the descriptor */
+		return (-1);
+	}
+	if (close(fd) != 0) {
+		emergency("close: %s", strerror(errno));
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Mount a fresh tmpfs on "fspath" using nmount(2).  Returns 0 on success;
+ * on failure logs the reason and returns nmount's nonzero result.
+ */
+static int
+mount_tmpfs(const char *fspath)
+{
+	struct iovec *iov = NULL;
+	char errmsg[255];
+	int iovlen = 0, rv;
+
+	memset(errmsg, 0, sizeof(errmsg));
+	build_iovec(&iov, &iovlen, "fstype",
+	    __DECONST(void *, "tmpfs"), (size_t)-1);
+	build_iovec(&iov, &iovlen, "fspath",
+	    __DECONST(void *, fspath), (size_t)-1);
+	build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
+
+	rv = nmount(iov, iovlen, 0);
+	if (rv == 0)
+		return (0);
+	/* Include errmsg in the report only when nmount filled it in. */
+	if (errmsg[0] == '\0')
+		emergency("cannot mount tmpfs on %s: %s",
+		    fspath, strerror(errno));
+	else
+		emergency("cannot mount tmpfs on %s: %s: %s",
+		    fspath, errmsg, strerror(errno));
+	return (rv);
+}
+
+/*
+ * First stage of reroot: revoke the ttys, run the shutdown script, SIGKILL
+ * whatever kill(-1) reaches, then copy this init binary to a tmpfs at
+ * _PATH_REROOT and exec it with "-r".  Any failure leads to single_user.
+ */
+static state_func_t
+reroot(void)
+{
+	void *buf;
+	char init_path[PATH_MAX];
+	size_t bufsize, init_path_len;
+	int error, name[4];
+
+	/* Look up kern.proc.pathname; the result is the binary copied below. */
+	name[0] = CTL_KERN;
+	name[1] = KERN_PROC;
+	name[2] = KERN_PROC_PATHNAME;
+	name[3] = -1;
+	init_path_len = sizeof(init_path);
+	error = sysctl(name, 4, init_path, &init_path_len, NULL, 0);
+	if (error != 0) {
+		emergency("failed to get kern.proc.pathname: %s",
+		    strerror(errno));
+		goto out;
+	}
+
+	revoke_ttys();
+	runshutdown();
+
+	/* Make sure nobody can interfere with our scheme. */
+	error = kill(-1, SIGKILL);
+	if (error != 0) {
+		emergency("kill(2) failed: %s", strerror(errno));
+		goto out;
+	}
+
+	/* Pacify GCC. */
+	buf = NULL;
+	bufsize = 0;
+
+	/*
+	 * Copy the init binary into tmpfs, so that we can unmount
+	 * the old rootfs without committing suicide.
+	 */
+	error = read_file(init_path, &buf, &bufsize);
+	if (error != 0)
+		goto out;
+	error = mount_tmpfs(_PATH_REROOT);
+	if (error != 0)
+		goto out;
+	error = create_file(_PATH_REROOT_INIT, buf, bufsize);
+	if (error != 0)
+		goto out;
+
+	/* Execute the temporary init; execl() returns only on failure. */
+	execl(_PATH_REROOT_INIT, _PATH_REROOT_INIT, "-r", NULL);
+	emergency("cannot exec %s: %s", _PATH_REROOT_INIT, strerror(errno));
+
+out:
+	emergency("reroot failed; going to single user mode");
+	return (state_func_t) single_user;
+}
+
+/*
+ * Second stage of reroot, entered when init is started with "-r": have the
+ * kernel switch the root filesystem, then exec init(8) from the new root.
+ * On any failure the returned next state is single_user.
+ */
+static state_func_t
+reroot_phase_two(void)
+{
+	char init_path[PATH_MAX], *path, *path_component;
+	size_t init_path_len;
+	int nbytes, error;
+
+	/* Ask the kernel to mount the new rootfs. */
+	error = reboot(RB_REROOT);
+	if (error != 0) {
+		emergency("RB_REROOT failed: %s", strerror(errno));
+		goto out;
+	}
+
+	/*
+	 * Figure out where the destination init(8) binary is.  Note that
+	 * the path could be different than what we've started with.  Use
+	 * the value from kenv, if set, or the one from sysctl otherwise.
+	 * The latter defaults to a hardcoded value, but can be overridden
+	 * by a build time option.
+	 */
+	nbytes = kenv(KENV_GET, "init_path", init_path, sizeof(init_path));
+	if (nbytes <= 0) {
+		init_path_len = sizeof(init_path);
+		error = sysctlbyname("kern.init_path",
+		    init_path, &init_path_len, NULL, 0);
+		if (error != 0) {
+			emergency("failed to retrieve kern.init_path: %s",
+			    strerror(errno));
+			goto out;
+		}
+	}
+
+	/* Repeat the init search logic from sys/kern/init_path.c */
+	path_component = init_path;
+	while ((path = strsep(&path_component, ":")) != NULL) {
+		/* Returns only if the exec failed; then try the next one. */
+		execl(path, path, NULL);
+	}
+	/* NOTE: strsep() cut init_path at the first ':' for this message. */
+	emergency("cannot exec init from %s: %s", init_path, strerror(errno));
+
+out:
+	emergency("reroot failed; going to single user mode");
+	return (state_func_t) single_user;
+}
+
 /*
  * Bring the system up single user.
  */
@@ -851,8 +1094,9 @@ run_script(const char *script)
 		if ((wpid = waitpid(-1, &status, WUNTRACED)) != -1)
 			collect_child(wpid);
 		if (wpid == -1) {
-			if (requested_transition == death_single)
-				return (state_func_t) death_single;
+			if (requested_transition == death_single ||
+			    requested_transition == reroot)
+				return (state_func_t) requested_transition;
 			if (errno == EINTR)
 				continue;
 			warning("wait for %s on %s failed: %m; going to "
@@ -1323,6 +1567,9 @@ transition_handler(int sig)
 		    current_state == multi_user || current_state == catatonia)
 			requested_transition = catatonia;
 		break;
+	case SIGEMT:
+		requested_transition = reroot;
+		break;
 	default:
 		requested_transition = 0;
 		break;
@@ -1486,7 +1733,6 @@ alrm_handler(int sig)
 static state_func_t
 death(void)
 {
-	session_t *sp;
 	int block, blocked;
 	size_t len;
 
@@ -1503,11 +1749,7 @@ death(void)
 	 * runshutdown() will perform the initial open() call, causing
 	 * the terminal attributes to be misconfigured.
 	 */
-	for (sp = sessions; sp; sp = sp->se_next) {
-		sp->se_flags |= SE_SHUTDOWN;
-		kill(sp->se_process, SIGHUP);
-		revoke(sp->se_device);
-	}
+	revoke_ttys();
 
 	/* Try to run the rc.shutdown script within a period of time */
 	runshutdown();
@@ -1553,6 +1795,18 @@ death_single(void)
 	return (state_func_t) single_user;
 }
 
+/* Flag each session as shutting down, HUP its process, revoke its tty. */
+static void
+revoke_ttys(void)
+{
+	session_t *cur;
+	for (cur = sessions; cur != NULL; cur = cur->se_next) {
+		cur->se_flags |= SE_SHUTDOWN;
+		kill(cur->se_process, SIGHUP);
+		revoke(cur->se_device);
+	}
+}
+
 /*
  * Run the system shutdown script.
  *

Modified: head/sbin/init/pathnames.h
==============================================================================
--- head/sbin/init/pathnames.h	Sun Nov  8 14:26:50 2015	(r290547)
+++ head/sbin/init/pathnames.h	Sun Nov  8 17:33:48 2015	(r290548)
@@ -35,7 +35,9 @@
 
 #include <paths.h>
 
-#define	_PATH_INITLOG	"/var/log/init.log"
-#define	_PATH_SLOGGER	"/sbin/session_logger"
-#define	_PATH_RUNCOM	"/etc/rc"
-#define _PATH_RUNDOWN   "/etc/rc.shutdown"
+#define	_PATH_INITLOG		"/var/log/init.log"
+#define	_PATH_SLOGGER		"/sbin/session_logger"
+#define	_PATH_RUNCOM		"/etc/rc"
+#define	_PATH_RUNDOWN		"/etc/rc.shutdown"
+#define	_PATH_REROOT		"/dev/reroot"
+#define	_PATH_REROOT_INIT	_PATH_REROOT "/init"

Modified: head/sbin/reboot/reboot.8
==============================================================================
--- head/sbin/reboot/reboot.8	Sun Nov  8 14:26:50 2015	(r290547)
+++ head/sbin/reboot/reboot.8	Sun Nov  8 17:33:48 2015	(r290548)
@@ -28,7 +28,7 @@
 .\"	@(#)reboot.8	8.1 (Berkeley) 6/9/93
 .\" $FreeBSD$
 .\"
-.Dd October 11, 2010
+.Dd May 22, 2015
 .Dt REBOOT 8
 .Os
 .Sh NAME
@@ -42,7 +42,7 @@
 .Op Fl lnpq
 .Op Fl k Ar kernel
 .Nm
-.Op Fl dlnpq
+.Op Fl dlnpqr
 .Op Fl k Ar kernel
 .Nm fasthalt
 .Op Fl lnpq
@@ -111,6 +111,13 @@ the flushing of the file system cache is
 .Fl n
 option is not specified).
 This option should probably not be used.
+.It Fl r
+The system kills all processes, unmounts all filesystems, mounts the new
+root filesystem, and begins the usual startup sequence.
+After changing vfs.root.mountfrom with
+.Xr kenv 8 ,
+.Nm Fl r
+can be used to change the root filesystem while preserving kernel state.
 .El
 .Pp
 The
@@ -128,6 +135,13 @@ Normally, the
 utility is used when the system needs to be halted or restarted, giving
 users advance warning of their impending doom and cleanly terminating
 specific programs.
+.Sh EXAMPLES
+Replace current root filesystem with UFS mounted from
+.Pa /dev/ada0s1a :
+.Bd -literal -offset indent
+kenv vfs.root.mountfrom=ufs:/dev/ada0s1a
+reboot -r
+.Ed
 .Sh SEE ALSO
 .Xr getutxent 3 ,
 .Xr boot 8 ,

Modified: head/sbin/reboot/reboot.c
==============================================================================
--- head/sbin/reboot/reboot.c	Sun Nov  8 14:26:50 2015	(r290547)
+++ head/sbin/reboot/reboot.c	Sun Nov  8 17:33:48 2015	(r290548)
@@ -77,7 +77,7 @@ main(int argc, char *argv[])
 	} else
 		howto = 0;
 	lflag = nflag = qflag = 0;
-	while ((ch = getopt(argc, argv, "dk:lnpq")) != -1)
+	while ((ch = getopt(argc, argv, "dk:lnpqr")) != -1)
 		switch(ch) {
 		case 'd':
 			howto |= RB_DUMP;
@@ -98,6 +98,9 @@ main(int argc, char *argv[])
 		case 'q':
 			qflag = 1;
 			break;
+		case 'r':
+			howto |= RB_REROOT;
+			break;
 		case '?':
 		default:
 			usage();
@@ -107,6 +110,8 @@ main(int argc, char *argv[])
 
 	if ((howto & (RB_DUMP | RB_HALT)) == (RB_DUMP | RB_HALT))
 		errx(1, "cannot dump (-d) when halting; must reboot instead");
+	if ((howto & RB_REROOT) != 0 && howto != RB_REROOT)
+		errx(1, "-r cannot be used with -d, -n, or -p");
 	if (geteuid()) {
 		errno = EPERM;
 		err(1, NULL);
@@ -137,6 +142,9 @@ main(int argc, char *argv[])
 		if (dohalt) {
 			openlog("halt", 0, LOG_AUTH | LOG_CONS);
 			syslog(LOG_CRIT, "halted by %s", user);
+		} else if (howto & RB_REROOT) {
+			openlog("reroot", 0, LOG_AUTH | LOG_CONS);
+			syslog(LOG_CRIT, "rerooted by %s", user);
 		} else {
 			openlog("reboot", 0, LOG_AUTH | LOG_CONS);
 			syslog(LOG_CRIT, "rebooted by %s", user);
@@ -170,6 +178,16 @@ main(int argc, char *argv[])
 	 */
 	(void)signal(SIGPIPE, SIG_IGN);
 
+	/*
+	 * Only init(8) can perform rerooting.
+	 */
+	if (howto & RB_REROOT) {
+		if (kill(1, SIGEMT) == -1)
+			err(1, "SIGEMT init");
+
+		return (0);
+	}
+
 	/* Just stop init -- if we fail, we'll restart it. */
 	if (kill(1, SIGTSTP) == -1)
 		err(1, "SIGTSTP init");



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201511081733.tA8HXm26041362>