Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 31 May 2009 12:10:04 +0000 (UTC)
From:      Marko Zec <zec@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r193166 - in head/sys: kern net sys
Message-ID:  <200905311210.n4VCA4rA042721@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: zec
Date: Sun May 31 12:10:04 2009
New Revision: 193166
URL: http://svn.freebsd.org/changeset/base/193166

Log:
  Introduce an interim userland-kernel API for creating vnets and
  assigning ifnets from one vnet to another.  Deletion of vnets is not
  yet supported.
  
  The interface is implemented as an ioctl extension so that no syscalls
  had to be introduced.  This should be acceptable given that the new
  interface will be used for a short / interim period only, until the
  new jail management framework gains the capability of managing vnets.
  This method for managing vimages / vnets has been in use for the past
  7 years without any observable issues.
  
  The userland tool to be used in conjunction with the interim API can be
  found in p4: //depot/projects/vimage-commit2/src/usr.sbin/vimage/... and
  will most probably never get committed to svn.
  
  While here, bump copyright notices in kern_vimage.c and vimage.h to
  cover work done in year 2009.
  
  Approved by:	julian (mentor)
  Discussed with:	bz, rwatson

Modified:
  head/sys/kern/kern_prot.c
  head/sys/kern/kern_vimage.c
  head/sys/net/if.c
  head/sys/sys/sockio.h
  head/sys/sys/vimage.h

Modified: head/sys/kern/kern_prot.c
==============================================================================
--- head/sys/kern/kern_prot.c	Sun May 31 12:04:01 2009	(r193165)
+++ head/sys/kern/kern_prot.c	Sun May 31 12:10:04 2009	(r193166)
@@ -1748,7 +1748,11 @@ p_canwait(struct thread *td, struct proc
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	if ((error = prison_check(td->td_ucred, p->p_ucred)))
+	if (
+#ifdef VIMAGE /* XXX temporary until struct vimage goes away */
+	    !vi_child_of(TD_TO_VIMAGE(td), P_TO_VIMAGE(p)) &&
+#endif
+	    (error = prison_check(td->td_ucred, p->p_ucred)))
 		return (error);
 #ifdef MAC
 	if ((error = mac_proc_check_wait(td->td_ucred, p)))

Modified: head/sys/kern/kern_vimage.c
==============================================================================
--- head/sys/kern/kern_vimage.c	Sun May 31 12:04:01 2009	(r193165)
+++ head/sys/kern/kern_vimage.c	Sun May 31 12:10:04 2009	(r193166)
@@ -1,6 +1,6 @@
 /*-
- * Copyright (c) 2004-2008 University of Zagreb
- * Copyright (c) 2006-2008 FreeBSD Foundation
+ * Copyright (c) 2004-2009 University of Zagreb
+ * Copyright (c) 2006-2009 FreeBSD Foundation
  *
  * This software was developed by the University of Zagreb and the
  * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
@@ -34,16 +34,24 @@ __FBSDID("$FreeBSD$");
 #include "opt_ddb.h"
 
 #include <sys/param.h>
-#include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/linker.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
-#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/priv.h>
+#include <sys/refcount.h>
 #include <sys/vimage.h>
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
+#include <net/if.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
 #ifndef VIMAGE_GLOBALS
 
 MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container");
@@ -57,6 +65,22 @@ static int vnet_mod_constructor(struct v
 static int vnet_mod_destructor(struct vnet_modlink *);
 
 #ifdef VIMAGE
+static struct vimage *vimage_by_name(struct vimage *, char *);
+static struct vimage *vi_alloc(struct vimage *, char *);
+static struct vimage *vimage_get_next(struct vimage *, struct vimage *, int);
+static void vimage_relative_name(struct vimage *, struct vimage *,
+    char *, int);
+#endif
+
+#define	VNET_LIST_WLOCK()						\
+	mtx_lock(&vnet_list_refc_mtx);					\
+	while (vnet_list_refc != 0)					\
+		cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx);
+
+#define	VNET_LIST_WUNLOCK()						\
+	mtx_unlock(&vnet_list_refc_mtx);
+
+#ifdef VIMAGE
 struct vimage_list_head vimage_head;
 struct vnet_list_head vnet_head;
 struct vprocg_list_head vprocg_head;
@@ -67,9 +91,294 @@ struct vprocg vprocg_0;
 #endif
 
 #ifdef VIMAGE
+struct cv vnet_list_condvar;
+struct mtx vnet_list_refc_mtx;
+int vnet_list_refc = 0;
+
+static u_int last_vi_id = 0;
+static u_int last_vnet_id = 0;
+static u_int last_vprocg_id = 0;
+
 struct vnet *vnet0;
 #endif
 
+#ifdef VIMAGE
+
+/*
+ * Interim userspace interface - will be replaced by jail soon.
+ */
+
+/*
+ * Move an ifnet to another vnet.  The ifnet can be specified either
+ * by ifp argument, or by name contained in vi_req->vi_if_xname if NULL is
+ * passed as ifp.  The target vnet can be specified either by vnet
+ * argument or by name. If vnet name equals to ".." or vi_req is set to
+ * NULL the interface is moved to the parent vnet.
+ */
+int
+vi_if_move(struct vi_req *vi_req, struct ifnet *ifp, struct vimage *vip)
+{
+	struct vimage *new_vip;
+	struct vnet *new_vnet = NULL;
+
+	/* Check for API / ABI version mismatch. */
+	if (vi_req->vi_api_cookie != VI_API_COOKIE)
+		return (EDOOFUS);
+
+	/* Find the target vnet. */
+	if (vi_req == NULL || strcmp(vi_req->vi_name, "..") == 0) {
+		if (IS_DEFAULT_VIMAGE(vip))
+			return (ENXIO);
+		new_vnet = vip->vi_parent->v_net;
+	} else {
+		new_vip = vimage_by_name(vip, vi_req->vi_name);
+		if (new_vip == NULL)
+			return (ENXIO);
+		new_vnet = new_vip->v_net;
+	}
+
+	/* Try to find the target ifnet by name. */
+	if (ifp == NULL)
+		ifp = ifunit(vi_req->vi_if_xname);
+
+	if (ifp == NULL)
+		return (ENXIO);
+
+	/*
+	 * Check for naming clashes in target vnet.  Not locked so races
+	 * are possible.
+	 */
+	if (vi_req != NULL) {
+		struct ifnet *t_ifp;
+
+		CURVNET_SET_QUIET(new_vnet);
+		t_ifp = ifunit(vi_req->vi_if_xname);
+		CURVNET_RESTORE();
+		if (t_ifp != NULL)
+			return (EEXIST);
+	}
+
+	/* Detach from curvnet and attach to new_vnet. */
+	if_vmove(ifp, new_vnet);
+
+	/* Report the new if_xname back to the userland */
+	if (vi_req != NULL)
+		sprintf(vi_req->vi_if_xname, "%s", ifp->if_xname);
+
+	return (0);
+}
+
+int
+vi_td_ioctl(u_long cmd, struct vi_req *vi_req, struct thread *td)
+{
+	int error = 0;
+	struct vimage *vip = TD_TO_VIMAGE(td);
+	struct vimage *vip_r = NULL;
+
+	/* Check for API / ABI version mismatch. */
+	if (vi_req->vi_api_cookie != VI_API_COOKIE)
+		return (EDOOFUS);
+
+	error = priv_check(td, PRIV_REBOOT); /* XXX temp. priv abuse */
+	if (error)
+		return (error);
+
+	vip_r = vimage_by_name(vip, vi_req->vi_name);
+	if (vip_r == NULL && !(vi_req->vi_req_action & VI_CREATE))
+		return (ESRCH);
+	if (vip_r != NULL && vi_req->vi_req_action & VI_CREATE)
+		return (EADDRINUSE);
+	if (vi_req->vi_req_action == VI_GETNEXT) {
+		vip_r = vimage_get_next(vip, vip_r, 0);
+		if (vip_r == NULL)
+			return (ESRCH);
+	}
+	if (vi_req->vi_req_action == VI_GETNEXT_RECURSE) {
+		vip_r = vimage_get_next(vip, vip_r, 1);
+		if (vip_r == NULL)
+			return (ESRCH);
+	}
+
+	if (vip_r && !vi_child_of(vip, vip_r) && /* XXX delete the rest? */
+	    vi_req->vi_req_action != VI_GET &&
+	    vi_req->vi_req_action != VI_GETNEXT)
+		return (EPERM);
+
+	switch (cmd) {
+
+	case SIOCGPVIMAGE:
+		vimage_relative_name(vip, vip_r, vi_req->vi_name,
+		    sizeof (vi_req->vi_name));
+		vi_req->vi_proc_count = vip_r->v_procg->nprocs;
+		vi_req->vi_if_count = vip_r->v_net->ifcnt;
+		vi_req->vi_sock_count = vip_r->v_net->sockcnt;
+		break;
+
+	case SIOCSPVIMAGE:
+		if (vi_req->vi_req_action == VI_DESTROY) {
+#ifdef NOTYET
+			error = vi_destroy(vip_r);
+#else
+			error = EOPNOTSUPP;
+#endif
+			break;
+		}
+
+		if (vi_req->vi_req_action == VI_SWITCHTO) {
+			struct proc *p = td->td_proc;
+			struct ucred *oldcred, *newcred;
+
+			/*
+			 * XXX priv_check()?
+			 * XXX allow only a single td per proc here?
+			 */
+			newcred = crget();
+			PROC_LOCK(p);
+			oldcred = p->p_ucred;
+			setsugid(p);
+			crcopy(newcred, oldcred);
+			refcount_release(&newcred->cr_vimage->vi_ucredrefc);
+			newcred->cr_vimage = vip_r;
+			refcount_acquire(&newcred->cr_vimage->vi_ucredrefc);
+			p->p_ucred = newcred;
+			PROC_UNLOCK(p);
+			sx_xlock(&allproc_lock);
+			oldcred->cr_vimage->v_procg->nprocs--;
+			refcount_release(&oldcred->cr_vimage->vi_ucredrefc);
+			P_TO_VPROCG(p)->nprocs++;
+			sx_xunlock(&allproc_lock);
+			crfree(oldcred);
+			break;
+		}
+
+		if (vi_req->vi_req_action & VI_CREATE) {
+			char *dotpos;
+
+			dotpos = strrchr(vi_req->vi_name, '.');
+			if (dotpos != NULL) {
+				*dotpos = 0;
+				vip = vimage_by_name(vip, vi_req->vi_name);
+				if (vip == NULL)
+					return (ESRCH);
+				dotpos++;
+				vip_r = vi_alloc(vip, dotpos);
+			} else
+				vip_r = vi_alloc(vip, vi_req->vi_name);
+			if (vip_r == NULL)
+				return (ENOMEM);
+		}
+	}
+	return (error);
+}
+
+int
+vi_child_of(struct vimage *parent, struct vimage *child)
+{
+
+	if (child == parent)
+		return (0);
+	for (; child; child = child->vi_parent)
+		if (child == parent)
+			return (1);
+	return (0);
+}
+
+static struct vimage *
+vimage_by_name(struct vimage *top, char *name)
+{
+	struct vimage *vip;
+	char *next_name;
+	int namelen;
+
+	next_name = strchr(name, '.');
+	if (next_name != NULL) {
+		namelen = next_name - name;
+		next_name++;
+		if (namelen == 0) {
+			if (strlen(next_name) == 0)
+				return (top);	/* '.' == this vimage */
+			else
+				return (NULL);
+		}
+	} else
+		namelen = strlen(name);
+	if (namelen == 0)
+		return (NULL);
+	LIST_FOREACH(vip, &top->vi_child_head, vi_sibling) {
+		if (strlen(vip->vi_name) == namelen &&
+		    strncmp(name, vip->vi_name, namelen) == 0) {
+			if (next_name != NULL)
+				return (vimage_by_name(vip, next_name));
+			else
+				return (vip);
+		}
+	}
+	return (NULL);
+}
+
+static void
+vimage_relative_name(struct vimage *top, struct vimage *where,
+    char *buffer, int bufflen)
+{
+	int used = 1;
+
+	if (where == top) {
+		sprintf(buffer, ".");
+		return;
+	} else
+		*buffer = 0;
+
+	do {
+		int namelen = strlen(where->vi_name);
+
+		if (namelen + used + 1 >= bufflen)
+			panic("buffer overflow");
+
+		if (used > 1) {
+			bcopy(buffer, &buffer[namelen + 1], used);
+			buffer[namelen] = '.';
+			used++;
+		} else
+			bcopy(buffer, &buffer[namelen], used);
+		bcopy(where->vi_name, buffer, namelen);
+		used += namelen;
+		where = where->vi_parent;
+	} while (where != top);
+}
+
+static struct vimage *
+vimage_get_next(struct vimage *top, struct vimage *where, int recurse)
+{
+	struct vimage *next;
+
+	if (recurse) {
+		/* Try to go deeper in the hierarchy */
+		next = LIST_FIRST(&where->vi_child_head);
+		if (next != NULL)
+			return (next);
+	}
+
+	do {
+		/* Try to find next sibling */
+		next = LIST_NEXT(where, vi_sibling);
+		if (!recurse || next != NULL)
+			return (next);
+
+		/* Nothing left on this level, go one level up */
+		where = where->vi_parent;
+	} while (where != top->vi_parent);
+
+	/* Nothing left to be visited, we are done */
+	return (NULL);
+}
+
+#endif /* VIMAGE */ /* User interface block */
+
+
+/*
+ * Kernel interfaces and handlers.
+ */
+
 void
 vnet_mod_register(const struct vnet_modinfo *vmi)
 {
@@ -221,7 +530,7 @@ vnet_mod_constructor(struct vnet_modlink
 		void *mem = malloc(vmi->vmi_size, M_VNET,
 		    M_NOWAIT | M_ZERO);
 		if (mem == NULL) /* XXX should return error, not panic. */
-			panic("vi_alloc: malloc for %s\n", vmi->vmi_name);
+			panic("malloc for %s\n", vmi->vmi_name);
 		curvnet->mod_data[vmi->vmi_id] = mem;
 	}
 #endif
@@ -301,43 +610,84 @@ vi_symlookup(struct kld_sym_lookup *look
 	return (ENOENT);
 }
 
-static void
-vi_init(void *unused)
-{
 #ifdef VIMAGE
+static struct vimage *
+vi_alloc(struct vimage *parent, char *name)
+{
 	struct vimage *vip;
 	struct vprocg *vprocg;
 	struct vnet *vnet;
-#endif
-
-	TAILQ_INIT(&vnet_modlink_head);
-	TAILQ_INIT(&vnet_modpending_head);
-
-#ifdef VIMAGE
-	LIST_INIT(&vimage_head);
-	LIST_INIT(&vprocg_head);
-	LIST_INIT(&vnet_head);
+	struct vnet_modlink *vml;
 
 	vip = malloc(sizeof(struct vimage), M_VIMAGE, M_NOWAIT | M_ZERO);
 	if (vip == NULL)
-		panic("malloc failed for struct vimage");
+		panic("vi_alloc: malloc failed for vimage \"%s\"\n", name);
+	vip->vi_id = last_vi_id++;
+	LIST_INIT(&vip->vi_child_head);
+	sprintf(vip->vi_name, "%s", name);
+	vip->vi_parent = parent;
+	/* XXX locking */
+	if (parent != NULL)
+		LIST_INSERT_HEAD(&parent->vi_child_head, vip, vi_sibling);
+	else if (!LIST_EMPTY(&vimage_head))
+		panic("there can be only one default vimage!");
 	LIST_INSERT_HEAD(&vimage_head, vip, vi_le);
 
+	vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO);
+	if (vnet == NULL)
+		panic("vi_alloc: malloc failed for vnet \"%s\"\n", name);
+	vip->v_net = vnet;
+	vnet->vnet_id = last_vnet_id++;
+	if (vnet->vnet_id == 0)
+		vnet0 = vnet;
+	vnet->vnet_magic_n = VNET_MAGIC_N;
+
 	vprocg = malloc(sizeof(struct vprocg), M_VPROCG, M_NOWAIT | M_ZERO);
 	if (vprocg == NULL)
-		panic("malloc failed for struct vprocg");
+		panic("vi_alloc: malloc failed for vprocg \"%s\"\n", name);
 	vip->v_procg = vprocg;
-	LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le);
+	vprocg->vprocg_id = last_vprocg_id++;
 
-	vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO);
-	if (vnet == NULL)
-		panic("vi_alloc: malloc failed");
+	/* Initialize / attach vnet module instances. */
+	CURVNET_SET_QUIET(vnet);
+	TAILQ_FOREACH(vml, &vnet_modlink_head, vml_mod_le)
+		vnet_mod_constructor(vml);
+	CURVNET_RESTORE();
+
+	VNET_LIST_WLOCK();
 	LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le);
-	vnet->vnet_magic_n = VNET_MAGIC_N;
-	vip->v_net = vnet;
-	vnet0 = vnet;
+	VNET_LIST_WUNLOCK();
+
+	/* XXX locking */
+	LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le);
+
+	return (vip);
+}
+#endif /* VIMAGE */
 
-	/* We MUST clear curvnet in vi_init_done before going SMP. */
+static void
+vi_init(void *unused)
+{
+
+	TAILQ_INIT(&vnet_modlink_head);
+	TAILQ_INIT(&vnet_modpending_head);
+
+#ifdef VIMAGE
+	LIST_INIT(&vimage_head);
+	LIST_INIT(&vprocg_head);
+	LIST_INIT(&vnet_head);
+
+	mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF);
+	cv_init(&vnet_list_condvar, "vnet_list_condvar");
+
+	/* Default image has no parent and no name. */
+	vi_alloc(NULL, "");
+
+	/*
+	 * We MUST clear curvnet in vi_init_done() before going SMP,
+	 * otherwise CURVNET_SET() macros would scream about unnecessary
+	 * curvnet recursions.
+	 */
 	curvnet = LIST_FIRST(&vnet_head);
 #endif
 }

Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c	Sun May 31 12:04:01 2009	(r193165)
+++ head/sys/net/if.c	Sun May 31 12:10:04 2009	(r193166)
@@ -2283,6 +2283,21 @@ ifioctl(struct socket *so, u_long cmd, c
 	ifr = (struct ifreq *)data;
 
 	switch (cmd) {
+#ifdef VIMAGE
+	/*
+	 * XXX vnet creation will be implemented through the new jail
+	 * framework - this is just a temporary hack for testing the
+	 * vnet create / destroy mechanisms.
+	 */
+	case SIOCSIFVIMAGE:
+		error = vi_if_move((struct vi_req *) data, NULL,
+		    TD_TO_VIMAGE(td));
+		return (error);
+	case SIOCSPVIMAGE:
+	case SIOCGPVIMAGE:
+		error = vi_td_ioctl(cmd, (struct vi_req *) data, td);
+		return (error);
+#endif
 	case SIOCIFCREATE:
 	case SIOCIFCREATE2:
 		error = priv_check(td, PRIV_NET_IFCREATE);

Modified: head/sys/sys/sockio.h
==============================================================================
--- head/sys/sys/sockio.h	Sun May 31 12:04:01 2009	(r193165)
+++ head/sys/sys/sockio.h	Sun May 31 12:10:04 2009	(r193166)
@@ -108,6 +108,10 @@
 #define	SIOCGPRIVATE_0	_IOWR('i', 80, struct ifreq)	/* device private 0 */
 #define	SIOCGPRIVATE_1	_IOWR('i', 81, struct ifreq)	/* device private 1 */
 
+#define	SIOCSPVIMAGE	 _IOW('i', 101, struct vi_req)	/* set proc vimage */
+#define	SIOCGPVIMAGE	_IOWR('i', 102, struct vi_req)	/* get proc vimage */
+#define	SIOCSIFVIMAGE	_IOWR('i', 103, struct vi_req)	/* set ifc vi/net */
+
 #define	SIOCSDRVSPEC	_IOW('i', 123, struct ifdrv)	/* set driver-specific
 								  parameters */
 #define	SIOCGDRVSPEC	_IOWR('i', 123, struct ifdrv)	/* get driver-specific

Modified: head/sys/sys/vimage.h
==============================================================================
--- head/sys/sys/vimage.h	Sun May 31 12:04:01 2009	(r193165)
+++ head/sys/sys/vimage.h	Sun May 31 12:10:04 2009	(r193166)
@@ -1,6 +1,6 @@
 /*-
- * Copyright (c) 2006-2008 University of Zagreb
- * Copyright (c) 2006-2008 FreeBSD Foundation
+ * Copyright (c) 2006-2009 University of Zagreb
+ * Copyright (c) 2006-2009 FreeBSD Foundation
  *
  * This software was developed by the University of Zagreb and the
  * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
@@ -36,6 +36,31 @@
 #include <sys/proc.h>
 #include <sys/queue.h>
 
+/* Interim userspace API. */
+struct vi_req {
+	int	vi_api_cookie;		/* Catch API mismatch. */
+	int	vi_req_action;		/* What to do with this request? */
+	u_short	vi_proc_count;		/* Current number of processes. */
+	int	vi_if_count;		/* Current number of ifnets. */
+	int	vi_sock_count;
+	char	vi_name[MAXPATHLEN];
+	char	vi_if_xname[MAXPATHLEN]; /* XXX should be IFNAMSIZ */
+};
+
+#define	VI_CREATE		0x00000001
+#define	VI_DESTROY		0x00000002
+#define	VI_SWITCHTO		0x00000008
+#define	VI_IFACE		0x00000010
+#define	VI_GET			0x00000100
+#define	VI_GETNEXT		0x00000200
+#define	VI_GETNEXT_RECURSE	0x00000300
+
+#define	VI_API_VERSION		1		/* Bump on struct changes. */
+
+#define	VI_API_COOKIE		((sizeof(struct vi_req) << 16) | VI_API_VERSION)
+
+#ifdef _KERNEL
+
 #if defined(VIMAGE) && defined(VIMAGE_GLOBALS)
 #error "You cannot have both option VIMAGE and option VIMAGE_GLOBALS!"
 #endif
@@ -46,6 +71,8 @@
 
 struct vprocg;
 struct vnet;
+struct vi_req;
+struct ifnet;
 struct kld_sym_lookup;
 
 typedef int vnet_attach_fn(const void *);
@@ -129,6 +156,9 @@ struct vnet_modlink {
 #define	V_MOD_vprocg		0	/* no minor module ids like in vnet */
 
 int	vi_symlookup(struct kld_sym_lookup *, char *);
+int	vi_td_ioctl(u_long, struct vi_req *, struct thread *);
+int	vi_if_move(struct vi_req *, struct ifnet *, struct vimage *);
+int	vi_child_of(struct vimage *, struct vimage *);
 void	vnet_mod_register(const struct vnet_modinfo *);
 void	vnet_mod_register_multi(const struct vnet_modinfo *, void *, char *);
 void	vnet_mod_deregister(const struct vnet_modinfo *);
@@ -449,4 +479,6 @@ extern struct vprocg_list_head vprocg_he
 #define	VIMAGE_CTASSERT(x, y)		struct __hack
 #endif
 
+#endif /* _KERNEL */
+
 #endif /* !_SYS_VIMAGE_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905311210.n4VCA4rA042721>