Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 2 Apr 2016 13:51:07 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r297512 - in projects/vnet/sys: net netinet netinet6 sys
Message-ID:  <201604021351.u32Dp7h1003312@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bz
Date: Sat Apr  2 13:51:06 2016
New Revision: 297512
URL: https://svnweb.freebsd.org/changeset/base/297512

Log:
  It seems with the parts tested a top-down network stack cleanup works.
  
  The exception to the rules are cloned interfaces which unfortunately
  need to go first, which is a constraint we inherit as the clone destroy
  functions cleanup themselves and we can't break that for the normal
  ifconfig destroy case.  We introduce a VNET state field for this to
  check.  It is unclear to me if we want that state to just be the SI_SUB_*
  level we are currently on instead of manually defining state.  That way
  we would be more flexible?
  
  In general this change is very careful (and introduces flags in various
  places for that) to distinguish between the cases.  On VNET shutdown we
  do not want to do the normal interface detach and cleanup as we cleanup
  upper layers, protocol layers, etc.
  
  Extra fun had with:
  - bpf, on vmove we lose the if_bpf due to the event handler on detach
    but we do not re-attach.  It did not seem to be 100% reliable which
    made me wonder but also how this may have worked in the past and not
    been noticed.  I saw panics in epair on the BPF_MTAP right after attach
    from nd6 sending a packet.
  - de-tangling multicast and its assumptions on interfaces and a proper
    cleanup needed to be handled carefully.
  - plugging the rtsock zone leak turned out to be interface routes never
    being removed properly in the top-down teardown.  It's unclear to me
    if they are in the current head version.
  - all the inter-dependencies between layers and lists and hashes and locks
    and the various bits called on teardown or from various places
    causing problems when suddenly certain layers had already been "destroyed"
    yet their locks were tried to be acquired.
  
  Sponsored by:	The FreeBSD Foundation

Modified:
  projects/vnet/sys/net/bpf.c
  projects/vnet/sys/net/bpf.h
  projects/vnet/sys/net/if.c
  projects/vnet/sys/net/if_bridge.c
  projects/vnet/sys/net/if_enc.c
  projects/vnet/sys/net/if_epair.c
  projects/vnet/sys/net/if_lagg.c
  projects/vnet/sys/net/if_loop.c
  projects/vnet/sys/net/route.c
  projects/vnet/sys/net/route.h
  projects/vnet/sys/net/vnet.c
  projects/vnet/sys/net/vnet.h
  projects/vnet/sys/netinet/igmp.c
  projects/vnet/sys/netinet/in.c
  projects/vnet/sys/netinet/in.h
  projects/vnet/sys/netinet/in_var.h
  projects/vnet/sys/netinet/ip_input.c
  projects/vnet/sys/netinet6/in6_ifattach.c
  projects/vnet/sys/netinet6/in6_ifattach.h
  projects/vnet/sys/netinet6/ip6_input.c
  projects/vnet/sys/netinet6/mld6.c
  projects/vnet/sys/sys/kernel.h

Modified: projects/vnet/sys/net/bpf.c
==============================================================================
--- projects/vnet/sys/net/bpf.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/bpf.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_bpf.h"
 #include "opt_compat.h"
+#include "opt_ddb.h"
 #include "opt_netgraph.h"
 
 #include <sys/types.h>
@@ -67,6 +68,10 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/socket.h>
 
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
@@ -2569,6 +2574,32 @@ bpfattach2(struct ifnet *ifp, u_int dlt,
 		if_printf(ifp, "bpf attached\n");
 }
 
+#ifdef VIMAGE
+/*
+ * When moving interfaces between vnet instances we need a way to
+ * query the dlt and hdrlen before detach so we can re-attch the if_bpf
+ * after the vmove.  We unfortunately have no device driver infrastructure
+ * to query the interface for these values after creation/attach, thus
+ * add this as a workaround.
+ */
+int
+bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
+{
+
+	if (bp == NULL)
+		return (ENXIO);
+	if (bif_dlt == NULL && bif_hdrlen == NULL)
+		return (0);
+
+	if (bif_dlt != NULL)
+		*bif_dlt = bp->bif_dlt;
+	if (bif_hdrlen != NULL)
+		*bif_hdrlen = bp->bif_hdrlen;
+
+	return (0);
+}
+#endif
+
 /*
  * Detach bpf from an interface. This involves detaching each descriptor
  * associated with the interface. Notify each descriptor as it's detached
@@ -2977,3 +3008,34 @@ bpf_validate(const struct bpf_insn *f, i
 }
 
 #endif /* !DEV_BPF && !NETGRAPH_BPF */
+
+#ifdef DDB
+static void
+bpf_show_bpf_if(struct bpf_if *bpf_if)
+{
+
+	if (bpf_if == NULL)
+		return;
+	db_printf("%p:\n", bpf_if);
+#define	BPF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, bpf_if->e);
+	/* bif_ext.bif_next */
+	/* bif_ext.bif_dlist */
+	BPF_DB_PRINTF("%#x", bif_dlt);
+	BPF_DB_PRINTF("%u", bif_hdrlen);
+	BPF_DB_PRINTF("%p", bif_ifp);
+	/* bif_lock */
+	/* bif_wlist */
+	BPF_DB_PRINTF("%#x", bif_flags);
+}
+
+DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
+{
+
+	if (!have_addr) {
+		db_printf("usage: show bpf_if <struct bpf_if *>\n");
+		return;
+	}
+
+	bpf_show_bpf_if((struct bpf_if *)addr);
+}
+#endif

Modified: projects/vnet/sys/net/bpf.h
==============================================================================
--- projects/vnet/sys/net/bpf.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/bpf.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -1469,6 +1469,9 @@ void	 bpf_mtap2(struct bpf_if *, void *,
 void	 bpfattach(struct ifnet *, u_int, u_int);
 void	 bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
 void	 bpfdetach(struct ifnet *);
+#ifdef VIMAGE
+int	 bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
+#endif
 
 void	 bpfilterattach(int);
 u_int	 bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);

Modified: projects/vnet/sys/net/if.c
==============================================================================
--- projects/vnet/sys/net/if.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -174,9 +174,9 @@ static int	if_getgroup(struct ifgroupreq
 static int	if_getgroupmembers(struct ifgroupreq *);
 static void	if_delgroups(struct ifnet *);
 static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
-static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
+static int	if_detach_internal(struct ifnet *, int, int, struct if_clone **);
 #ifdef VIMAGE
-static void	if_vmove(struct ifnet *, struct vnet *);
+static void	if_vmove(struct ifnet *, struct vnet *, int);
 #endif
 
 #ifdef INET6
@@ -389,12 +389,6 @@ vnet_if_uninit(const void *unused __unus
 VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
     vnet_if_uninit, NULL);
 
-/*
- * XXX-BZ VNET; probably along with dom stuff.
- * This is very wrong but MC currently implies that interfaces are
- * gone before we can free it.  This needs to be fied differently
- * and this needs to be moved back to SI_SUB_INIT_IF.
- */
 static void
 vnet_if_return(const void *unused __unused)
 {
@@ -403,10 +397,10 @@ vnet_if_return(const void *unused __unus
 	/* Return all inherited interfaces to their parent vnets. */
 	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
 		if (ifp->if_home_vnet != ifp->if_vnet)
-			if_vmove(ifp, ifp->if_home_vnet);
+			if_vmove(ifp, ifp->if_home_vnet, 1);
 	}
 }
-VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
+VNET_SYSUNINIT(vnet_if_return, SI_SUB_INIT_IF, SI_ORDER_ANY,
     vnet_if_return, NULL);
 #endif
 
@@ -910,12 +904,23 @@ if_detach(struct ifnet *ifp)
 {
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
-	if_detach_internal(ifp, 0, NULL);
+	if_detach_internal(ifp, 0, 0, NULL);
 	CURVNET_RESTORE();
 }
 
+/*
+ * The vmove, if set, flag indicates that we are called from a callpath
+ * that is moving an interface to a different vnet instance.
+ *
+ * The shutdown flag, if set, indicates that we are called in the
+ * progress of shutting down a vnet instance.  Currently only the
+ * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
+ * on a vnet instance shutdown without this flag being set, e.g., when
+ * the cloned interfaces are destoyed as first thing of teardown.
+ */
 static int
-if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
+if_detach_internal(struct ifnet *ifp, int vmove, int shutdown,
+    struct if_clone **ifcp)
 {
 	struct ifaddr *ifa;
 	int i;
@@ -951,6 +956,12 @@ if_detach_internal(struct ifnet *ifp, in
 #endif
 	}
 
+	/* The one thing we have to do. */
+	if_delgroups(ifp);
+
+	if (!vmove && !shutdown && ifp->if_vnet->vnet_state == VNET_STATE_DYING_AFTER_PSEUDO)
+		return (ENOENT);
+
 	/* Check if this is a cloned interface or not. */
 	if (vmove && ifcp != NULL)
 		*ifcp = if_clone_findifc(ifp);
@@ -974,7 +985,7 @@ if_detach_internal(struct ifnet *ifp, in
 	if_purgeaddrs(ifp);
 
 #ifdef INET
-	in_ifdetach(ifp);
+	in_ifdetach(ifp, !shutdown);
 #endif
 
 #ifdef INET6
@@ -984,9 +995,10 @@ if_detach_internal(struct ifnet *ifp, in
 	 * routes are expected to be removed by the IPv6-specific kernel API.
 	 * Otherwise, the kernel will detect some inconsistency and bark it.
 	 */
-	in6_ifdetach(ifp);
+	in6_ifdetach(ifp, !shutdown);
 #endif
-	if_purgemaddrs(ifp);
+	if (!shutdown)
+		if_purgemaddrs(ifp);
 
 	/* Announce that the interface is gone. */
 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
@@ -1016,8 +1028,8 @@ if_detach_internal(struct ifnet *ifp, in
 		}
 	}
 
-	rt_flushifroutes(ifp);
-	if_delgroups(ifp);
+	if (!shutdown)
+		rt_flushifroutes(ifp);
 
 	/*
 	 * We cannot hold the lock over dom_ifdetach calls as they might
@@ -1046,17 +1058,25 @@ if_detach_internal(struct ifnet *ifp, in
  * and finally find an unused if_xname for the target vnet.
  */
 static void
-if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+if_vmove(struct ifnet *ifp, struct vnet *new_vnet, int shutdown)
 {
 	struct if_clone *ifc;
 	int rc;
+	u_int bif_dlt, bif_hdrlen;
+
+	/*
+	 * if_detach_internal() will call the eventhandler to notify
+	 * interface departure.  That will detach if_bpf.  We need to
+	 * safe the dlt and hdrlen so we can re-attach it later.
+	 */
+	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
 
 	/*
 	 * Detach from current vnet, but preserve LLADDR info, do not
 	 * mark as dead etc. so that the ifnet can be reattached later.
 	 * If we cannot find it, we lost the race to someone else.
 	 */
-	rc = if_detach_internal(ifp, 1, &ifc);
+	rc = if_detach_internal(ifp, 1, shutdown, &ifc);
 	if (rc != 0)
 		return;
 
@@ -1090,6 +1110,9 @@ if_vmove(struct ifnet *ifp, struct vnet 
 
 	if_attach_internal(ifp, 1, ifc);
 
+	if (ifp->if_bpf == NULL)
+		bpfattach(ifp, bif_dlt, bif_hdrlen);
+
 	CURVNET_RESTORE();
 }
 
@@ -1128,7 +1151,7 @@ if_vmove_loan(struct thread *td, struct 
 	}
 
 	/* Move the interface into the child jail/vnet. */
-	if_vmove(ifp, pr->pr_vnet);
+	if_vmove(ifp, pr->pr_vnet, 0);
 
 	/* Report the new if_xname back to the userland. */
 	sprintf(ifname, "%s", ifp->if_xname);
@@ -1171,7 +1194,7 @@ if_vmove_reclaim(struct thread *td, char
 	}
 
 	/* Get interface back from child jail/vnet. */
-	if_vmove(ifp, vnet_dst);
+	if_vmove(ifp, vnet_dst, 0);
 	CURVNET_RESTORE();
 
 	/* Report the new if_xname back to the userland. */

Modified: projects/vnet/sys/net/if_bridge.c
==============================================================================
--- projects/vnet/sys/net/if_bridge.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if_bridge.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -1132,7 +1132,7 @@ bridge_ioctl_add(struct bridge_softc *sc
 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
  			if (in6ifa_llaonifp(bif->bif_ifp)) {
 				BRIDGE_UNLOCK(sc);
-				in6_ifdetach(bif->bif_ifp);
+				in6_ifdetach(bif->bif_ifp, 1);
 				BRIDGE_LOCK(sc);
 				if_printf(sc->sc_ifp,
 				    "IPv6 addresses on %s have been removed "
@@ -1144,7 +1144,7 @@ bridge_ioctl_add(struct bridge_softc *sc
 		BRIDGE_XDROP(sc);
 		if (in6ifa_llaonifp(ifs)) {
 			BRIDGE_UNLOCK(sc);
-			in6_ifdetach(ifs);
+			in6_ifdetach(ifs, 1);
 			BRIDGE_LOCK(sc);
 			if_printf(sc->sc_ifp,
 			    "IPv6 addresses on %s have been removed "

Modified: projects/vnet/sys/net/if_enc.c
==============================================================================
--- projects/vnet/sys/net/if_enc.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if_enc.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -381,21 +381,13 @@ VNET_SYSINIT(vnet_enc_init_proto, SI_SUB
 static void
 vnet_enc_uninit(const void *unused __unused)
 {
-
-	if_clone_detach(V_enc_cloner);
-}
-VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
-    vnet_enc_uninit, NULL);
-
-static void
-vnet_enc_uninit_proto(void *unused __unused)
-{
 	KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
 
 	enc_remove_hhooks(V_enc_sc);
+	if_clone_detach(V_enc_cloner);
 }
-VNET_SYSUNINIT(vnet_enc_uninit_proto, SI_SUB_PROTO_IFATTACHDOMAIN,
-    SI_ORDER_ANY, vnet_enc_uninit_proto, NULL);
+VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_PSEUDO_DONE, SI_ORDER_ANY,
+    vnet_enc_uninit, NULL);
 
 static int
 enc_modevent(module_t mod, int type, void *data)

Modified: projects/vnet/sys/net/if_epair.c
==============================================================================
--- projects/vnet/sys/net/if_epair.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if_epair.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -960,7 +960,7 @@ vnet_epair_init(const void *unused __unu
 	V_epair_cloner = if_clone_advanced(epairname, 0,
 	    epair_clone_match, epair_clone_create, epair_clone_destroy);
 }
-VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_epair_init, NULL);
 
 static void
@@ -969,7 +969,7 @@ vnet_epair_uninit(const void *unused __u
 
 	if_clone_detach(V_epair_cloner);
 }
-VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PSEUDO_DONE, SI_ORDER_ANY,
     vnet_epair_uninit, NULL);
 
 static int

Modified: projects/vnet/sys/net/if_lagg.c
==============================================================================
--- projects/vnet/sys/net/if_lagg.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if_lagg.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -1466,7 +1466,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd
 		 * interface.
 		 */
 		if (in6ifa_llaonifp(tpif)) {
-			in6_ifdetach(tpif);
+			in6_ifdetach(tpif, 1);
 				if_printf(sc->sc_ifp,
 				    "IPv6 addresses on %s have been removed "
 				    "before adding it as a member to prevent "

Modified: projects/vnet/sys/net/if_loop.c
==============================================================================
--- projects/vnet/sys/net/if_loop.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/if_loop.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -156,7 +156,7 @@ vnet_loif_init(const void *unused __unus
 	    1);
 #endif
 }
-VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_loif_init, NULL);
 
 #ifdef VIMAGE
@@ -167,7 +167,7 @@ vnet_loif_uninit(const void *unused __un
 	if_clone_detach(V_lo_cloner);
 	V_loif = NULL;
 }
-VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PSEUDO_DONE, SI_ORDER_SECOND,
     vnet_loif_uninit, NULL);
 #endif
 

Modified: projects/vnet/sys/net/route.c
==============================================================================
--- projects/vnet/sys/net/route.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/route.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -1129,6 +1129,15 @@ rt_ifdelroute(const struct rtentry *rt, 
  * to this interface...oh well...
  */
 void
+rt_flushifroutes_af(struct ifnet *ifp, int af)
+{
+	KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
+	    __func__, af, AF_MAX));
+
+	rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
+}
+
+void
 rt_flushifroutes(struct ifnet *ifp)
 {
 

Modified: projects/vnet/sys/net/route.h
==============================================================================
--- projects/vnet/sys/net/route.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/route.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -436,6 +436,7 @@ typedef int rt_walktree_f_t(struct rtent
 typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
 void	rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
 void	rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
+void	rt_flushifroutes_af(struct ifnet *, int);
 void	rt_flushifroutes(struct ifnet *ifp);
 
 /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */

Modified: projects/vnet/sys/net/vnet.c
==============================================================================
--- projects/vnet/sys/net/vnet.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/vnet.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -233,6 +233,7 @@ vnet_alloc(void)
 	SDT_PROBE1(vnet, functions, vnet_alloc, entry, __LINE__);
 	vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO);
 	vnet->vnet_magic_n = VNET_MAGIC_N;
+	vnet->vnet_state = VNET_STATE_STARTING;
 	SDT_PROBE2(vnet, functions, vnet_alloc, alloc, __LINE__, vnet);
 
 	/*
@@ -255,6 +256,7 @@ vnet_alloc(void)
 	CURVNET_RESTORE();
 
 	VNET_LIST_WLOCK();
+	vnet->vnet_state = VNET_STATE_ACTIVE;
 	LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le);
 	VNET_LIST_WUNLOCK();
 
@@ -274,6 +276,7 @@ vnet_destroy(struct vnet *vnet)
 	    ("%s: vnet still has sockets", __func__));
 
 	VNET_LIST_WLOCK();
+	vnet->vnet_state = VNET_STATE_DYING;
 	LIST_REMOVE(vnet, vnet_le);
 	VNET_LIST_WUNLOCK();
 
@@ -293,6 +296,23 @@ vnet_destroy(struct vnet *vnet)
 }
 
 /*
+ * Cloned interfaces have become such a layer violation that they need
+ * special treatmeant.   They need to go very first and they need to be able
+ * to clean themselves up entirely and not wait for the stack to shutdown as
+ * we if_free() them.  We would like to split the cleanup of them up as well
+ * but in a non-VNET context a ifconfig foo0 destroy still has t work as well.
+ * This MUST be the only SYSUNINIT on SI_SUB_PSEUDO_DONE/SI_ORDER_FIRST!
+ */
+static void
+vnet_uninit_after_pseudo(const void *unused __unused)
+{
+
+	curvnet->vnet_state = VNET_STATE_DYING_AFTER_PSEUDO;
+}
+VNET_SYSUNINIT(vnet_if_uninit_after_pseudo, SI_SUB_PSEUDO_DONE, SI_ORDER_FIRST,
+    vnet_uninit_after_pseudo, NULL);
+
+/*
  * Boot time initialization and allocation of virtual network stacks.
  */
 static void
@@ -687,6 +707,9 @@ DB_SHOW_COMMAND(vnets, db_show_vnets)
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_FOREACH(vnet_iter) {
+		if (have_addr && addr != 0 &&
+		    (struct vnet *)addr != vnet_iter)
+			continue;
 		db_printf("vnet            = %p\n", vnet_iter);
 		db_printf(" vnet_magic_n   = 0x%x (%s, orig 0x%x)\n",
 		    vnet_iter->vnet_magic_n,
@@ -697,6 +720,7 @@ DB_SHOW_COMMAND(vnets, db_show_vnets)
 		db_printf(" vnet_data_mem  = %p\n", vnet_iter->vnet_data_mem);
 		db_printf(" vnet_data_base = 0x%jx\n",
 		    (uintmax_t)vnet_iter->vnet_data_base);
+		db_printf(" vnet_state     = %#x\n", vnet_iter->vnet_state);
 		db_printf("\n");
 		if (db_pager_quit)
 			break;

Modified: projects/vnet/sys/net/vnet.h
==============================================================================
--- projects/vnet/sys/net/vnet.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/net/vnet.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -70,6 +70,11 @@ struct vnet {
 	u_int			 vnet_magic_n;
 	u_int			 vnet_ifcnt;
 	u_int			 vnet_sockcnt;
+	u_int			 vnet_state;
+#define	VNET_STATE_STARTING		0x01
+#define	VNET_STATE_ACTIVE		0x02
+#define	VNET_STATE_DYING		0x04
+#define	VNET_STATE_DYING_AFTER_PSEUDO	0x08
 	void			*vnet_data_mem;
 	uintptr_t		 vnet_data_base;
 };

Modified: projects/vnet/sys/netinet/igmp.c
==============================================================================
--- projects/vnet/sys/netinet/igmp.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet/igmp.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -702,10 +702,6 @@ igi_delete_locked(const struct ifnet *if
 			return;
 		}
 	}
-
-#ifdef INVARIANTS
-	panic("%s: igmp_ifsoftc not found for ifp %p\n", __func__,  ifp);
-#endif
 }
 
 /*
@@ -3601,11 +3597,8 @@ static void
 vnet_igmp_uninit(const void *unused __unused)
 {
 
+	/* This can happen when we shutdown the entire network stack. */
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
-
-	VNET_ASSERT(LIST_EMPTY(&V_igi_head),
-	    ("%s: igi list %p not empty; ifnets not detached?", __func__,
-	    &V_igi_head));
 }
 VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY,
     vnet_igmp_uninit, NULL);

Modified: projects/vnet/sys/netinet/in.c
==============================================================================
--- projects/vnet/sys/netinet/in.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet/in.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -895,6 +895,39 @@ in_scrubprefix(struct in_ifaddr *target,
 
 #undef rtinitflags
 
+void
+in_ifscrub_all(void)
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa, *nifa;
+	struct ifaliasreq ifr;
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		/* Cannot lock here - lock recursion. */
+		/* IF_ADDR_RLOCK(ifp); */
+		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
+			if (ifa->ifa_addr->sa_family != AF_INET)
+				continue;
+
+			/*
+			 * This is ugly but the only way for legacy IP to
+			 * cleanly remove addresses and everything attached.
+			 */
+			bzero(&ifr, sizeof(ifr));
+			ifr.ifra_addr = *ifa->ifa_addr;
+			if (ifa->ifa_dstaddr)
+			ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
+			(void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
+			    ifp, NULL);
+		}
+		/* IF_ADDR_RUNLOCK(ifp); */
+		in_purgemaddrs(ifp);
+		igmp_domifdetach(ifp);
+	}
+	IFNET_RUNLOCK();
+}
+
 /*
  * Return 1 if the address might be a local broadcast address.
  */
@@ -939,12 +972,14 @@ in_broadcast(struct in_addr in, struct i
  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
  */
 void
-in_ifdetach(struct ifnet *ifp)
+in_ifdetach(struct ifnet *ifp, int purgeulp)
 {
 
-	in_pcbpurgeif0(&V_ripcbinfo, ifp);
-	in_pcbpurgeif0(&V_udbinfo, ifp);
-	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
+	if (purgeulp) {
+		in_pcbpurgeif0(&V_ripcbinfo, ifp);
+		in_pcbpurgeif0(&V_udbinfo, ifp);
+		in_pcbpurgeif0(&V_ulitecbinfo, ifp);
+	}
 	in_purgemaddrs(ifp);
 }
 

Modified: projects/vnet/sys/netinet/in.h
==============================================================================
--- projects/vnet/sys/netinet/in.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet/in.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -648,7 +648,7 @@ char	*inet_ntoa(struct in_addr); /* in l
 char	*inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */
 char	*inet_ntop(int, const void *, char *, socklen_t); /* in libkern */
 int	 inet_pton(int af, const char *, void *); /* in libkern */
-void	 in_ifdetach(struct ifnet *);
+void	 in_ifdetach(struct ifnet *, int);
 
 #define	in_hosteq(s, t)	((s).s_addr == (t).s_addr)
 #define	in_nullhost(x)	((x).s_addr == INADDR_ANY)

Modified: projects/vnet/sys/netinet/in_var.h
==============================================================================
--- projects/vnet/sys/netinet/in_var.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet/in_var.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -376,6 +376,7 @@ int	in_control(struct socket *, u_long, 
 	    struct thread *);
 int	in_addprefix(struct in_ifaddr *, int);
 int	in_scrubprefix(struct in_ifaddr *, u_int);
+void	in_ifscrub_all(void);
 void	ip_input(struct mbuf *);
 void	ip_direct_input(struct mbuf *);
 void	in_ifadown(struct ifaddr *ifa, int);

Modified: projects/vnet/sys/netinet/ip_input.c
==============================================================================
--- projects/vnet/sys/netinet/ip_input.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet/ip_input.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -364,6 +364,7 @@ ip_init(void)
 static void
 ip_destroy(void *unused __unused)
 {
+	struct ifnet *ifp;
 	int error;
 
 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
@@ -382,11 +383,21 @@ ip_destroy(void *unused __unused)
 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
 		    "error %d returned\n", __func__, error);
 	}
-	/* Cleanup in_ifaddr hash table; should be empty. */
-	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
+
+	/* Remove the IPv4 addresses from all interfaces. */
+	in_ifscrub_all();
+
+	/* Make sure the IPv4 routes are gone as well. */
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
+		rt_flushifroutes_af(ifp, AF_INET);
+	IFNET_RUNLOCK();
 
 	/* Destroy IP reassembly queue. */
 	ipreass_destroy();
+
+	/* Cleanup in_ifaddr hash table; should be empty. */
+	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
 }
 
 VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);

Modified: projects/vnet/sys/netinet6/in6_ifattach.c
==============================================================================
--- projects/vnet/sys/netinet6/in6_ifattach.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet6/in6_ifattach.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -765,7 +765,7 @@ in6_ifattach(struct ifnet *ifp, struct i
  * from the ifnet list in bsdi.
  */
 void
-in6_ifdetach(struct ifnet *ifp)
+in6_ifdetach(struct ifnet *ifp, int purgeulp)
 {
 	struct ifaddr *ifa, *next;
 
@@ -773,7 +773,13 @@ in6_ifdetach(struct ifnet *ifp)
 		return;
 
 	/* remove neighbor management table */
-	nd6_purge(ifp);
+	/*
+	 * Enabling the nd6_purge will panic on vmove for interfaces on VNET
+	 * teardown as the IPv6 layer is cleaned up already and the locks
+	 * are destroyed.
+	 */
+	if (purgeulp)
+		nd6_purge(ifp);
 
 	/*
 	 * nuke any of IPv6 addresses we have
@@ -784,9 +790,11 @@ in6_ifdetach(struct ifnet *ifp)
 			continue;
 		in6_purgeaddr(ifa);
 	}
-	in6_pcbpurgeif0(&V_udbinfo, ifp);
-	in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
-	in6_pcbpurgeif0(&V_ripcbinfo, ifp);
+	if (purgeulp) {
+		in6_pcbpurgeif0(&V_udbinfo, ifp);
+		in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
+		in6_pcbpurgeif0(&V_ripcbinfo, ifp);
+	}
 	/* leave from all multicast groups joined */
 	in6_purgemaddrs(ifp);
 
@@ -798,7 +806,8 @@ in6_ifdetach(struct ifnet *ifp)
 	 * prefixes after removing all addresses above.
 	 * (Or can we just delay calling nd6_purge until at this point?)
 	 */
-	nd6_purge(ifp);
+	if (purgeulp)
+		nd6_purge(ifp);
 }
 
 int

Modified: projects/vnet/sys/netinet6/in6_ifattach.h
==============================================================================
--- projects/vnet/sys/netinet6/in6_ifattach.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet6/in6_ifattach.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -36,7 +36,7 @@
 #ifdef _KERNEL
 void in6_ifattach(struct ifnet *, struct ifnet *);
 void in6_ifattach_destroy(void);
-void in6_ifdetach(struct ifnet *);
+void in6_ifdetach(struct ifnet *, int);
 int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
 void in6_tmpaddrtimer(void *);
 int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);

Modified: projects/vnet/sys/netinet6/ip6_input.c
==============================================================================
--- projects/vnet/sys/netinet6/ip6_input.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet6/ip6_input.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -114,6 +114,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_ifattach.h>
+#include <netinet6/mld6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/in6_rss.h>
 
@@ -308,6 +309,8 @@ ip6proto_unregister(short ip6proto)
 static void
 ip6_destroy(void *unused __unused)
 {
+	struct ifaddr *ifa, *nifa;
+	struct ifnet *ifp;
 	int error;
 
 	if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
@@ -325,9 +328,30 @@ ip6_destroy(void *unused __unused)
 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: "
 		    "error %d returned\n", __func__, error);
 	}
-	hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
+
+	/* Cleanup addresses. */
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		/* Cannot lock here - lock recursion. */
+		/* IF_ADDR_LOCK(ifp); */
+		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
+
+			if (ifa->ifa_addr->sa_family != AF_INET6)
+				continue;
+			in6_purgeaddr(ifa);
+		}
+		/* IF_ADDR_UNLOCK(ifp); */
+		in6_ifdetach(ifp, 0);
+		mld_domifdetach(ifp);
+		/* Make sure any routes are gone as well. */
+		rt_flushifroutes_af(ifp, AF_INET6);
+	}
+	IFNET_RUNLOCK();
+
 	nd6_destroy();
 	in6_ifattach_destroy();
+
+	hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
 }
 
 VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL);

Modified: projects/vnet/sys/netinet6/mld6.c
==============================================================================
--- projects/vnet/sys/netinet6/mld6.c	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/netinet6/mld6.c	Sat Apr  2 13:51:06 2016	(r297512)
@@ -300,7 +300,8 @@ mld_restore_context(struct mbuf *m)
 
 #if defined(VIMAGE) && defined(INVARIANTS)
 	KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
-	    ("%s: called when curvnet was not restored", __func__));
+	    ("%s: called when curvnet was not restored: cuvnet %p m ptr %p",
+	    __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
 #endif
 	return (m->m_pkthdr.flowid);
 }
@@ -611,9 +612,6 @@ mli_delete_locked(const struct ifnet *if
 			return;
 		}
 	}
-#ifdef INVARIANTS
-	panic("%s: mld_ifsoftc not found for ifp %p\n", __func__,  ifp);
-#endif
 }
 
 /*
@@ -3290,10 +3288,8 @@ static void
 vnet_mld_uninit(const void *unused __unused)
 {
 
+	/* This can happen if we shutdown the network stack. */
 	CTR1(KTR_MLD, "%s: tearing down", __func__);
-
-	KASSERT(LIST_EMPTY(&V_mli_head),
-	    ("%s: mli list not empty; ifnets not detached?", __func__));
 }
 VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
     NULL);

Modified: projects/vnet/sys/sys/kernel.h
==============================================================================
--- projects/vnet/sys/sys/kernel.h	Sat Apr  2 12:03:08 2016	(r297511)
+++ projects/vnet/sys/sys/kernel.h	Sat Apr  2 13:51:06 2016	(r297512)
@@ -150,6 +150,7 @@ enum sysinit_sub_id {
 	SI_SUB_ROOT_CONF	= 0xb000000,	/* Find root devices */
 	SI_SUB_INTRINSIC_POST	= 0xd000000,	/* proc 0 cleanup*/
 	SI_SUB_SYSCALLS		= 0xd800000,	/* register system calls */
+	SI_SUB_PSEUDO_DONE	= 0xdb00000,	/* pseudo dev removal; last! */
 	SI_SUB_VNET_DONE	= 0xdc00000,	/* vnet registration complete */
 	SI_SUB_KTHREAD_INIT	= 0xe000000,	/* init process*/
 	SI_SUB_KTHREAD_PAGE	= 0xe400000,	/* pageout daemon*/



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201604021351.u32Dp7h1003312>