Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 14 Aug 2013 23:43:05 +0300
From:      Mikolaj Golub <trociny@FreeBSD.org>
To:        Marko Zec <zec@fer.hr>
Cc:        freebsd-virtualization@freebsd.org
Subject:   Re: RFC: ipfw nat VIMAGE improvements
Message-ID:  <20130814204303.GA13541@gmail.com>
In-Reply-To: <201308141728.31361.zec@fer.hr>
References:  <20130811200111.GA49895@gmail.com> <201308141728.31361.zec@fer.hr>

next in thread | previous in thread | raw e-mail | index | archive | help

--1yeeQ81UyVL57Vl7
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Aug 14, 2013 at 05:28:31PM +0200, Marko Zec wrote:
> On Sunday 11 August 2013 22:01:12 Mikolaj Golub wrote:
> > Hi,
> >
> > I would like to commit this patch that fixes some issues related to
> > ipfw nat module load/unload on VIMAGE featured system.
> >
> > Any comments, objections?
> 
> Far from being an expert in ipfw, I'm worried that the proposed approach of 
> simultaneously acquiring locks on _all_ ipfw instances might be calling for 
> trouble:
> 
> +       VNET_LIST_RLOCK();
> +       VNET_FOREACH(vnet_iter) {
> +               CURVNET_SET(vnet_iter);
> +               IPFW_WLOCK(&V_layer3_chain);
> +               CURVNET_RESTORE();
> +       }
>         ipfw_nat_ptr = ipfw_nat;
>         lookup_nat_ptr = lookup_nat;
>         ipfw_nat_cfg_ptr = ipfw_nat_cfg;
>         ipfw_nat_del_ptr = ipfw_nat_del;
>         ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
>         ipfw_nat_get_log_ptr = ipfw_nat_get_log;
> -       IPFW_WUNLOCK(&V_layer3_chain);
> -       V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
> +       VNET_FOREACH(vnet_iter) {
> +               CURVNET_SET(vnet_iter);
> +               IPFW_WUNLOCK(&V_layer3_chain);
> +               CURVNET_RESTORE();
> +       }
> +       VNET_LIST_RUNLOCK();
> 
> Why couldn't we introduce a per-vnet flag, say V_ipfw_nat_ready, and use it 
> as
> 
> #define IPFW_NAT_LOADED (V_ipfw_nat_ready)
> 
> instead of current version of that macro:
> 
> #define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
> 
> I.e., perhaps in ipfw_nat_init() we could first set all the function 
> pointers, and then iterate over all vnets and set V_ipfw_nat_ready there.  
> In ipfw_nat_destroy() we would first iterate over all vnets to clear the 
> flag, before clearing function pointers?

I like your approach. Though instead of iterating vnets in
ipfw_nat_init/destroy I think it is safe just to set/unset
V_ipfw_nat_ready in vnet_ipfw_nat_init/uninit.

-- 
Mikolaj Golub

--1yeeQ81UyVL57Vl7
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: inline; filename="ip_fw_nat.c.VIMAGE.2.patch"

commit 76323cd328717de5b77d0d1e2e23150c482c630a
Author: Mikolaj Golub <trociny@freebsd.org>
Date:   Sun Aug 4 13:49:50 2013 +0300

    Make ipfw nat init/uninit work correctly for VIMAGE:
    
    * Do per vnet instance cleanup (previously it was only for vnet0 on
      module unload, and led to libalias leaks and possible panics due to
      stale pointer dereferences).
    
    * Instead of protecting ipfw hooks registering/deregistering by only
      vnet0 lock (which does not prevent pointers access from other
      vnets), introduce per vnet ipfw_nat_ready variable. The variable is
      set after hooks are registered and unset before they are deregistered.
    
    * Devirtualize ifaddr_event_tag as we run only one event handler for
      all vnets.
    
    * It is supposed that ifaddr_change event handler is called in the
      interface vnet context, so add the assertion.

diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index 6317013..b9dc18e 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -142,6 +142,8 @@ VNET_DEFINE(int, verbose_limit);
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
+VNET_DEFINE(int, ipfw_nat_ready) = 0;	/* backs V_ipfw_nat_ready */
+
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c
index 84852db..155eddd 100644
--- a/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/sys/netpfil/ipfw/ip_fw_nat.c
@@ -53,8 +53,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
-static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
-#define	V_ifaddr_event_tag	VNET(ifaddr_event_tag)
+static eventhandler_tag ifaddr_event_tag;
 
 static void
 ifaddr_change(void *arg __unused, struct ifnet *ifp)
@@ -63,6 +62,8 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp)
 	struct ifaddr *ifa;
 	struct ip_fw_chain *chain;
 
+	KASSERT(curvnet == ifp->if_vnet,
+	    ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
 	chain = &V_layer3_chain;
 	IPFW_WLOCK(chain);
 	/* Check every nat entry... */
@@ -589,11 +590,38 @@ ipfw_nat_get_log(struct sockopt *sopt)
 	return(0);
 }
 
+static int
+vnet_ipfw_nat_init(const void *arg __unused)
+{
+
+	V_ipfw_nat_ready = 1;	/* IPFW_NAT_LOADED is now true in this vnet */
+	return (0);
+}
+
+static int
+vnet_ipfw_nat_uninit(const void *arg __unused)
+{
+	struct cfg_nat *ptr, *ptr_temp;
+	struct ip_fw_chain *chain;
+
+	chain = &V_layer3_chain;	/* this vnet's layer 3 rule chain */
+	IPFW_WLOCK(chain);
+	LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+		LIST_REMOVE(ptr, _next);	/* unlink before freeing */
+		del_redir_spool_cfg(ptr, &ptr->redir_chain);
+		LibAliasUninit(ptr->lib);
+		free(ptr, M_IPFW);
+	}
+	flush_nat_ptrs(chain, -1 /* flush all */);
+	V_ipfw_nat_ready = 0;	/* IPFW_NAT_LOADED is now false in this vnet */
+	IPFW_WUNLOCK(chain);
+	return (0);
+}
+
 static void
 ipfw_nat_init(void)
 {
 
-	IPFW_WLOCK(&V_layer3_chain);
 	/* init ipfw hooks */
 	ipfw_nat_ptr = ipfw_nat;
 	lookup_nat_ptr = lookup_nat;
@@ -601,28 +629,16 @@ ipfw_nat_init(void)
 	ipfw_nat_del_ptr = ipfw_nat_del;
 	ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
 	ipfw_nat_get_log_ptr = ipfw_nat_get_log;
-	IPFW_WUNLOCK(&V_layer3_chain);
-	V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
-	    ifaddr_event, ifaddr_change,
+
+	ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 
 static void
 ipfw_nat_destroy(void)
 {
-	struct cfg_nat *ptr, *ptr_temp;
-	struct ip_fw_chain *chain;
 
-	chain = &V_layer3_chain;
-	IPFW_WLOCK(chain);
-	LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
-		LIST_REMOVE(ptr, _next);
-		del_redir_spool_cfg(ptr, &ptr->redir_chain);
-		LibAliasUninit(ptr->lib);
-		free(ptr, M_IPFW);
-	}
-	EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
-	flush_nat_ptrs(chain, -1 /* flush all */);
+	EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
 	/* deregister ipfw_nat */
 	ipfw_nat_ptr = NULL;
 	lookup_nat_ptr = NULL;
@@ -630,7 +646,6 @@ ipfw_nat_destroy(void)
 	ipfw_nat_del_ptr = NULL;
 	ipfw_nat_get_cfg_ptr = NULL;
 	ipfw_nat_get_log_ptr = NULL;
-	IPFW_WUNLOCK(chain);
 }
 
 static int
@@ -640,11 +655,9 @@ ipfw_nat_modevent(module_t mod, int type, void *unused)
 
 	switch (type) {
 	case MOD_LOAD:
-		ipfw_nat_init();
 		break;
 
 	case MOD_UNLOAD:
-		ipfw_nat_destroy();
 		break;
 
 	default:
@@ -660,8 +673,25 @@ static moduledata_t ipfw_nat_mod = {
 	0
 };
 
-DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+/* Define startup order. */
+#define	IPFW_NAT_SI_SUB_FIREWALL	SI_SUB_PROTO_IFATTACHDOMAIN
+#define	IPFW_NAT_MODEVENT_ORDER		(SI_ORDER_ANY - 255)
+#define	IPFW_NAT_MODULE_ORDER		(IPFW_NAT_MODEVENT_ORDER + 1)
+#define	IPFW_NAT_VNET_ORDER		(IPFW_NAT_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
 MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
 MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
 MODULE_VERSION(ipfw_nat, 1);
+
+SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+    ipfw_nat_init, NULL);
+VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
+    vnet_ipfw_nat_init, NULL);
+
+SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+    ipfw_nat_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
+    IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);
+
 /* end of file */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index a41cdf5..a8d7eea 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -327,9 +327,11 @@ extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
 typedef int ipfw_nat_cfg_t(struct sockopt *);
 
-extern ipfw_nat_t *ipfw_nat_ptr;
-#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
+VNET_DECLARE(int, ipfw_nat_ready);
+#define	V_ipfw_nat_ready	VNET(ipfw_nat_ready)
+#define	IPFW_NAT_LOADED	(V_ipfw_nat_ready)
 
+extern ipfw_nat_t *ipfw_nat_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;

--1yeeQ81UyVL57Vl7--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130814204303.GA13541>