Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 21 Apr 2009 22:43:32 +0000 (UTC)
From:      Robert Watson <rwatson@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r191367 - head/sys/net
Message-ID:  <200904212243.n3LMhW48027008@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rwatson
Date: Tue Apr 21 22:43:32 2009
New Revision: 191367
URL: http://svn.freebsd.org/changeset/base/191367

Log:
  Start to address a number of races relating to use of ifnet pointers
  after the corresponding interface has been destroyed:
  
  (1) Add an ifnet refcount, ifp->if_refcount.  Initialize it to 1 in
      if_alloc(), and modify if_free_type() to decrement and check the
      refcount.
  
  (2) Add new if_ref() and if_rele() interfaces to allow kernel code
      walking global interface lists to release IFNET_[RW]LOCK() yet
      keep the ifnet stable.  Currently, if_rele() is a no-op wrapper
      around if_free(), but this may change in the future.
  
  (3) Add new ifnet field, if_alloctype, which caches the type passed
      to if_alloc(), but unlike if_type, won't be changed by drivers.
      This allows asynchronous free's of the interface after the
      driver has released it to still use the right type.  Use that
      instead of the type passed to if_free_type(), but assert that
      they are the same (might have to rethink this if that doesn't
      work out).
  
  (4) Add a new ifnet_byindex_ref(), which looks up an interface by
      index and returns a reference rather than a pointer to it.
  
  (5) Fix if_alloc() to fully initialize the if_addr_mtx before hooking
      up the ifnet to global lists.
  
  (6) Modify sysctls in if_mib.c to use ifnet_byindex_ref() and release
      the ifnet when done.
  
  When this change is MFC'd, it will need to replace if_ispare fields
  rather than adding new fields in order to avoid breaking the binary
  interface.  Once this change is MFC'd, if_free_type() should be
  removed, as its 'type' argument is now optional.
  
  This refcount is not appropriate for counting mbuf pkthdr references,
  and also not for counting entry into the device driver via ifnet
  function pointers.  An rmlock may be appropriate for the latter.
  Rather, this is about ensuring data structure stability when reaching
  an ifnet via global ifnet lists and tables followed by copy in or out
  of userspace.
  
  MFC after:      3 weeks
  Reported by:    mdtancsa
  Reviewed by:    brooks

Modified:
  head/sys/net/if.c
  head/sys/net/if_mib.c
  head/sys/net/if_var.h

Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c	Tue Apr 21 19:14:13 2009	(r191366)
+++ head/sys/net/if.c	Tue Apr 21 22:43:32 2009	(r191367)
@@ -52,6 +52,7 @@
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
@@ -204,19 +205,35 @@ static struct ifnet *
 ifnet_byindex_locked(u_short idx)
 {
 	INIT_VNET_NET(curvnet);
+
+	if (idx > V_if_index)
+		return (NULL);
+	return (V_ifindex_table[idx].ife_ifnet);
+}
+
+struct ifnet *
+ifnet_byindex(u_short idx)
+{
 	struct ifnet *ifp;
 
-	ifp = V_ifindex_table[idx].ife_ifnet;
+	IFNET_RLOCK();
+	ifp = ifnet_byindex_locked(idx);
+	IFNET_RUNLOCK();
 	return (ifp);
 }
 
 struct ifnet *
-ifnet_byindex(u_short idx)
+ifnet_byindex_ref(u_short idx)
 {
 	struct ifnet *ifp;
 
 	IFNET_RLOCK();
 	ifp = ifnet_byindex_locked(idx);
+	if (ifp == NULL) {
+		IFNET_RUNLOCK();
+		return (NULL);
+	}
+	if_ref(ifp);
 	IFNET_RUNLOCK();
 	return (ifp);
 }
@@ -490,6 +507,7 @@ if_alloc(u_char type)
 		if_grow();
 
 	ifp->if_type = type;
+	ifp->if_alloctype = type;
 
 	if (if_com_alloc[type] != NULL) {
 		ifp->if_l2com = if_com_alloc[type](type, ifp);
@@ -498,11 +516,12 @@ if_alloc(u_char type)
 			return (NULL);
 		}
 	}
+
+	IF_ADDR_LOCK_INIT(ifp);
+	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
 	IFNET_WLOCK();
 	ifnet_setbyindex(ifp->if_index, ifp);
 	IFNET_WUNLOCK();
-	IF_ADDR_LOCK_INIT(ifp);
-
 	return (ifp);
 }
 
@@ -516,7 +535,7 @@ void
 if_free(struct ifnet *ifp)
 {
 
-	if_free_type(ifp, ifp->if_type);
+	if_free_type(ifp, ifp->if_alloctype);
 }
 
 /*
@@ -529,28 +548,50 @@ if_free_type(struct ifnet *ifp, u_char t
 {
 	INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */
 
-	if (ifp != ifnet_byindex(ifp->if_index)) {
-		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
-		    __func__);
+	/*
+	 * Some drivers modify if_type, so we can't rely on it being the
+	 * same in free as it was in alloc.  Now that we have if_alloctype,
+	 * we should just use that, but drivers expect to pass a type.
+	 */
+	KASSERT(ifp->if_alloctype == type,
+	    ("if_free_type: type (%d) != alloctype (%d)", type,
+	    ifp->if_alloctype));
+
+	if (!refcount_release(&ifp->if_refcount))
 		return;
-	}
 
 	IFNET_WLOCK();
+	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
+	    ("%s: freeing unallocated ifnet", ifp->if_xname));
 	ifnet_setbyindex(ifp->if_index, NULL);
-
-	/* XXX: should be locked with if_findindex() */
 	while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
 		V_if_index--;
 	IFNET_WUNLOCK();
 
-	if (if_com_free[type] != NULL)
-		if_com_free[type](ifp->if_l2com, type);
+	if (if_com_free[ifp->if_alloctype] != NULL)
+		if_com_free[ifp->if_alloctype](ifp->if_l2com,
+		    ifp->if_alloctype);
 
 	IF_ADDR_LOCK_DESTROY(ifp);
 	free(ifp, M_IFNET);
 }
 
 void
+if_ref(struct ifnet *ifp)
+{
+
+	/* We don't assert the ifnet list lock here, but arguably should. */
+	refcount_acquire(&ifp->if_refcount);
+}
+
+void
+if_rele(struct ifnet *ifp)
+{
+
+	if_free(ifp);
+}
+
+void
 ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
 {
 	

Modified: head/sys/net/if_mib.c
==============================================================================
--- head/sys/net/if_mib.c	Tue Apr 21 19:14:13 2009	(r191366)
+++ head/sys/net/if_mib.c	Tue Apr 21 22:43:32 2009	(r191367)
@@ -88,16 +88,16 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XX
 
 	if (namelen != 2)
 		return EINVAL;
-
-	if (name[0] <= 0 || name[0] > V_if_index ||
-	    ifnet_byindex(name[0]) == NULL)
-		return ENOENT;
-
-	ifp = ifnet_byindex(name[0]);
+	if (name[0] <= 0)
+		return (ENOENT);
+	ifp = ifnet_byindex_ref(name[0]);
+	if (ifp == NULL)
+		return (ENOENT);
 
 	switch(name[1]) {
 	default:
-		return ENOENT;
+		error = ENOENT;
+		goto out;
 
 	case IFDATA_GENERAL:
 		bzero(&ifmd, sizeof(ifmd));
@@ -114,11 +114,11 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XX
 
 		error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
 		if (error || !req->newptr)
-			return error;
+			goto out;
 
 		error = SYSCTL_IN(req, &ifmd, sizeof ifmd);
 		if (error)
-			return error;
+			goto out;
 
 #define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld
 		DONTCOPY(type);
@@ -139,18 +139,20 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XX
 	case IFDATA_LINKSPECIFIC:
 		error = SYSCTL_OUT(req, ifp->if_linkmib, ifp->if_linkmiblen);
 		if (error || !req->newptr)
-			return error;
+			goto out;
 
 		error = SYSCTL_IN(req, ifp->if_linkmib, ifp->if_linkmiblen);
 		if (error)
-			return error;
+			goto out;
 		break;
 
 	case IFDATA_DRIVERNAME:
 		/* 20 is enough for 64bit ints */
 		dlen = strlen(ifp->if_dname) + 20 + 1;
-		if ((dbuf = malloc(dlen, M_TEMP, M_NOWAIT)) == NULL)
-			return (ENOMEM);
+		if ((dbuf = malloc(dlen, M_TEMP, M_NOWAIT)) == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
 		if (ifp->if_dunit == IF_DUNIT_NONE)
 			strcpy(dbuf, ifp->if_dname);
 		else
@@ -160,9 +162,11 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XX
 		if (error == 0 && req->newptr != NULL)
 			error = EPERM;
 		free(dbuf, M_TEMP);
-		return (error);
+		goto out;
 	}
-	return 0;
+out:
+	if_rele(ifp);
+	return error;
 }
 
 SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW,

Modified: head/sys/net/if_var.h
==============================================================================
--- head/sys/net/if_var.h	Tue Apr 21 19:14:13 2009	(r191366)
+++ head/sys/net/if_var.h	Tue Apr 21 22:43:32 2009	(r191367)
@@ -121,6 +121,7 @@ struct ifnet {
 	char	if_xname[IFNAMSIZ];	/* external name (name + unit) */
 	const char *if_dname;		/* driver name */
 	int	if_dunit;		/* unit or IF_DUNIT_NONE */
+	u_int	if_refcount;		/* reference count */
 	struct	ifaddrhead if_addrhead;	/* linked list of addresses per if */
 		/*
 		 * if_addrhead is the list of all addresses associated to
@@ -190,12 +191,14 @@ struct ifnet {
 					/* protected by if_addr_mtx */
 	void	*if_pf_kif;
 	void	*if_lagg;		/* lagg glue */
+	u_char	 if_alloctype;		/* if_type at time of allocation */
 
 	/*
 	 * Spare fields are added so that we can modify sensitive data
 	 * structures without changing the kernel binary interface, and must
 	 * be used with care where binary compatibility is required.
 	 */
+	char	 if_cspare[3];
 	void	*if_pspare[8];
 	int	if_ispare[4];
 };
@@ -721,7 +724,13 @@ struct ifindex_entry {
 	struct cdev *ife_dev;
 };
 
+/*
+ * Look up an ifnet given its index; the _ref variant also acquires a
+ * reference that must be freed using if_rele().  It is almost always a bug
+ * to call ifnet_byindex() instead if ifnet_byindex_ref().
+ */
 struct ifnet	*ifnet_byindex(u_short idx);
+struct ifnet	*ifnet_byindex_ref(u_short idx);
 
 /*
  * Given the index, ifaddr_byindex() returns the one and only
@@ -758,6 +767,8 @@ void	if_initname(struct ifnet *, const c
 void	if_link_state_change(struct ifnet *, int);
 int	if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
 void	if_qflush(struct ifnet *);
+void	if_ref(struct ifnet *);
+void	if_rele(struct ifnet *);
 int	if_setlladdr(struct ifnet *, const u_char *, int);
 void	if_up(struct ifnet *);
 /*void	ifinit(void);*/ /* declared in systm.h for main() */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200904212243.n3LMhW48027008>