Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 16 Nov 2014 20:12:50 +0000 (UTC)
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r274594 - in projects/routing/sys: net netinet netinet6
Message-ID:  <201411162012.sAGKCofT079139@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: melifaro
Date: Sun Nov 16 20:12:49 2014
New Revision: 274594
URL: https://svnweb.freebsd.org/changeset/base/274594

Log:
  Rework LLE code locking:
  * struct llentry is now basically split into 2 pieces:
    all fields within 64 bytes (amd64) are now protected by both
    ifdata lock AND lle lock, e.g. you require both locks to be held
    exclusively for modification. All data necessary for fast path
    operations is kept here. Some fields were added:
    - r_l3addr - makes lookup key liev within first 64 bytes.
    - r_flags - flags, containing pre-compiled decision whether given
      lle contains usable data or not. Current the only flag is RLLE_VALID.
    - r_len - prepend data len, currently unused
    - r_kick - used to provide feedback to control plane (see below).
    All other fields are protected by lle lock.
  * Add simple state machine for ARP to handle "about to expire" case:
    Current model (for the fast path) is the following:
    - rlock afdata
    - find / rlock rte
    - runlock afdata
    - see if "expire time" is approaching
      (time_uptime + la->la_preempt > la->la_expire)
    - if true, call arprequest() and decrease la_preempt
    - store MAC and runlock rte
    New model (data plane):
    - rlock afdata
    - find rte
    - check if it can be used using r_* fields only
    - if true, store MAC
    - if r_kick field != 0 set it to 0.
    - runlock afdata
    New mode (control plane):
    - schedule arptimer to be called in (V_arpt_keep - V_arp_maxtries)
      seconds instead of V_arpt_keep.
    - on first timer invocation change state from ARP_LLINFO_REACHABLE
      to ARP_LLINFO_VERIFY, sets r_kick to 1 and shedules next call in
      V_arpt_rexmit (default to 1 sec).
    - on subsequent timer invocations in ARP_LLINFO_VERIFY state, checks
      for r_kick value: reschedule if not changed, and send arprequest()
      if set to zero (e.g. entry was used).
  * Convert IPv4 path to use new single-lock approach. IPv6 bits to follow.
  * Slow down in_arpinput(): now valid reply will (in most cases) require
    acquiring afdata WLOCK twice. This is requirement for storing changed
    lle data. This change will be slightly optimized in future.
  * Provide explicit hash link/unlink functions for both ipv4/ipv6 code.
    This will probably be moved to generic lle code once we have per-AF
    hashing callback inside lltable.
  * Perform lle unlink on deletion immediately instead of delaying it to
    the timer routine.
  * Make r244183 more explicit: use new LLE_CALLOUTREF flag to indicate the
    presence of lle reference used for safe callout calls.

Modified:
  projects/routing/sys/net/if_llatbl.c
  projects/routing/sys/net/if_llatbl.h
  projects/routing/sys/netinet/if_ether.c
  projects/routing/sys/netinet/in.c
  projects/routing/sys/netinet6/in6.c
  projects/routing/sys/netinet6/nd6.c

Modified: projects/routing/sys/net/if_llatbl.c
==============================================================================
--- projects/routing/sys/net/if_llatbl.c	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/net/if_llatbl.c	Sun Nov 16 20:12:49 2014	(r274594)
@@ -106,11 +106,13 @@ llentry_free(struct llentry *lle)
 	size_t pkts_dropped;
 	struct mbuf *next;
 
-	IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
 	LLE_WLOCK_ASSERT(lle);
 
-	LIST_REMOVE(lle, lle_next);
-	lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+	if ((lle->la_flags & LLE_LINKED) != 0) {
+		IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
+		LIST_REMOVE(lle, lle_next);
+		lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+	}
 
 	pkts_dropped = 0;
 	while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
@@ -178,8 +180,10 @@ lltable_free(struct lltable *llt)
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
 			LLE_WLOCK(lle);
-			if (callout_stop(&lle->la_timer))
+			if (callout_stop(&lle->la_timer)) {
 				LLE_REMREF(lle);
+				lle->la_flags &= ~LLE_CALLOUTREF;
+			}
 			llentry_free(lle);
 		}
 	}

Modified: projects/routing/sys/net/if_llatbl.h
==============================================================================
--- projects/routing/sys/net/if_llatbl.h	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/net/if_llatbl.h	Sun Nov 16 20:12:49 2014	(r274594)
@@ -48,13 +48,24 @@ extern struct rwlock lltable_rwlock;
 #define	LLTABLE_WUNLOCK()	rw_wunlock(&lltable_rwlock)
 #define	LLTABLE_LOCK_ASSERT()	rw_assert(&lltable_rwlock, RA_LOCKED)
 
-/*
- * Code referencing llentry must at least hold
- * a shared lock
- */
 struct llentry {
+	/* FIELDS PROTECTED BY IFDATA LOCK */
 	LIST_ENTRY(llentry)	 lle_next;
-	struct rwlock		 lle_lock;
+	union {
+		struct in_addr	addr4;
+		struct in6_addr	addr6;
+	} r_l3addr;
+	union {
+		uint64_t	mac_aligned;
+		uint16_t	mac16[3];
+		uint8_t		mac8[20];	/* IB needs 20 bytes. */
+		char		ll_prepend[20];	/* L2 data to prepend */
+	} ll_addr;
+	uint16_t		r_flags;	/* runtime flags */
+	uint16_t		r_len;		/* length of prepend data */
+	uint64_t		r_kick;		/* for unused lle detection */
+
+	/* FIELDS PROTECTED BY LLE rwlock */
 	struct lltable		 *lle_tbl;
 	struct llentries	 *lle_head;
 	void			(*lle_free)(struct lltable *, struct llentry *);
@@ -69,12 +80,7 @@ struct llentry {
 	uint16_t		 ln_router;
 	time_t			 ln_ntick;
 	int			 lle_refcnt;
-
-	union {
-		uint64_t	mac_aligned;
-		uint16_t	mac16[3];
-		uint8_t		mac8[20];	/* IB needs 20 bytes. */
-	} ll_addr;
+	struct rwlock		 lle_lock;
 
 	/* XXX af-private? */
 	union {
@@ -94,8 +100,6 @@ struct llentry {
 #define	LLE_LOCK_DESTROY(lle)	rw_destroy(&(lle)->lle_lock)
 #define	LLE_WLOCK_ASSERT(lle)	rw_assert(&(lle)->lle_lock, RA_WLOCKED)
 
-#define LLE_IS_VALID(lle)	(((lle) != NULL) && ((lle) != (void *)-1))
-
 #define	LLE_ADDREF(lle) do {					\
 	LLE_WLOCK_ASSERT(lle);					\
 	KASSERT((lle)->lle_refcnt >= 0,				\
@@ -170,6 +174,11 @@ struct lltable {
 MALLOC_DECLARE(M_LLTABLE);
 
 /*
+ * LLE flags used by fast path code
+ */
+#define	RLLE_VALID	0x0001	/* ll_addr can be used */
+
+/*
  * Various LLE flags
  */
 #define	LLE_DELETED	0x0001	/* entry must be deleted */
@@ -178,8 +187,10 @@ MALLOC_DECLARE(M_LLTABLE);
 #define	LLE_VALID	0x0008	/* ll_addr is valid */
 #define	LLE_PUB		0x0020	/* publish entry ??? */
 #define	LLE_LINKED	0x0040	/* linked to lookup structure */
+#define	LLE_CALLOUTREF	0x0080	/* callout set */
 /* LLE request flags */
-#define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked  */
+#define	LLE_UNLOCKED	0x0100	/* return lle unlocked  */
+#define	LLE_EXCLUSIVE	0x0200	/* return lle wlocked  */
 
 #define LLATBL_HASH(key, mask) \
 	(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)

Modified: projects/routing/sys/netinet/if_ether.c
==============================================================================
--- projects/routing/sys/netinet/if_ether.c	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/netinet/if_ether.c	Sun Nov 16 20:12:49 2014	(r274594)
@@ -80,6 +80,12 @@ __FBSDID("$FreeBSD$");
 #define SIN(s) ((const struct sockaddr_in *)(s))
 #define SDL(s) ((struct sockaddr_dl *)s)
 
+/* simple arp state machine */
+#define ARP_LLINFO_INCOMPLETE	0	/* no lle data */
+#define ARP_LLINFO_REACHABLE	1	/* lle is valid  */
+#define ARP_LLINFO_VERIFY	2	/* lle valid, re-check needed */
+#define	ARP_LLINFO_DELETED	3	/* entry is deleted */
+
 SYSCTL_DECL(_net_link_ether);
 static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
@@ -91,6 +97,7 @@ static VNET_DEFINE(int, arp_maxtries) = 
 static VNET_DEFINE(int, arp_proxyall) = 0;
 static VNET_DEFINE(int, arpt_down) = 20;	/* keep incomplete entries for
 						 * 20 seconds */
+static VNET_DEFINE(int, arpt_rexmit) = 1;	/* retransmit arp entries, sec */
 VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
 VNET_PCPUSTAT_SYSINIT(arpstat);
 
@@ -102,6 +109,7 @@ static VNET_DEFINE(int, arp_maxhold) = 1
 
 #define	V_arpt_keep		VNET(arpt_keep)
 #define	V_arpt_down		VNET(arpt_down)
+#define	V_arpt_rexmit		VNET(arpt_rexmit)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
 #define	V_arp_maxhold		VNET(arp_maxhold)
@@ -168,15 +176,61 @@ arptimer(void *arg)
 {
 	struct llentry *lle = (struct llentry *)arg;
 	struct ifnet *ifp;
+	size_t pkts_dropped;
+	uint16_t la_flags;
+	int state;
 
 	if (lle->la_flags & LLE_STATIC) {
 		LLE_WUNLOCK(lle);
 		return;
 	}
 
+	la_flags = lle->la_flags;
+	state = (la_flags & LLE_DELETED) ? ARP_LLINFO_DELETED : lle->ln_state;
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
+	switch (state) {
+	case ARP_LLINFO_REACHABLE:
+
+		/*
+		 * Expiration time is approaching.
+		 * Let's try to refresh entry if it is still
+		 * in use.
+		 *
+		 * Set r_kick to get feedback from
+		 * fast path. Change state and re-schedule
+		 * ourselves.
+		 */
+		lle->r_kick = 1;
+		lle->ln_state = ARP_LLINFO_VERIFY;
+		callout_schedule(&lle->la_timer, hz * V_arpt_rexmit);
+		LLE_WUNLOCK(lle);
+		return;
+	case ARP_LLINFO_VERIFY:
+		if (lle->r_kick == 0 && lle->la_preempt > 0) {
+			/* Entry was used, issue refresh request */
+			struct sockaddr_in *dst;
+			dst = (struct sockaddr_in *)L3_ADDR(lle);
+			arprequest(ifp, NULL, &dst->sin_addr, NULL);
+			lle->la_preempt--;
+			lle->r_kick = 1;
+			callout_schedule(&lle->la_timer, hz * V_arpt_rexmit);
+			LLE_WUNLOCK(lle);
+			return;
+		}
+		/* Nothing happened. Reschedule if not too late */
+		if (lle->la_expire > time_uptime) {
+			callout_schedule(&lle->la_timer, hz * V_arpt_rexmit);
+			LLE_WUNLOCK(lle);
+			return;
+		}
+		break;
+	case ARP_LLINFO_INCOMPLETE:
+	case ARP_LLINFO_DELETED:
+		break;
+	}
+
 	if ((lle->la_flags & LLE_DELETED) == 0) {
 		int evt;
 
@@ -194,15 +248,17 @@ arptimer(void *arg)
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(lle);
 
-	/* Guard against race with other llentry_free(). */
-	if (lle->la_flags & LLE_LINKED) {
-		size_t pkts_dropped;
-
+	/*
+	 * Note other thread could have removed given entry
+	 * stopping callout and removing LLE reference.
+	 */
+	if ((lle->la_flags & LLE_CALLOUTREF) != 0) {
 		LLE_REMREF(lle);
-		pkts_dropped = llentry_free(lle);
-		ARPSTAT_ADD(dropped, pkts_dropped);
-	} else
-		LLE_FREE_LOCKED(lle);
+		lle->la_flags &= ~LLE_CALLOUTREF;
+	}
+
+	pkts_dropped = llentry_free(lle);
+	ARPSTAT_ADD(dropped, pkts_dropped);
 
 	IF_AFDATA_UNLOCK(ifp);
 
@@ -295,9 +351,9 @@ int
 arpresolve_fast(struct ifnet *ifp, struct in_addr dst, u_int mflags,
     u_char *dst_addr)
 {
-	int do_arp, error;
 	struct llentry *la;
 	struct sockaddr_in sin;
+	const struct sockaddr *sa_dst;
 
 	if (mflags & M_BCAST) {
 		memcpy(dst_addr, ifp->if_broadcastaddr, ifp->if_addrlen);
@@ -308,17 +364,27 @@ arpresolve_fast(struct ifnet *ifp, struc
 		return (0);
 	}
 
-	do_arp = 0;
-	error = EAGAIN;
-
 	memset(&sin, 0, sizeof(sin));
 	sin.sin_addr = dst;
 	sin.sin_family = AF_INET;
 	sin.sin_len = sizeof(sin);
+	sa_dst = (const struct sockaddr *)&sin;
 
 	IF_AFDATA_RLOCK(ifp);
-	la = lla_lookup(LLTABLE(ifp), 0, (const struct sockaddr *)&sin);
+	la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, sa_dst);
+	if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
+		/* Entry found, let's copy lle info */
+		bcopy(&la->ll_addr, dst_addr, ifp->if_addrlen);
+		if (la->r_kick != 0)
+			la->r_kick = 0; /* Notify that entry was used */
+		IF_AFDATA_RUNLOCK(ifp);
+		return (0);
+	}
+	IF_AFDATA_RUNLOCK(ifp);
 
+	return (EAGAIN);
+
+#if 0
 	/*
 	 * XXX: We need to convert all these checks to single one
 	 */
@@ -347,8 +413,8 @@ arpresolve_fast(struct ifnet *ifp, struc
 	 */
 	if (do_arp != 0)
 		arprequest(ifp, NULL, &dst, NULL);
-
 	return (error);
+#endif
 }
 
 
@@ -372,7 +438,6 @@ arpresolve(struct ifnet *ifp, struct rte
 {
 	struct llentry *la = NULL;
 	int is_gw;
-	uint16_t la_flags;
 
 	*lle = NULL;
 	if (m != NULL) {
@@ -390,38 +455,20 @@ arpresolve(struct ifnet *ifp, struct rte
 	}
 
 	IF_AFDATA_RLOCK(ifp);
-	la = lla_lookup(LLTABLE(ifp), 0, dst);
-	IF_AFDATA_RUNLOCK(ifp);
-	la_flags = la != NULL ? la->la_flags : 0;
-
-	/* Return to slow path if entry is not found or invalid/expired */
-	if (la == NULL || (la_flags & LLE_VALID) == 0 ||
-	    ((la_flags & LLE_STATIC) == 0 && la->la_expire <= time_uptime)) {
-		is_gw = (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY)) ? 1 : 0;
-		if (la != NULL)
-			LLE_RUNLOCK(la);
-		return (arpresolve_slow(ifp, is_gw, m, dst, desten, lle));
-	}
-
-
-	/* Entry found, let's copy lle info */
-	bcopy(&la->ll_addr, desten, ifp->if_addrlen);
-
-	/*
-	 * If entry has an expiry time and it is approaching,
-	 * see if we need to send an ARP request within this
-	 * arpt_down interval.
-	 */
-	if (!(la->la_flags & LLE_STATIC) &&
-	    time_uptime + la->la_preempt > la->la_expire) {
-		arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
-		la->la_preempt--;
+	la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst);
+	if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
+		/* Entry found, let's copy lle info */
+		bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+		if (la->r_kick != 0)
+			la->r_kick = 0; /* Notify that entry was used */
+		IF_AFDATA_RUNLOCK(ifp);
+		*lle = la;
+		return (0);
 	}
+	IF_AFDATA_RUNLOCK(ifp);
 
-	/* XXX: Possible use-after-free */
-	*lle = la;
-	LLE_RUNLOCK(la);
-	return (0);
+	is_gw = (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY)) ? 1 : 0;
+	return (arpresolve_slow(ifp, is_gw, m, dst, desten, lle));
 }
 
 static int
@@ -431,7 +478,7 @@ arpresolve_slow(struct ifnet *ifp, int i
 	struct llentry *la = 0;
 	struct mbuf *curr = NULL;
 	struct mbuf *next = NULL;
-	int create, error, renew;
+	int create, error;
 
 	create = 0;
 	*lle = NULL;
@@ -481,7 +528,6 @@ arpresolve_slow(struct ifnet *ifp, int i
 		goto done;
 	}
 
-	renew = (la->la_asked == 0 || la->la_expire != time_uptime);
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Add the mbuf to the list, dropping
@@ -518,7 +564,7 @@ arpresolve_slow(struct ifnet *ifp, int i
 	else
 		error = (is_gw != 0) ?  EHOSTUNREACH : EHOSTDOWN;
 
-	if (renew) {
+	if (la->la_asked == 0 || la->la_expire != time_uptime) {
 		int canceled;
 
 		LLE_ADDREF(la);
@@ -527,6 +573,8 @@ arpresolve_slow(struct ifnet *ifp, int i
 		    arptimer, la);
 		if (canceled)
 			LLE_REMREF(la);
+		else
+			la->la_flags |= LLE_CALLOUTREF;
 		la->la_asked++;
 		LLE_WUNLOCK(la);
 		arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
@@ -644,7 +692,8 @@ in_arpinput(struct mbuf *m)
 	int op, flags;
 	int req_len;
 	int bridged = 0, is_bridge = 0;
-	int carped, create;
+	int canceled, carped, create;
+	int wtime;
 	struct nhop4_extended nh_ext;
 	struct sockaddr_in sin;
 	sin.sin_len = sizeof(struct sockaddr_in);
@@ -849,20 +898,43 @@ match:
 			    ifp->if_addrlen);
 			goto drop;
 		}
-		(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
-		la->la_flags |= LLE_VALID;
 
+		/* Check if something has changed */
+		if (memcmp(&la->ll_addr, ar_sha(ah), ifp->if_addrlen) != 0 ||
+		    (la->la_flags & LLE_VALID) == 0 ||
+		    la->la_expire != time_uptime + V_arpt_keep) {
+			/* use afdata WLOCK to update fields */
+			LLE_ADDREF(la);
+			LLE_WUNLOCK(la);
+			IF_AFDATA_WLOCK(ifp);
+			LLE_WLOCK(la);
+
+			/* Update data */
+			memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
+			la->la_flags |= LLE_VALID;
+			la->r_flags |= RLLE_VALID;
+			if ((la->la_flags & LLE_STATIC) == 0)
+				la->la_expire = time_uptime + V_arpt_keep;
+
+			IF_AFDATA_WUNLOCK(ifp);
+			LLE_REMREF(la);
+		}
+
+		la->ln_state = ARP_LLINFO_REACHABLE;
 		EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 		if (!(la->la_flags & LLE_STATIC)) {
-			int canceled;
+			wtime = V_arpt_keep - V_arp_maxtries;
+			if (wtime < 0)
+				wtime = V_arpt_keep;
 
 			LLE_ADDREF(la);
-			la->la_expire = time_uptime + V_arpt_keep;
 			canceled = callout_reset(&la->la_timer,
-			    hz * V_arpt_keep, arptimer, la);
+			    hz * wtime, arptimer, la);
 			if (canceled)
 				LLE_REMREF(la);
+			else
+				la->la_flags |= LLE_CALLOUTREF;
 		}
 		la->la_asked = 0;
 		la->la_preempt = V_arp_maxtries;

Modified: projects/routing/sys/netinet/in.c
==============================================================================
--- projects/routing/sys/netinet/in.c	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/netinet/in.c	Sun Nov 16 20:12:49 2014	(r274594)
@@ -76,6 +76,9 @@ static int in_difaddr_ioctl(caddr_t, str
 static void	in_socktrim(struct sockaddr_in *);
 static void	in_purgemaddrs(struct ifnet *);
 
+static void	in_lltable_link(struct lltable *llt, struct llentry *lle);
+static void	in_lltable_unlink(struct llentry *lle);
+
 static VNET_DEFINE(int, nosameprefix);
 #define	V_nosameprefix			VNET(nosameprefix)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
@@ -950,17 +953,21 @@ static struct llentry *
 in_lltable_new(const struct sockaddr *l3addr, u_int flags)
 {
 	struct in_llentry *lle;
+	const struct sockaddr_in *l3addr_sin;
 
 	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
+	l3addr_sin = (const struct sockaddr_in *)l3addr;
+	lle->base.r_l3addr.addr4 = l3addr_sin->sin_addr;
+	lle->l3_addr4 = *l3addr_sin;
+
 	/*
 	 * For IPv4 this will trigger "arpresolve" to generate
 	 * an ARP request.
 	 */
 	lle->base.la_expire = time_uptime; /* mark expired */
-	lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in_lltable_free;
 	LLE_LOCK_INIT(&lle->base);
@@ -994,8 +1001,10 @@ in_lltable_prefix_free(struct lltable *l
 			    pfx, msk) && ((flags & LLE_STATIC) ||
 			    !(lle->la_flags & LLE_STATIC))) {
 				LLE_WLOCK(lle);
-				if (callout_stop(&lle->la_timer))
+				if (callout_stop(&lle->la_timer)) {
 					LLE_REMREF(lle);
+					lle->la_flags &= ~LLE_CALLOUTREF;
+				}
 				pkts_dropped = llentry_free(lle);
 				ARPSTAT_ADD(dropped, pkts_dropped);
 			}
@@ -1051,16 +1060,12 @@ in_lltable_find_dst(struct lltable *llt,
 {
 	struct llentry *lle;
 	struct llentries *lleh;
-	struct sockaddr_in *sa2;
 	u_int hashkey;
 
 	hashkey = dst.s_addr;
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 	LIST_FOREACH(lle, lleh, lle_next) {
-		sa2 = satosin(L3_ADDR(lle)); /* XXX: Change to proper L3 */
-		if (lle->la_flags & LLE_DELETED)
-			continue;
-		if (sa2->sin_addr.s_addr == dst.s_addr)
+		if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
 			break;
 	}
 
@@ -1091,6 +1096,7 @@ in_lltable_delete(struct lltable *llt, u
 		LLE_WLOCK(lle);
 		lle->la_flags |= LLE_DELETED;
 		EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
+		in_lltable_unlink(lle);
 #ifdef DIAGNOSTIC
 		log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
@@ -1109,8 +1115,6 @@ in_lltable_create(struct lltable *llt, u
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
-	struct llentries *lleh;
-	u_int hashkey;
 
 	IF_AFDATA_WLOCK_ASSERT(ifp);
 	KASSERT(l3addr->sa_family == AF_INET,
@@ -1145,16 +1149,38 @@ in_lltable_create(struct lltable *llt, u
 		lle->la_flags |= (LLE_VALID | LLE_STATIC);
 	}
 
-	hashkey = sin->sin_addr.s_addr;
+	in_lltable_link(llt, lle);
+	LLE_WLOCK(lle);
+
+	return (lle);
+}
+
+static void
+in_lltable_link(struct lltable *llt, struct llentry *lle)
+{
+	struct in_addr dst;
+	struct llentries *lleh;
+	u_int hashkey;
+
+	dst = lle->r_l3addr.addr4;
+	hashkey = dst.s_addr;
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 
 	lle->lle_tbl  = llt;
 	lle->lle_head = lleh;
 	lle->la_flags |= LLE_LINKED;
 	LIST_INSERT_HEAD(lleh, lle, lle_next);
-	LLE_WLOCK(lle);
 
-	return (lle);
+}
+
+static void
+in_lltable_unlink(struct llentry *lle)
+{
+
+	LIST_REMOVE(lle, lle_next);
+	lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+	lle->lle_tbl = NULL;
+	lle->lle_head = NULL;
 }
 
 /*
@@ -1164,32 +1190,23 @@ in_lltable_create(struct lltable *llt, u
 static struct llentry *
 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
-	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
-	struct llentries *lleh;
-	u_int hashkey;
+	struct in_addr dst;
 
 	IF_AFDATA_LOCK_ASSERT(ifp);
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
-	hashkey = sin->sin_addr.s_addr;
-	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
-	LIST_FOREACH(lle, lleh, lle_next) {
-		struct sockaddr_in *sa2 = satosin(L3_ADDR(lle));
-		if (lle->la_flags & LLE_DELETED)
-			continue;
-		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
-			break;
-	}
+	dst = ((const struct sockaddr_in *)l3addr)->sin_addr;
+	lle = in_lltable_find_dst(llt, dst);
 
 	if (lle == NULL)
 		return (NULL);
 
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WLOCK(lle);
-	else
+	else if ((flags & LLE_UNLOCKED) == 0)
 		LLE_RLOCK(lle);
 
 	return (lle);
@@ -1216,9 +1233,6 @@ in_lltable_dump(struct lltable *llt, str
 		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 			struct sockaddr_dl *sdl;
 
-			/* skip deleted entries */
-			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
-				continue;
 			/* Skip if jailed and not a valid IP of the prison. */
 			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
 				continue;

Modified: projects/routing/sys/netinet6/in6.c
==============================================================================
--- projects/routing/sys/netinet6/in6.c	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/netinet6/in6.c	Sun Nov 16 20:12:49 2014	(r274594)
@@ -151,6 +151,9 @@ static int in6_update_ifa_internal(struc
 static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 
+static void in6_lltable_link(struct lltable *llt, struct llentry *lle);
+static void in6_lltable_unlink(struct llentry *lle);
+
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
@@ -2117,12 +2120,15 @@ static struct llentry *
 in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
 {
 	struct in6_llentry *lle;
+	const struct sockaddr_in6 *l3addr_sin6;
 
 	lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
-	lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
+	l3addr_sin6 = (const struct sockaddr_in6 *)l3addr;
+	lle->l3_addr6 = *l3addr_sin6;
+	lle->base.r_l3addr.addr6 = l3addr_sin6->sin6_addr;
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in6_lltable_free;
 	LLE_LOCK_INIT(&lle->base);
@@ -2154,8 +2160,10 @@ in6_lltable_prefix_free(struct lltable *
 			    ((flags & LLE_STATIC) ||
 			    !(lle->la_flags & LLE_STATIC))) {
 				LLE_WLOCK(lle);
-				if (callout_stop(&lle->la_timer))
+				if (callout_stop(&lle->la_timer)) {
 					LLE_REMREF(lle);
+					lle->la_flags &= ~LLE_CALLOUTREF;
+				}
 				llentry_free(lle);
 			}
 		}
@@ -2210,10 +2218,7 @@ in6_lltable_find_dst(struct lltable *llt
 	hashkey = dst->s6_addr32[3];
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 	LIST_FOREACH(lle, lleh, lle_next) {
-		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle);
-		if (lle->la_flags & LLE_DELETED)
-			continue;
-		if (bcmp(&sa6->sin6_addr, dst, sizeof(struct in6_addr)) == 0)
+		if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst) != 0)
 			break;
 	}
 
@@ -2240,6 +2245,7 @@ in6_lltable_delete(struct lltable *llt, 
 	if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 		LLE_WLOCK(lle);
 		lle->la_flags |= LLE_DELETED;
+		in6_lltable_unlink(lle);
 #ifdef DIAGNOSTIC
 		log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
@@ -2259,8 +2265,6 @@ in6_lltable_create(struct lltable *llt, 
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
-	struct llentries *lleh;
-	u_int hashkey;
 
 	IF_AFDATA_WLOCK_ASSERT(ifp);
 	KASSERT(l3addr->sa_family == AF_INET6,
@@ -2293,18 +2297,41 @@ in6_lltable_create(struct lltable *llt, 
 		lle->la_flags |= (LLE_VALID | LLE_STATIC);
 	}
 
-	hashkey = sin6->sin6_addr.s6_addr32[3];
+	in6_lltable_link(llt, lle);
+	LLE_WLOCK(lle);
+
+	return (lle);
+}
+
+static void
+in6_lltable_link(struct lltable *llt, struct llentry *lle)
+{
+	struct in6_addr dst;
+	struct llentries *lleh;
+	u_int hashkey;
+
+	dst = lle->r_l3addr.addr6;;
+	hashkey = dst.s6_addr32[3];
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 
 	lle->lle_tbl  = llt;
 	lle->lle_head = lleh;
 	lle->la_flags |= LLE_LINKED;
 	LIST_INSERT_HEAD(lleh, lle, lle_next);
-	LLE_WLOCK(lle);
 
-	return (lle);
 }
 
+static void
+in6_lltable_unlink(struct llentry *lle)
+{
+
+	LIST_REMOVE(lle, lle_next);
+	lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+	lle->lle_tbl = NULL;
+	lle->lle_head = NULL;
+}
+
+
 static struct llentry *
 in6_lltable_lookup(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
@@ -2358,8 +2385,8 @@ in6_lltable_dump(struct lltable *llt, st
 		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 			struct sockaddr_dl *sdl;
 
-			/* skip deleted or invalid entries */
-			if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID)
+			/* skip invalid entries */
+			if ((lle->la_flags & LLE_VALID) == 0)
 				continue;
 			/* Skip if jailed and not a valid IP of the prison. */
 			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)

Modified: projects/routing/sys/netinet6/nd6.c
==============================================================================
--- projects/routing/sys/netinet6/nd6.c	Sun Nov 16 20:10:37 2014	(r274593)
+++ projects/routing/sys/netinet6/nd6.c	Sun Nov 16 20:12:49 2014	(r274594)
@@ -447,6 +447,8 @@ nd6_llinfo_settimer_locked(struct llentr
 	}
 	if (canceled)
 		LLE_REMREF(ln);
+	else
+		ln->la_flags |= LLE_CALLOUTREF;
 }
 
 void
@@ -1138,12 +1140,16 @@ nd6_free(struct llentry *ln, int gc)
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(ln);
 
-	/* Guard against race with other llentry_free(). */
-	if (ln->la_flags & LLE_LINKED) {
+	/*
+	 * Note other thread could have removed given entry
+	 * stopping callout and removing LLE reference.
+	 */
+	if ((ln->la_flags & LLE_CALLOUTREF) != 0) {
 		LLE_REMREF(ln);
-		llentry_free(ln);
-	} else
-		LLE_FREE_LOCKED(ln);
+		ln->la_flags &= ~LLE_CALLOUTREF;
+	}
+
+	llentry_free(ln);
 
 	IF_AFDATA_UNLOCK(ifp);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201411162012.sAGKCofT079139>