Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 13 Dec 2015 07:39:49 +0000 (UTC)
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r292155 - in head/sys: net netinet6
Message-ID:  <201512130739.tBD7dnVm042288@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: melifaro
Date: Sun Dec 13 07:39:49 2015
New Revision: 292155
URL: https://svnweb.freebsd.org/changeset/base/292155

Log:
  Remove LLE read lock from IPv6 fast path.
  
  LLE structure is mostly unchanged during its lifecycle: there are only 2
  things relevant for fast path lookup code:
  1) link-level address change. Since r286722, these updates are performed
    under AFDATA WLOCK.
  2) Some sort of feedback indicating that this particular entry is used so
    we send NS to perform reachability verification instead of expiring entry.
    The only signal that is needed from fast path is something like binary
    yes/no.
  The latter is solved by the following changes:
  
  Special r_skip_req (introduced in D3688) value is used for fast path feedback.
    It is read locklessly by the fast path, but updated under the req_mutex mutex.
    If this field is non-zero, the fast path will acquire the lock and set it back to 0.
  
  After transitioning to STALE state, callout timer is armed to run each
    V_nd6_delay seconds to make sure that if packet was transmitted at the start
    of given interval, we would be able to switch to PROBE state in V_nd6_delay
    seconds as user expects.
  (in STALE state) timer is rescheduled until original V_nd6_gctimer expires
    keeping lle in STALE state (remaining timer value stored in lle_remtime).
  (in STALE state) timer is rescheduled if packet was transmitted less than
    V_nd6_delay seconds ago to make sure we transition to PROBE state exactly
    after V_nd6_delay seconds.
  
  As a result, all packets towards lle in REACHABLE/STALE/PROBE states are handled
    by fast path without acquiring lle read lock.
  
  Differential Revision:		https://reviews.freebsd.org/D3780

Modified:
  head/sys/net/if_llatbl.c
  head/sys/net/if_llatbl.h
  head/sys/netinet6/in6.c
  head/sys/netinet6/nd6.c
  head/sys/netinet6/nd6_nbr.c

Modified: head/sys/net/if_llatbl.c
==============================================================================
--- head/sys/net/if_llatbl.c	Sun Dec 13 06:54:53 2015	(r292154)
+++ head/sys/net/if_llatbl.c	Sun Dec 13 07:39:49 2015	(r292155)
@@ -288,6 +288,47 @@ lltable_set_entry_addr(struct ifnet *ifp
 }
 
 /*
+ * Tries to update @lle link-level address.
+ * Since update requires AFDATA WLOCK, function
+ * drops @lle lock, acquires AFDATA lock and then acquires
+ * @lle lock to maintain lock order.
+ *
+ * Returns 1 on success.
+ */
+int
+lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+    const char *lladdr)
+{
+
+	/* Perform real LLE update */
+	/* use afdata WLOCK to update fields */
+	LLE_WLOCK_ASSERT(lle);
+	LLE_ADDREF(lle);
+	LLE_WUNLOCK(lle);
+	IF_AFDATA_WLOCK(ifp);
+	LLE_WLOCK(lle);
+
+	/*
+	 * Since we droppped LLE lock, other thread might have deleted
+	 * this lle. Check and return
+	 */
+	if ((lle->la_flags & LLE_DELETED) != 0) {
+		IF_AFDATA_WUNLOCK(ifp);
+		LLE_FREE_LOCKED(lle);
+		return (0);
+	}
+
+	/* Update data */
+	lltable_set_entry_addr(ifp, lle, lladdr);
+
+	IF_AFDATA_WUNLOCK(ifp);
+
+	LLE_REMREF(lle);
+
+	return (1);
+}
+
+/*
  *
  * Performes generic cleanup routines and frees lle.
  *

Modified: head/sys/net/if_llatbl.h
==============================================================================
--- head/sys/net/if_llatbl.h	Sun Dec 13 06:54:53 2015	(r292154)
+++ head/sys/net/if_llatbl.h	Sun Dec 13 07:39:49 2015	(r292155)
@@ -79,6 +79,8 @@ struct llentry {
 	int16_t			 ln_state;	/* IPv6 has ND6_LLINFO_NOSTATE == -2 */
 	uint16_t		 ln_router;
 	time_t			 ln_ntick;
+	time_t			lle_remtime;	/* Real time remaining */
+	time_t			lle_hittime;	/* Time when r_skip_req was unset */
 	int			 lle_refcnt;
 
 	LIST_ENTRY(llentry)	lle_chain;	/* chain of deleted items */
@@ -222,6 +224,8 @@ struct llentry  *llentry_alloc(struct if
 size_t lltable_drop_entry_queue(struct llentry *);
 void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
     const char *lladdr);
+int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+    const char *lladdr);
 
 struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
     const struct sockaddr *l4addr);

Modified: head/sys/netinet6/in6.c
==============================================================================
--- head/sys/netinet6/in6.c	Sun Dec 13 06:54:53 2015	(r292154)
+++ head/sys/netinet6/in6.c	Sun Dec 13 07:39:49 2015	(r292155)
@@ -2064,6 +2064,7 @@ in6_lltable_destroy_lle(struct llentry *
 
 	LLE_WUNLOCK(lle);
 	LLE_LOCK_DESTROY(lle);
+	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
@@ -2080,6 +2081,7 @@ in6_lltable_new(const struct in6_addr *a
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in6_lltable_destroy_lle;
 	LLE_LOCK_INIT(&lle->base);
+	LLE_REQ_INIT(&lle->base);
 	callout_init(&lle->base.lle_timer, 1);
 
 	return (&lle->base);
@@ -2288,6 +2290,13 @@ in6_lltable_lookup(struct lltable *llt, 
 	if (lle == NULL)
 		return (NULL);
 
+	KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
+	    (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
+	    flags));
+
+	if (flags & LLE_UNLOCKED)
+		return (lle);
+
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WLOCK(lle);
 	else
@@ -2350,8 +2359,8 @@ in6_lltable_dump_entry(struct lltable *l
 			sdl->sdl_index = ifp->if_index;
 			sdl->sdl_type = ifp->if_type;
 			bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
-			ndpc.rtm.rtm_rmx.rmx_expire =
-			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
+			ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
+			    lle->lle_remtime / hz;
 			ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 			if (lle->la_flags & LLE_STATIC)
 				ndpc.rtm.rtm_flags |= RTF_STATIC;

Modified: head/sys/netinet6/nd6.c
==============================================================================
--- head/sys/netinet6/nd6.c	Sun Dec 13 06:54:53 2015	(r292154)
+++ head/sys/netinet6/nd6.c	Sun Dec 13 07:39:49 2015	(r292155)
@@ -542,6 +542,107 @@ nd6_llinfo_get_holdsrc(struct llentry *l
 }
 
 /*
+ * Checks if we need to switch from STALE state.
+ *
+ * RFC 4861 requires switching from STALE to DELAY state
+ * on first packet matching entry, waiting V_nd6_delay and
+ * transition to PROBE state (if upper layer confirmation was
+ * not received).
+ *
+ * This code performs a bit differently:
+ * On packet hit we don't change state (but desired state
+ * can be guessed by control plane). However, after V_nd6_delay
+ * seconds code will transition to PROBE state (so DELAY state
+ * is kinda skipped in most situations).
+ *
+ * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
+ * we perform the following upon entering STALE state:
+ *
+ * 1) Arm timer to run each V_nd6_delay seconds to make sure that
+ * if packet was transmitted at the start of given interval, we
+ * would be able to switch to PROBE state in V_nd6_delay seconds
+ * as user expects.
+ *
+ * 2) Reschedule timer until original V_nd6_gctimer expires keeping
+ * lle in STALE state (remaining timer value stored in lle_remtime).
+ *
+ * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
+ * seconds ago.
+ *
+ * Returns non-zero value if the entry is still STALE (storing
+ * the next timer interval in @pdelay).
+ *
+ * Returns zero value if original timer expired or we need to switch to
+ * PROBE (store that in @do_switch variable).
+ */
+static int
+nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
+{
+	int nd_delay, nd_gctimer, r_skip_req;
+	time_t lle_hittime;
+	long delay;
+
+	*do_switch = 0;
+	nd_gctimer = V_nd6_gctimer;
+	nd_delay = V_nd6_delay;
+
+	LLE_REQ_LOCK(lle);
+	r_skip_req = lle->r_skip_req;
+	lle_hittime = lle->lle_hittime;
+	LLE_REQ_UNLOCK(lle);
+
+	if (r_skip_req > 0) {
+
+		/*
+		 * Nonzero r_skip_req value was set upon entering
+		 * STALE state. Since value was not changed, no
+		 * packets were passed using this lle. Ask for
+		 * timer reschedule and keep STALE state.
+		 */
+		delay = (long)(MIN(nd_gctimer, nd_delay));
+		delay *= hz;
+		if (lle->lle_remtime > delay)
+			lle->lle_remtime -= delay;
+		else {
+			delay = lle->lle_remtime;
+			lle->lle_remtime = 0;
+		}
+
+		if (delay == 0) {
+
+			/*
+			 * The original ng6_gctime timeout ended,
+			 * no more rescheduling.
+			 */
+			return (0);
+		}
+
+		*pdelay = delay;
+		return (1);
+	}
+
+	/*
+	 * Packet received. Verify timestamp
+	 */
+	delay = (long)(time_uptime - lle_hittime);
+	if (delay < nd_delay) {
+
+		/*
+		 * V_nd6_delay still not passed since the first
+		 * hit in STALE state.
+		 * Reshedule timer and return.
+		 */
+		*pdelay = (long)(nd_delay - delay) * hz;
+		return (1);
+	}
+
+	/* Request switching to probe */
+	*do_switch = 1;
+	return (0);
+}
+
+
+/*
  * Switch @lle state to new state optionally arming timers.
  *
  * Set noinline to be dtrace-friendly
@@ -550,9 +651,11 @@ __noinline void
 nd6_llinfo_setstate(struct llentry *lle, int newstate)
 {
 	struct ifnet *ifp;
-	long delay;
+	int nd_gctimer, nd_delay;
+	long delay, remtime;
 
 	delay = 0;
+	remtime = 0;
 
 	switch (newstate) {
 	case ND6_LLINFO_INCOMPLETE:
@@ -566,7 +669,19 @@ nd6_llinfo_setstate(struct llentry *lle,
 		}
 		break;
 	case ND6_LLINFO_STALE:
-		delay = (long)V_nd6_gctimer * hz;
+
+		/*
+		 * Notify fast path that we want to know if any packet
+		 * is transmitted by setting r_skip_req.
+		 */
+		LLE_REQ_LOCK(lle);
+		lle->r_skip_req = 1;
+		LLE_REQ_UNLOCK(lle);
+		nd_delay = V_nd6_delay;
+		nd_gctimer = V_nd6_gctimer;
+
+		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
+		remtime = (long)nd_gctimer * hz - delay;
 		break;
 	case ND6_LLINFO_DELAY:
 		lle->la_asked = 0;
@@ -577,6 +692,7 @@ nd6_llinfo_setstate(struct llentry *lle,
 	if (delay > 0)
 		nd6_llinfo_settimer_locked(lle, delay);
 
+	lle->lle_remtime = remtime;
 	lle->ln_state = newstate;
 }
 
@@ -592,7 +708,8 @@ nd6_llinfo_timer(void *arg)
 	struct in6_addr *dst, *pdst, *psrc, src;
 	struct ifnet *ifp;
 	struct nd_ifinfo *ndi = NULL;
-	int send_ns;
+	int do_switch, send_ns;
+	long delay;
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	ln = (struct llentry *)arg;
@@ -680,13 +797,35 @@ nd6_llinfo_timer(void *arg)
 		break;
 
 	case ND6_LLINFO_STALE:
-		/* Garbage Collection(RFC 2461 5.3) */
-		if (!ND6_LLINFO_PERMANENT(ln)) {
-			EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
-			nd6_free(ln, 1);
-			ln = NULL;
+		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
+
+			/*
+			 * No packet has used this entry and GC timeout
+			 * has not been passed. Reshedule timer and
+			 * return.
+			 */
+			nd6_llinfo_settimer_locked(ln, delay);
+			break;
 		}
-		break;
+
+		if (do_switch == 0) {
+
+			/*
+			 * GC timer has ended and entry hasn't been used.
+			 * Run Garbage collector (RFC 4861, 5.3)
+			 */
+			if (!ND6_LLINFO_PERMANENT(ln)) {
+				EVENTHANDLER_INVOKE(lle_event, ln,
+				    LLENTRY_EXPIRED);
+				nd6_free(ln, 1);
+				ln = NULL;
+			}
+			break;
+		}
+
+		/* Entry has been used AND delay timer has ended. */
+
+		/* FALLTHROUGH */
 
 	case ND6_LLINFO_DELAY:
 		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
@@ -1796,7 +1935,11 @@ nd6_cache_lladdr(struct ifnet *ifp, stru
 		 * Record source link-layer address
 		 * XXX is it dependent to ifp->if_type?
 		 */
-		lltable_set_entry_addr(ifp, ln, lladdr);
+		if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
+			/* Entry was deleted */
+			return;
+		}
+
 		nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
@@ -1996,31 +2139,25 @@ nd6_resolve(struct ifnet *ifp, int is_gw
 	}
 
 	IF_AFDATA_RLOCK(ifp);
-	ln = nd6_lookup(&dst6->sin6_addr, 0, ifp);
-	IF_AFDATA_RUNLOCK(ifp);
-
-	/*
-	 * Perform fast path for the following cases:
-	 * 1) lle state is REACHABLE
-	 * 2) lle state is DELAY (NS message sent)
-	 *
-	 * Every other case involves lle modification, so we handle
-	 * them separately.
-	 */
-	if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE &&
-	    ln->ln_state != ND6_LLINFO_DELAY)) {
-		/* Fall back to slow processing path */
-		if (ln != NULL)
-			LLE_RUNLOCK(ln);
-		return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
+	ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp);
+	if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
+		/* Entry found, let's copy lle info */
+		bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
+		if (pflags != NULL)
+			*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
+		/* Check if we have feedback request from nd6 timer */
+		if (ln->r_skip_req != 0) {
+			LLE_REQ_LOCK(ln);
+			ln->r_skip_req = 0; /* Notify that entry was used */
+			ln->lle_hittime = time_uptime;
+			LLE_REQ_UNLOCK(ln);
+		}
+		IF_AFDATA_RUNLOCK(ifp);
+		return (0);
 	}
+	IF_AFDATA_RUNLOCK(ifp);
 
-
-	bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
-	if (pflags != NULL)
-		*pflags = ln->la_flags;
-	LLE_RUNLOCK(ln);
-	return (0);
+	return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
 }
 
 

Modified: head/sys/netinet6/nd6_nbr.c
==============================================================================
--- head/sys/netinet6/nd6_nbr.c	Sun Dec 13 06:54:53 2015	(r292154)
+++ head/sys/netinet6/nd6_nbr.c	Sun Dec 13 07:39:49 2015	(r292155)
@@ -765,7 +765,10 @@ nd6_na_input(struct mbuf *m, int off, in
 		/*
 		 * Record link-layer address, and update the state.
 		 */
-		lltable_set_entry_addr(ifp, ln, lladdr);
+		if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
+			ln = NULL;
+			goto freeit;
+		}
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 		if (is_solicited)
 			nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
@@ -831,7 +834,12 @@ nd6_na_input(struct mbuf *m, int off, in
 			 * Update link-local address, if any.
 			 */
 			if (lladdr != NULL) {
-				lltable_set_entry_addr(ifp, ln, lladdr);
+				int ret;
+				ret = lltable_try_set_entry_addr(ifp, ln,lladdr);
+				if (ret == 0) {
+					ln = NULL;
+					goto freeit;
+				}
 				EVENTHANDLER_INVOKE(lle_event, ln,
 				    LLENTRY_RESOLVED);
 			}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512130739.tBD7dnVm042288>