Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 05 Sep 2008 16:13:53 -0700
From:      Julian Elischer <julian@elischer.org>
To:        FreeBSD Net <freebsd-net@freebsd.org>
Subject:   Re: rewrite of rt_check() (now rt_check_fib())
Message-ID:  <48C1BD31.6090804@elischer.org>
In-Reply-To: <48C1B83C.9000404@elischer.org>
References:  <48C19568.807@elischer.org> <48C1B774.2020405@elischer.org> <48C1B83C.9000404@elischer.org>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------060105090400010304060507
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Julian Elischer wrote:
> Julian Elischer wrote:
>> Julian Elischer wrote:
>>> In tryin gto understand rt_check_fib() (wsa rt_check()) I ended up 
>>> rewriting it to do what I thought it was trying to do..
>>> this stops the panics some people have seen, but allows the system to 
>>> stay up long enough to see some other problem..
>>> anyhow this si the patch:
>>>
>>> comments?
>>>
>>
>> I was thinking about this a bit.
>>
>> rt_check_fib() drops the lock on the given rtentry in order to be able 
>> to get a lock on another rtentry that MIGHT be the same rtentry.
>>
>> while it is doing this, another processor could free the original 
>> rtentry. The only safw way to get around this is to hold an additional 
>> reference on the first rtentry (we don't already have one) while it is 
>> unlocked so that we can be sure that it is not actually freed if this 
>> happens.
>>
>> to do this safely I'd have to add a couple of new items into route.h:

this time with less (I hope) bugs...

new macros...

#define RT_TEMP_UNLOCK(_rt) do {                                \
         RT_ADDREF(_rt);                                         \
         RT_UNLOCK(_rt);                                         \
} while (0)

#define RT_RELOCK(_rt) do {                                     \
         RT_LOCK(_rt)                                            \
         if ((_rt)->rt_refcnt <= 1)                              \
                 rtfree(_rt);                                    \
                 _rt = 0; /*  signal that it went away */        \
         else {                                                  \
                 RT_REMREF(_rt);                                 \
                 /* note that _rt is still valid */              \
         }                                                       \
} while (0)


with (better) code attached:



--------------060105090400010304060507
Content-Type: text/plain;
 name="rt_check.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="rt_check.c"

/*
 * rt_check() is invoked on each layer 2 output path, prior to
 * encapsulating outbound packets.
 *
 * The function is mostly used to find a routing entry for the gateway,
 * which in some protocol families could also point to the link-level
 * address for the gateway itself (the side effect of revalidating the
 * route to the destination is rather pointless at this stage, we did it
 * already a moment before in the pr_output() routine to locate the ifp
 * and gateway to use).
 *
 * When we remove the layer-3 to layer-2 mapping tables from the
 * routing table, this function can be removed.
 *
 * === On input ===
 *   *dst is the address of the NEXT HOP (which coincides with the
 *	final destination if directly reachable);
 *   *lrt0 points to the cached route to the final destination;
 *   *lrt is not meaningful;
 *    fibnum is the index to the correct network fib for this packet
 *	(*lrt0 has not ref held on it so REMREF is not needed )
 *
 * === Operation ===
 * If the route is marked down try to find a new route.  If the route
 * to the gateway is gone, try to setup a new route.  Otherwise,
 * if the route is marked for packets to be rejected, enforce that.
 * Note that rtalloc returns an rtentry with an extra REF that we need to lose.
 *
 * === On return ===
 *   *dst is unchanged;
 *   *lrt0 points to the (possibly new) route to the final destination
 *   *lrt points to the route to the next hop   [LOCKED]
 *
 * Their values are meaningful ONLY if no error is returned.
 *
 * To follow this you have to remember that:
 * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!)
 * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1)
 *    and an RT_UNLOCK
 * RTFREE does an RT_LOCK and an RTFREE_LOCKED
 * The gwroute pointer counts as a reference on the rtentry to which it points.
 * so when we add it we use the ref that rtalloc gives us and when we lose it
 * we need to remove the reference.
 */
int
rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
{
	return (rt_check_fib(lrt, lrt0, dst, 0));
}

int
rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst,
		u_int fibnum)
{
	struct rtentry *rt;
	struct rtentry *rt0;
	int error;

	KASSERT(*lrt0 != NULL, ("rt_check"));
	rt0 = *lrt0;
	rt = NULL;

	/* NB: the locking here is tortuous... */
	RT_LOCK(rt0);
retry:
	if (rt0 && (rt0->rt_flags & RTF_UP) == 0) {
		/* Current rt0 is useless, try get a replacement. */
		RT_UNLOCK(rt0);
		rt0 = NULL;
	}
	if (rt0 == NULL) {
		rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum);
		if (rt0 == NULL) {
			return (EHOSTUNREACH);
		}
		RT_REMREF(rt0); /* don't need the reference. */
	}

	if (rt0->rt_flags & RTF_GATEWAY) {
		if ((rt = rt0->rt_gwroute) != NULL) {
			RT_LOCK(rt);		/* NB: gwroute */
			if ((rt->rt_flags & RTF_UP) == 0) {
				/* gw route is dud. ignore/lose it */
				RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */
				rt = rt0->rt_gwroute = NULL;
			}
		}
		
		if (rt == NULL) {  /* NOT AN ELSE CLAUSE */
			RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */
			rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum);
			if ((rt == rt0) || (rt == NULL)) {
				/* the best we can do is not good enough */
				if (rt) {
					RT_REMREF(rt); /* assumes ref > 0 */
					RT_UNLOCK(rt);
				}
				RT_FREE(rt0); /* lock, unref, (unlock) */
				return (ENETUNREACH);
			}
			/*
			 * Relock it and lose the added reference.
			 * All sorts of things could have happenned while we
			 * had no lock on it, so check for them.
			 */
			RT_RELOCK(rt0);
			if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0))
				/* Ru-roh.. what we had is no longer any good */
				goto retry;
			/* 
			 * While we were away, someone replaced the gateway.
			 * Since a reference count is involved we can't just
			 * overwrite it.
			 */
			if (rt0->rt_gwroute) {
				if (rt0->rt_gwroute != rt) {
					RT_FREE_LOCKED(rt);
					goto retry;
				}
			} else {
				rt0->rt_gwroute = rt;
			}
		}
		RT_LOCK_ASSERT(rt);
		RT_UNLOCK(rt0);
	} else {
		/* think of rt as having the lock from now on.. */
		rt = rt0;
	}
	/* XXX why are we inspecting rmx_expire? */
	if ((rt->rt_flags & RTF_REJECT) &&
	    (rt->rt_rmx.rmx_expire == 0 ||
	    time_uptime < rt->rt_rmx.rmx_expire)) {
		RT_UNLOCK(rt);
		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
	}

	*lrt = rt;
	*lrt0 = rt0;
	return (0);
}

--------------060105090400010304060507--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48C1BD31.6090804>