Date:      Sun, 01 Apr 2007 01:07:19 +0300
From:      Ilya Bobir <ilya@po4ta.com>
To:        Julian Elischer <julian@elischer.org>
Cc:        Luigi Rizzo <rizzo@icir.org>, ipfw@freebsd.org, FreeBSD Net <freebsd-net@freebsd.org>
Subject:   Re: IPFW update frequency
Message-ID:  <460EDB97.8030906@po4ta.com>
In-Reply-To: <460DA258.2030402@elischer.org>
References:  <460D75CE.70804@elischer.org>	<20070330145938.A88154@xorpc.icir.org> <460DA258.2030402@elischer.org>


Julian Elischer wrote:
> This is pretty close.. I know I've mentioned this to people several
> times over the last year or so.  The trick is to try to do it in a
> way that the average packet doesn't need to take any locks to get in,
> and the updater does more work.  If you are willing to acquire a lock
> on both starting and ending the run through the firewall, it is easy.
> (I already have code to do that..)
> (See http://www.freebsd.org/~julian/atomic_replace.c (untested but
> probably close.)
> Doing it without requiring that each packet take those locks, however,
> is a whole new level of problem.

Please take a look at this variant.  I think I've managed to remove the 
reference counting lock completely, so there is no locking at all for an 
average packet, only a few atomic adds.

After I replaced the reference counting mutex with atomic reference 
counting, it appeared to me that the only remaining problem was a race 
in arep_use: a consumer could see the old object, but by the moment it 
increments that object's reference count, the object may already have 
been freed.  I've added counters that let arep_change wait long enough 
for every thread that entered arep_use to leave it after correctly 
incrementing the reference counter.  If there are other problems I've 
missed, then this solution is incomplete.
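
For illustration, the packet-path side would look roughly like this (a 
minimal sketch; the 'ruleset' type, its fields, and process_packet are 
my own hypothetical names, not part of the attached code):

	/* The user's item embeds the arep_obj, per the framework's rule;
	 * putting it first makes the item and arep_obj pointers coincide. */
	struct ruleset {
		struct arep_obj	rs_arep;   /* rs_arep.arep_item points back at us */
		/* ... the actual firewall rules ... */
	};

	void
	process_packet(struct arep_head *head /* , the packet */)
	{
		struct ruleset *rs;

		rs = arep_use(head);           /* no lock, two atomic adds */
		/* ... run the packet through rs ... */
		arep_drop(head, &rs->rs_arep); /* may free rs if it was replaced */
	}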

[Attachment: atomic_replace.no_ref_mtx.c]

/*
 * A Framework to support the atomic replacement of an
 * old object with a new object, while allowing the 
 * existing users of the old object to continue to use it.
 *
 * This algorithm has been used all over the place for as
 * long as there have been computers, e.g. the standard CS-101
 * technique for updating a small file while there may still
 * be users of that file:
 * (*) make new version of file.
 * (*) mv new old.
 * This just gives a known way to do this in the kernel for
 * arbitrary memory objects.
 * 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/refcount.h>
#include <machine/atomic.h>

static MALLOC_DEFINE(M_AREP, "arep", "atomic replace framework");
/*
 * !!!IMPORTANT!!!
 * Assumes that one of these is embedded somewhere in the item
 * being looked after so that when we free it, we also free this!
 */
struct arep_obj {
	u_int	 arep_refcount;
	void	*arep_item;
};

/* ###### Methods supplied by the user code ###### */
/*
 * The user should supply a method that fits this description
 * that knows how to free an 'item'
 */
typedef void arep_free_method(void *);

/*
 * The user should supply a method that fits this description
 * that knows how to change an 'item' according to the
 * instructions in "*argptr".  It returns the (possibly new)
 * object; when called with a NULL old object it must create
 * an initial, empty one.
 */
typedef struct arep_obj *arep_change_method(struct arep_obj *arep_old,
    void *argptr);

/*
 * This is the permanent head that always points to the
 * current object.
 */

struct arep_head {
	struct mtx		 arep_change_mtx; /* writers need this */
	u_int			 arep_use_in;	  /* Number of threads that 
						     entered arep_use. */
	u_int			 arep_use_out;	  /* Number of threads that 
						     exited arep_use. */
	struct arep_obj		*arep_object;	  /* "The reference" */
	arep_free_method	*arep_free_fn;
	arep_change_method	*arep_change_fn;
	int			 arep_flags;
};

#define AREP_F_SHARED_BITS 0x0001        /* need change lock to free */


struct arep_head *
arep_init(arep_free_method *free_fn, arep_change_method * change_fn, int flags)
{
	struct arep_head *head;

	head = malloc(sizeof(*head), M_AREP, M_WAITOK | M_ZERO);
	mtx_init(&head->arep_change_mtx, "AREP_change", "AREPCHG", MTX_DEF);
	head->arep_use_in = 0;
	head->arep_use_out = 0;
	/* The change method creates the initial, empty object when
	 * passed a NULL old object. */
	head->arep_object = (*change_fn)(NULL, NULL);
	refcount_init(&head->arep_object->arep_refcount, 1);
	head->arep_free_fn = free_fn;
	head->arep_change_fn = change_fn;
	head->arep_flags = flags;
	return (head);
}

void /* possibly a return value to indicate failure? */
arep_change(struct arep_head *head, void *argptr)
{
	struct arep_obj *obj, *old;
	u_int in_count, in_count2, out_count;

	mtx_lock(&head->arep_change_mtx);
	old = head->arep_object;
	obj = (*head->arep_change_fn)(old, argptr);
	if (obj == old) {
		mtx_unlock(&head->arep_change_mtx);
		return;
	}
	refcount_init(&obj->arep_refcount, 1);

	/*
	 * It is possible that, at the moment we decrease the reference
	 * count of the old object, some consumers in arep_use have
	 * already seen the old object but have not yet increased its
	 * reference counter.  To let them do so, we count the number
	 * of consumers that could have seen the old object, starting
	 * at some moment before the update and ending at some moment
	 * after the update.
	 */
	in_count = atomic_load_acq_int(&head->arep_use_in);
	head->arep_object = obj;
	/*
	 * Make sure other threads will see the new object address
	 * before we read the counter for the second time.
	 */
	in_count2 = atomic_load_acq_int(&head->arep_use_in);

	if (refcount_release(&old->arep_refcount)) {
		/* It is possible that we are still NOT the last holder. */
		if (in_count != in_count2) {
			/*
			 * If this is the case, wait for a moment at
			 * which every thread that entered arep_use
			 * has also left it.
			 *
			 * As we cannot read the in and out counts
			 * simultaneously, read the out counter first.
			 *
			 * Note that we cannot assume it is safe to
			 * proceed unless the in counter is exactly
			 * equal to the out counter.  If they are
			 * unequal then, irrespective of the sign of
			 * the difference, it is possible to outrun a
			 * thread that has not yet increased the
			 * reference count of the old object.
			 */

			for (;;) {
				out_count = 
					atomic_load_acq_int(&head->arep_use_out);
				in_count = 
					atomic_load_acq_int(&head->arep_use_in);

				if (in_count == out_count)
					break;
			}

			/*
			 * Now we should check the reference count again. 
			 */
			if (atomic_load_acq_int(&old->arep_refcount) == 0)
				(*head->arep_free_fn)(old);
		} else {
			/* We are definitely the last holder. */
			(*head->arep_free_fn)(old);
		}
	}

	mtx_unlock(&head->arep_change_mtx);
}

void *
arep_use(struct arep_head *head)
{
	struct arep_obj *obj;

	/*
	 * Let arep_change know that another thread is about to read
	 * the current object address.
	 */
	atomic_add_acq_int(&head->arep_use_in, 1);

	obj = head->arep_object;
	refcount_acquire(&obj->arep_refcount);

	/*
	 * We are done updating the object's reference count, so
	 * inform arep_change that we have left.
	 */
	atomic_add_rel_int(&head->arep_use_out, 1);

	return (obj->arep_item);
}

void
arep_drop(struct arep_head *head, struct arep_obj *obj)
{
	if (refcount_release(&obj->arep_refcount)) {
		/* We are the last holder */
		if (head->arep_flags & AREP_F_SHARED_BITS) {
			/*
			 * Assume that the new one and the old one
			 * both point to some reference counted stuff
			 * that we don't want to change non atomically.
			 */
			mtx_lock(&head->arep_change_mtx);
			/* We are the last holder */
			(*head->arep_free_fn)(obj);
			mtx_unlock(&head->arep_change_mtx);
		} else {
			/*
			 * There are no external complexities
			 * we need to worry about.
			 */
			(*head->arep_free_fn)(obj);
		}
	}
}
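
/*
 * For illustration only (not part of the original framework): a
 * hypothetical pair of user-supplied methods for a firewall ruleset,
 * sketching how the callbacks above are meant to be implemented.  The
 * names 'ruleset_change', 'ruleset_free' and 'example_update' are mine.
 */
static struct arep_obj *
ruleset_change(struct arep_obj *old, void *argptr)
{
	/*
	 * When old == NULL (the call from arep_init), allocate and
	 * return an initial, empty object.  Otherwise copy the old
	 * ruleset, apply the instructions in *argptr to the copy and
	 * return the copy's embedded arep_obj, or return 'old' itself
	 * if the change turns out to be a no-op.
	 */
	/* ... user code elided ... */
	return (old);
}

static void
ruleset_free(void *item)
{
	/*
	 * The arep_obj is assumed to be embedded first in the item,
	 * so the two pointers coincide and freeing the item frees the
	 * arep_obj with it, as the framework requires.
	 */
	free(item, M_AREP);
}

static void
example_update(void *instructions)
{
	struct arep_head *head;

	head = arep_init(ruleset_free, ruleset_change, 0);
	/*
	 * Later, from some control path: readers keep running in
	 * arep_use()/arep_drop() while this executes; arep_change()
	 * serializes updaters on arep_change_mtx and waits out the
	 * in/out counter window before freeing the old object.
	 */
	arep_change(head, instructions);
}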

