Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 16 May 2010 17:03:56 +0000 (UTC)
From:      Randall Stewart <rrs@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r208160 - head/sys/netinet
Message-ID:  <201005161703.o4GH3ug7061710@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rrs
Date: Sun May 16 17:03:56 2010
New Revision: 208160
URL: http://svn.freebsd.org/changeset/base/208160

Log:
  This adds back the iterator to the SCTP
  code base. We now properly have ONE thread
  that services all VNETs. We also purge
  the old timer-based iterator code, which had
  multiple LORs (lock order reversals) and other issues.
  
  MFC after:	3 days

Modified:
  head/sys/netinet/sctp_bsd_addr.c
  head/sys/netinet/sctp_bsd_addr.h
  head/sys/netinet/sctp_constants.h
  head/sys/netinet/sctp_lock_bsd.h
  head/sys/netinet/sctp_pcb.c
  head/sys/netinet/sctp_pcb.h
  head/sys/netinet/sctp_structs.h
  head/sys/netinet/sctp_timer.c
  head/sys/netinet/sctputil.c

Modified: head/sys/netinet/sctp_bsd_addr.c
==============================================================================
--- head/sys/netinet/sctp_bsd_addr.c	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_bsd_addr.c	Sun May 16 17:03:56 2010	(r208160)
@@ -49,16 +49,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/unistd.h>
 
 /* Declare all of our malloc named types */
-
-/* Note to Michael/Peter for mac-os,
- * I think mac has this too since I
- * do see the M_PCB type, so I
- * will also put in the mac file the
- * MALLOC_DECLARE. If this does not
- * work for mac uncomment the defines for
- * the strings that we use in Panda, I put
- * them in comments in the mac-os file.
- */
 MALLOC_DEFINE(SCTP_M_MAP, "sctp_map", "sctp asoc map descriptor");
 MALLOC_DEFINE(SCTP_M_STRMI, "sctp_stri", "sctp stream in array");
 MALLOC_DEFINE(SCTP_M_STRMO, "sctp_stro", "sctp stream out array");
@@ -79,46 +69,77 @@ MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", 
 MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control");
 MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option");
 
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+/* Global NON-VNET structure that controls the iterator */
+struct iterator_control sctp_it_ctl;
+static int __sctp_thread_based_iterator_started = 0;
+
+
+static void
+sctp_cleanup_itqueue(void)
+{
+	struct sctp_iterator *it;
+
+	while ((it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead)) != NULL) {
+		if (it->function_atend != NULL) {
+			(*it->function_atend) (it->pointer, it->val);
+		}
+		TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+		SCTP_FREE(it, SCTP_M_ITER);
+	}
+}
+
+
 void
 sctp_wakeup_iterator(void)
 {
-	wakeup(&SCTP_BASE_INFO(iterator_running));
+	wakeup(&sctp_it_ctl.iterator_running);
 }
 
 static void
 sctp_iterator_thread(void *v)
 {
-	CURVNET_SET((struct vnet *)v);
 	SCTP_IPI_ITERATOR_WQ_LOCK();
-	SCTP_BASE_INFO(iterator_running) = 0;
 	while (1) {
-		msleep(&SCTP_BASE_INFO(iterator_running),
-		    &SCTP_BASE_INFO(ipi_iterator_wq_mtx),
+		msleep(&sctp_it_ctl.iterator_running,
+		    &sctp_it_ctl.ipi_iterator_wq_mtx,
 		    0, "waiting_for_work", 0);
-		if (SCTP_BASE_INFO(threads_must_exit)) {
+		if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) {
 			SCTP_IPI_ITERATOR_WQ_DESTROY();
+			SCTP_ITERATOR_LOCK_DESTROY();
+			sctp_cleanup_itqueue();
+			__sctp_thread_based_iterator_started = 0;
 			kthread_exit();
 		}
 		sctp_iterator_worker();
 	}
-	CURVNET_RESTORE();
 }
 
 void
 sctp_startup_iterator(void)
 {
+	if (__sctp_thread_based_iterator_started) {
+		/* You only get one */
+		return;
+	}
+	/* init the iterator head */
+	__sctp_thread_based_iterator_started = 1;
+	sctp_it_ctl.iterator_running = 0;
+	sctp_it_ctl.iterator_flags = 0;
+	sctp_it_ctl.cur_it = NULL;
+	SCTP_ITERATOR_LOCK_INIT();
+	SCTP_IPI_ITERATOR_WQ_INIT();
+	TAILQ_INIT(&sctp_it_ctl.iteratorhead);
+
 	int ret;
 
 	ret = kproc_create(sctp_iterator_thread,
-	    (void *)curvnet,
-	    &SCTP_BASE_INFO(thread_proc),
+	    (void *)NULL,
+	    &sctp_it_ctl.thread_proc,
 	    RFPROC,
 	    SCTP_KTHREAD_PAGES,
 	    SCTP_KTRHEAD_NAME);
 }
 
-#endif
 
 #ifdef INET6
 

Modified: head/sys/netinet/sctp_bsd_addr.h
==============================================================================
--- head/sys/netinet/sctp_bsd_addr.h	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_bsd_addr.h	Sun May 16 17:03:56 2010	(r208160)
@@ -37,12 +37,11 @@ __FBSDID("$FreeBSD$");
 
 #if defined(_KERNEL) || defined(__Userspace__)
 
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+extern struct iterator_control sctp_it_ctl;
 void sctp_wakeup_iterator(void);
 
 void sctp_startup_iterator(void);
 
-#endif
 
 #ifdef INET6
 void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa);

Modified: head/sys/netinet/sctp_constants.h
==============================================================================
--- head/sys/netinet/sctp_constants.h	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_constants.h	Sun May 16 17:03:56 2010	(r208160)
@@ -87,10 +87,6 @@ __FBSDID("$FreeBSD$");
 /* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */
 #define SCTP_AUDIT_SIZE 256
 
-/* temporary disabled since it does not work with VNET. */
-#if 0
-#define SCTP_USE_THREAD_BASED_ITERATOR 1
-#endif
 
 #define SCTP_KTRHEAD_NAME "sctp_iterator"
 #define SCTP_KTHREAD_PAGES 0
@@ -572,7 +568,6 @@ __FBSDID("$FreeBSD$");
 #define SCTP_TIMER_TYPE_EVENTWAKE	13
 #define SCTP_TIMER_TYPE_STRRESET        14
 #define SCTP_TIMER_TYPE_INPKILL         15
-#define SCTP_TIMER_TYPE_ITERATOR        16
 #define SCTP_TIMER_TYPE_EARLYFR         17
 #define SCTP_TIMER_TYPE_ASOCKILL        18
 #define SCTP_TIMER_TYPE_ADDR_WQ         19

Modified: head/sys/netinet/sctp_lock_bsd.h
==============================================================================
--- head/sys/netinet/sctp_lock_bsd.h	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_lock_bsd.h	Sun May 16 17:03:56 2010	(r208160)
@@ -107,42 +107,36 @@ extern int sctp_logoff_stuff;
 #define SCTP_INP_INFO_WUNLOCK()		rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx))
 
 
-#define SCTP_IPI_ADDR_INIT() \
+#define SCTP_IPI_ADDR_INIT()								\
         rw_init(&SCTP_BASE_INFO(ipi_addr_mtx), "sctp-addr")
-
 #define SCTP_IPI_ADDR_DESTROY() do  { \
         if(rw_wowned(&SCTP_BASE_INFO(ipi_addr_mtx))) { \
              rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \
         } \
 	rw_destroy(&SCTP_BASE_INFO(ipi_addr_mtx)); \
       }  while (0)
-
-
-
 #define SCTP_IPI_ADDR_RLOCK()	do { 					\
              rw_rlock(&SCTP_BASE_INFO(ipi_addr_mtx));                         \
 } while (0)
-
 #define SCTP_IPI_ADDR_WLOCK()	do { 					\
              rw_wlock(&SCTP_BASE_INFO(ipi_addr_mtx));                         \
 } while (0)
 
-
 #define SCTP_IPI_ADDR_RUNLOCK()		rw_runlock(&SCTP_BASE_INFO(ipi_addr_mtx))
 #define SCTP_IPI_ADDR_WUNLOCK()		rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx))
 
 
 #define SCTP_IPI_ITERATOR_WQ_INIT() \
-        mtx_init(&SCTP_BASE_INFO(ipi_iterator_wq_mtx), "sctp-it-wq", "sctp_it_wq", MTX_DEF)
+        mtx_init(&sctp_it_ctl.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF)
 
 #define SCTP_IPI_ITERATOR_WQ_DESTROY() \
-	mtx_destroy(&SCTP_BASE_INFO(ipi_iterator_wq_mtx))
+	mtx_destroy(&sctp_it_ctl.ipi_iterator_wq_mtx)
 
 #define SCTP_IPI_ITERATOR_WQ_LOCK()	do { 					\
-             mtx_lock(&SCTP_BASE_INFO(ipi_iterator_wq_mtx));                \
+             mtx_lock(&sctp_it_ctl.ipi_iterator_wq_mtx);                \
 } while (0)
 
-#define SCTP_IPI_ITERATOR_WQ_UNLOCK()		mtx_unlock(&SCTP_BASE_INFO(ipi_iterator_wq_mtx))
+#define SCTP_IPI_ITERATOR_WQ_UNLOCK()		mtx_unlock(&sctp_it_ctl.ipi_iterator_wq_mtx)
 
 
 #define SCTP_IP_PKTLOG_INIT() \
@@ -300,25 +294,45 @@ extern int sctp_logoff_stuff;
 #endif
 
 #define SCTP_ITERATOR_LOCK_INIT() \
-        mtx_init(&SCTP_BASE_INFO(it_mtx), "sctp-it", "iterator", MTX_DEF)
+        mtx_init(&sctp_it_ctl.it_mtx, "sctp-it", "iterator", MTX_DEF)
 
 #ifdef INVARIANTS
 #define SCTP_ITERATOR_LOCK() \
 	do {								\
-		if (mtx_owned(&SCTP_BASE_INFO(it_mtx)))			\
+		if (mtx_owned(&sctp_it_ctl.it_mtx))			\
 			panic("Iterator Lock");				\
-		mtx_lock(&SCTP_BASE_INFO(it_mtx));				\
+		mtx_lock(&sctp_it_ctl.it_mtx);				\
 	} while (0)
 #else
 #define SCTP_ITERATOR_LOCK() \
 	do {								\
-		mtx_lock(&SCTP_BASE_INFO(it_mtx));				\
+		mtx_lock(&sctp_it_ctl.it_mtx);				\
 	} while (0)
 
 #endif
 
-#define SCTP_ITERATOR_UNLOCK()	        mtx_unlock(&SCTP_BASE_INFO(it_mtx))
-#define SCTP_ITERATOR_LOCK_DESTROY()	mtx_destroy(&SCTP_BASE_INFO(it_mtx))
+#define SCTP_ITERATOR_UNLOCK()	        mtx_unlock(&sctp_it_ctl.it_mtx)
+#define SCTP_ITERATOR_LOCK_DESTROY()	mtx_destroy(&sctp_it_ctl.it_mtx)
+
+
+#define SCTP_WQ_ADDR_INIT() do { \
+        mtx_init(&SCTP_BASE_INFO(wq_addr_mtx), "sctp-addr-wq","sctp_addr_wq",MTX_DEF); \
+ } while (0)
+
+#define SCTP_WQ_ADDR_DESTROY() do  { \
+        if(mtx_owned(&SCTP_BASE_INFO(wq_addr_mtx))) { \
+             mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \
+        } \
+	    mtx_destroy(&SCTP_BASE_INFO(wq_addr_mtx)); \
+      }  while (0)
+
+#define SCTP_WQ_ADDR_LOCK()	do { \
+             mtx_lock(&SCTP_BASE_INFO(wq_addr_mtx));  \
+} while (0)
+#define SCTP_WQ_ADDR_UNLOCK() do { \
+		mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \
+} while (0)
+
 
 
 #define SCTP_INCR_EP_COUNT() \

Modified: head/sys/netinet/sctp_pcb.c
==============================================================================
--- head/sys/netinet/sctp_pcb.c	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_pcb.c	Sun May 16 17:03:56 2010	(r208160)
@@ -692,13 +692,11 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, vo
 		(void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
 		wi->ifa = sctp_ifap;
 		wi->action = SCTP_ADD_IP_ADDRESS;
-		SCTP_IPI_ITERATOR_WQ_LOCK();
-		/*
-		 * Should this really be a tailq? As it is we will process
-		 * the newest first :-0
-		 */
+
+		SCTP_WQ_ADDR_LOCK();
 		LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
-		SCTP_IPI_ITERATOR_WQ_UNLOCK();
+		SCTP_WQ_ADDR_UNLOCK();
+
 		sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
 		    (struct sctp_inpcb *)NULL,
 		    (struct sctp_tcb *)NULL,
@@ -806,13 +804,13 @@ out_now:
 		(void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
 		wi->ifa = sctp_ifap;
 		wi->action = SCTP_DEL_IP_ADDRESS;
-		SCTP_IPI_ITERATOR_WQ_LOCK();
+		SCTP_WQ_ADDR_LOCK();
 		/*
 		 * Should this really be a tailq? As it is we will process
 		 * the newest first :-0
 		 */
 		LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
-		SCTP_IPI_ITERATOR_WQ_UNLOCK();
+		SCTP_WQ_ADDR_UNLOCK();
 
 		sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
 		    (struct sctp_inpcb *)NULL,
@@ -3017,57 +3015,68 @@ continue_anyway:
 
 
 static void
-sctp_iterator_inp_being_freed(struct sctp_inpcb *inp, struct sctp_inpcb *inp_next)
+sctp_iterator_inp_being_freed(struct sctp_inpcb *inp)
 {
-	struct sctp_iterator *it;
+	struct sctp_iterator *it, *nit;
 
 	/*
 	 * We enter with the only the ITERATOR_LOCK in place and a write
 	 * lock on the inp_info stuff.
 	 */
-
+	it = sctp_it_ctl.cur_it;
+	if (it && (it->vn != curvnet)) {
+		/* Its not looking at our VNET */
+		return;
+	}
+	if (it && (it->inp == inp)) {
+		/*
+		 * This is tricky and we hold the iterator lock, but when it
+		 * returns and gets the lock (when we release it) the
+		 * iterator will try to operate on inp. We need to stop that
+		 * from happening. But of course the iterator has a
+		 * reference on the stcb and inp. We can mark it and it will
+		 * stop.
+		 * 
+		 * If its a single iterator situation, we set the end iterator
+		 * flag. Otherwise we set the iterator to go to the next
+		 * inp.
+		 * 
+		 */
+		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+			sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+		} else {
+			sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP;
+		}
+	}
 	/*
-	 * Go through all iterators, we must do this since it is possible
-	 * that some iterator does NOT have the lock, but is waiting for it.
-	 * And the one that had the lock has either moved in the last
-	 * iteration or we just cleared it above. We need to find all of
-	 * those guys. The list of iterators should never be very big
-	 * though.
-	 */
-	TAILQ_FOREACH(it, &SCTP_BASE_INFO(iteratorhead), sctp_nxt_itr) {
-		if (it == inp->inp_starting_point_for_iterator)
-			/* skip this guy, he's special */
+	 * Now go through and remove any single reference to our inp that
+	 * may be still pending on the list
+	 */
+	SCTP_IPI_ITERATOR_WQ_LOCK();
+	it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+	while (it) {
+		nit = TAILQ_NEXT(it, sctp_nxt_itr);
+		if (it->vn != curvnet) {
+			it = nit;
 			continue;
+		}
 		if (it->inp == inp) {
-			/*
-			 * This is tricky and we DON'T lock the iterator.
-			 * Reason is he's running but waiting for me since
-			 * inp->inp_starting_point_for_iterator has the lock
-			 * on me (the guy above we skipped). This tells us
-			 * its is not running but waiting for
-			 * inp->inp_starting_point_for_iterator to be
-			 * released by the guy that does have our INP in a
-			 * lock.
-			 */
+			/* This one points to me is it inp specific? */
 			if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
-				it->inp = NULL;
-				it->stcb = NULL;
+				/* Remove and free this one */
+				TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
+				    it, sctp_nxt_itr);
+				if (it->function_atend != NULL) {
+					(*it->function_atend) (it->pointer, it->val);
+				}
+				SCTP_FREE(it, SCTP_M_ITER);
 			} else {
-				/* set him up to do the next guy not me */
-				it->inp = inp_next;
-				it->stcb = NULL;
+				it->inp = LIST_NEXT(it->inp, sctp_list);
 			}
 		}
+		it = nit;
 	}
-	it = inp->inp_starting_point_for_iterator;
-	if (it) {
-		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
-			it->inp = NULL;
-		} else {
-			it->inp = inp_next;
-		}
-		it->stcb = NULL;
-	}
+	SCTP_IPI_ITERATOR_WQ_UNLOCK();
 }
 
 /* release sctp_inpcb unbind the port */
@@ -3083,7 +3092,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, 
 	 * all associations. d) finally the ep itself.
 	 */
 	struct sctp_pcb *m;
-	struct sctp_inpcb *inp_save;
 	struct sctp_tcb *asoc, *nasoc;
 	struct sctp_laddr *laddr, *nladdr;
 	struct inpcb *ip_pcb;
@@ -3100,6 +3108,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, 
 	sctp_log_closing(inp, NULL, 0);
 #endif
 	SCTP_ITERATOR_LOCK();
+
 	so = inp->sctp_socket;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 		/* been here before.. eeks.. get out of here */
@@ -3126,6 +3135,9 @@ sctp_inpcb_free(struct sctp_inpcb *inp, 
 		inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE;
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+
+		/* mark any iterators on the list or being processed */
+		sctp_iterator_inp_being_freed(inp);
 	}
 	sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL,
 	    SCTP_FROM_SCTP_PCB + SCTP_LOC_1);
@@ -3494,11 +3506,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, 
 		shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
 	}
 
-	inp_save = LIST_NEXT(inp, sctp_list);
 	LIST_REMOVE(inp, sctp_list);
 
-	/* fix any iterators only after out of the list */
-	sctp_iterator_inp_being_freed(inp, inp_save);
 	/*
 	 * if we have an address list the following will free the list of
 	 * ifaddr's that are set into this ep. Again macro limitations here,
@@ -5436,8 +5445,6 @@ sctp_pcb_init()
 	/* init the empty list of (All) Endpoints */
 	LIST_INIT(&SCTP_BASE_INFO(listhead));
 
-	/* init the iterator head */
-	TAILQ_INIT(&SCTP_BASE_INFO(iteratorhead));
 
 	/* init the hash table of endpoints */
 	TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize));
@@ -5500,16 +5507,15 @@ sctp_pcb_init()
 	/* Master Lock INIT for info structure */
 	SCTP_INP_INFO_LOCK_INIT();
 	SCTP_STATLOG_INIT_LOCK();
-	SCTP_ITERATOR_LOCK_INIT();
 
 	SCTP_IPI_COUNT_INIT();
 	SCTP_IPI_ADDR_INIT();
-	SCTP_IPI_ITERATOR_WQ_INIT();
 #ifdef SCTP_PACKET_LOGGING
 	SCTP_IP_PKTLOG_INIT();
 #endif
 	LIST_INIT(&SCTP_BASE_INFO(addr_wq));
 
+	SCTP_WQ_ADDR_INIT();
 	/* not sure if we need all the counts */
 	SCTP_BASE_INFO(ipi_count_ep) = 0;
 	/* assoc/tcb zone info */
@@ -5537,11 +5543,7 @@ sctp_pcb_init()
 		LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]);
 	}
 
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
-	SCTP_BASE_INFO(iterator_running) = 0;
-	SCTP_BASE_INFO(threads_must_exit) = 0;
 	sctp_startup_iterator();
-#endif
 
 	/*
 	 * INIT the default VRF which for BSD is the only one, other O/S's
@@ -5565,30 +5567,49 @@ sctp_pcb_finish(void)
 	struct sctpvtaghead *chain;
 	struct sctp_tagblock *twait_block, *prev_twait_block;
 	struct sctp_laddr *wi;
-	struct sctp_iterator *it;
 	int i;
 
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
-	SCTP_BASE_INFO(threads_must_exit) = 1;
-	/* Wake the thread up so it will exit now */
-	sctp_wakeup_iterator();
+	/*
+	 * Free BSD the it thread never exits but we do clean up. The only
+	 * way freebsd reaches here if we have VRF's but we still add the
+	 * ifdef to make it compile on old versions.
+	 */
+	{
+		struct sctp_iterator *it, *nit;
+
+		SCTP_IPI_ITERATOR_WQ_LOCK();
+		it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
+		while (it) {
+			nit = TAILQ_NEXT(it, sctp_nxt_itr);
+			if (it->vn != curvnet) {
+				it = nit;
+				continue;
+			}
+			TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
+			    it, sctp_nxt_itr);
+			if (it->function_atend != NULL) {
+				(*it->function_atend) (it->pointer, it->val);
+			}
+			SCTP_FREE(it, SCTP_M_ITER);
+			it = nit;
+		}
+		SCTP_IPI_ITERATOR_WQ_UNLOCK();
+		SCTP_ITERATOR_LOCK();
+		if ((sctp_it_ctl.cur_it) &&
+		    (sctp_it_ctl.cur_it->vn == curvnet)) {
+			sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+		}
+		SCTP_ITERATOR_UNLOCK();
+	}
 
-#endif
 	SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
-	SCTP_IPI_ITERATOR_WQ_LOCK();
+	SCTP_WQ_ADDR_LOCK();
 	while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) {
 		LIST_REMOVE(wi, sctp_nxt_addr);
 		SCTP_DECR_LADDR_COUNT();
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
 	}
-	SCTP_IPI_ITERATOR_WQ_UNLOCK();
-	while ((it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead))) != NULL) {
-		if (it->function_atend != NULL) {
-			(*it->function_atend) (it->pointer, it->val);
-		}
-		TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr);
-		SCTP_FREE(it, SCTP_M_ITER);
-	}
+	SCTP_WQ_ADDR_UNLOCK();
 
 	/*
 	 * free the vrf/ifn/ifa lists and hashes (be sure address monitor is
@@ -5640,10 +5661,11 @@ sctp_pcb_finish(void)
 	SCTP_IP_PKTLOG_DESTROY();
 #endif
 	SCTP_IPI_ADDR_DESTROY();
-	SCTP_ITERATOR_LOCK_DESTROY();
 	SCTP_STATLOG_DESTROY();
 	SCTP_INP_INFO_LOCK_DESTROY();
 
+	SCTP_WQ_ADDR_DESTROY();
+
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
@@ -6632,6 +6654,7 @@ sctp_initiate_iterator(inp_func inpf,
 	it->asoc_state = asoc_state;
 	it->function_inp_end = inpe;
 	it->no_chunk_output = chunk_output_off;
+	it->vn = curvnet;
 	if (s_inp) {
 		it->inp = s_inp;
 		it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP;
@@ -6647,22 +6670,11 @@ sctp_initiate_iterator(inp_func inpf,
 	if (it->inp) {
 		SCTP_INP_INCR_REF(it->inp);
 	}
-	TAILQ_INSERT_TAIL(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr);
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
-	if (SCTP_BASE_INFO(iterator_running) == 0) {
+	TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+	if (sctp_it_ctl.iterator_running == 0) {
 		sctp_wakeup_iterator();
 	}
 	SCTP_IPI_ITERATOR_WQ_UNLOCK();
-#else
-	if (it->inp)
-		SCTP_INP_DECR_REF(it->inp);
-	SCTP_IPI_ITERATOR_WQ_UNLOCK();
-	/* Init the timer */
-	SCTP_OS_TIMER_INIT(&it->tmr.timer);
-	/* add to the list of all iterators */
-	sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, (struct sctp_inpcb *)it,
-	    NULL, NULL);
-#endif
 	/* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */
 	return (0);
 }

Modified: head/sys/netinet/sctp_pcb.h
==============================================================================
--- head/sys/netinet/sctp_pcb.h	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_pcb.h	Sun May 16 17:03:56 2010	(r208160)
@@ -177,8 +177,6 @@ struct sctp_epinfo {
 	struct sctppcbhead listhead;
 	struct sctpladdr addr_wq;
 
-	struct sctpiterators iteratorhead;
-	int threads_must_exit;
 	/* ep zone info */
 	sctp_zone_t ipi_zone_ep;
 	sctp_zone_t ipi_zone_asoc;
@@ -191,10 +189,10 @@ struct sctp_epinfo {
 	sctp_zone_t ipi_zone_asconf_ack;
 
 	struct rwlock ipi_ep_mtx;
-	struct mtx it_mtx;
 	struct mtx ipi_iterator_wq_mtx;
 	struct rwlock ipi_addr_mtx;
 	struct mtx ipi_pktlog_mtx;
+	struct mtx wq_addr_mtx;
 	uint32_t ipi_count_ep;
 
 	/* assoc/tcb zone info */
@@ -228,14 +226,9 @@ struct sctp_epinfo {
 	uint32_t ipi_free_chunks;
 	uint32_t ipi_free_strmoq;
 
-
 	struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE];
 
 	/* address work queue handling */
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
-	uint32_t iterator_running;
-	SCTP_PROCESS_STRUCT thread_proc;
-#endif
 	struct sctp_timer addr_wq_timer;
 
 };

Modified: head/sys/netinet/sctp_structs.h
==============================================================================
--- head/sys/netinet/sctp_structs.h	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_structs.h	Sun May 16 17:03:56 2010	(r208160)
@@ -108,9 +108,11 @@ typedef void (*end_func) (void *ptr, uin
 
 struct sctp_iterator {
 	TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr;
+	struct vnet *vn;
 	struct sctp_timer tmr;
 	struct sctp_inpcb *inp;	/* current endpoint */
 	struct sctp_tcb *stcb;	/* current* assoc */
+	struct sctp_inpcb *next_inp;	/* special hook to skip to */
 	asoc_func function_assoc;	/* per assoc function */
 	inp_func function_inp;	/* per endpoint function */
 	inp_func function_inp_end;	/* end INP function */
@@ -129,6 +131,7 @@ struct sctp_iterator {
 #define SCTP_ITERATOR_DO_ALL_INP	0x00000001
 #define SCTP_ITERATOR_DO_SINGLE_INP	0x00000002
 
+
 TAILQ_HEAD(sctpiterators, sctp_iterator);
 
 struct sctp_copy_all {
@@ -145,6 +148,20 @@ struct sctp_asconf_iterator {
 	int cnt;
 };
 
+struct iterator_control {
+	struct mtx ipi_iterator_wq_mtx;
+	struct mtx it_mtx;
+	SCTP_PROCESS_STRUCT thread_proc;
+	struct sctpiterators iteratorhead;
+	struct sctp_iterator *cur_it;
+	uint32_t iterator_running;
+	uint32_t iterator_flags;
+};
+
+#define SCTP_ITERATOR_MUST_EXIT   	0x00000001
+#define SCTP_ITERATOR_STOP_CUR_IT  	0x00000002
+#define SCTP_ITERATOR_STOP_CUR_INP  	0x00000004
+
 struct sctp_net_route {
 	sctp_rtentry_t *ro_rt;
 	void *ro_lle;

Modified: head/sys/netinet/sctp_timer.c
==============================================================================
--- head/sys/netinet/sctp_timer.c	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctp_timer.c	Sun May 16 17:03:56 2010	(r208160)
@@ -1880,143 +1880,3 @@ sctp_autoclose_timer(struct sctp_inpcb *
 		}
 	}
 }
-
-void
-sctp_iterator_timer(struct sctp_iterator *it)
-{
-	int iteration_count = 0;
-	int inp_skip = 0;
-
-	/*
-	 * only one iterator can run at a time. This is the only way we can
-	 * cleanly pull ep's from underneath all the running interators when
-	 * a ep is freed.
-	 */
-	SCTP_ITERATOR_LOCK();
-	if (it->inp == NULL) {
-		/* iterator is complete */
-done_with_iterator:
-		SCTP_ITERATOR_UNLOCK();
-		SCTP_INP_INFO_WLOCK();
-		TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr);
-		/* stopping the callout is not needed, in theory */
-		SCTP_INP_INFO_WUNLOCK();
-		(void)SCTP_OS_TIMER_STOP(&it->tmr.timer);
-		if (it->function_atend != NULL) {
-			(*it->function_atend) (it->pointer, it->val);
-		}
-		SCTP_FREE(it, SCTP_M_ITER);
-		return;
-	}
-select_a_new_ep:
-	SCTP_INP_WLOCK(it->inp);
-	while (((it->pcb_flags) &&
-	    ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
-	    ((it->pcb_features) &&
-	    ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
-		/* endpoint flags or features don't match, so keep looking */
-		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
-			SCTP_INP_WUNLOCK(it->inp);
-			goto done_with_iterator;
-		}
-		SCTP_INP_WUNLOCK(it->inp);
-		it->inp = LIST_NEXT(it->inp, sctp_list);
-		if (it->inp == NULL) {
-			goto done_with_iterator;
-		}
-		SCTP_INP_WLOCK(it->inp);
-	}
-	if ((it->inp->inp_starting_point_for_iterator != NULL) &&
-	    (it->inp->inp_starting_point_for_iterator != it)) {
-		SCTP_PRINTF("Iterator collision, waiting for one at %p\n",
-		    it->inp);
-		SCTP_INP_WUNLOCK(it->inp);
-		goto start_timer_return;
-	}
-	/* mark the current iterator on the endpoint */
-	it->inp->inp_starting_point_for_iterator = it;
-	SCTP_INP_WUNLOCK(it->inp);
-	SCTP_INP_RLOCK(it->inp);
-	/* now go through each assoc which is in the desired state */
-	if (it->done_current_ep == 0) {
-		if (it->function_inp != NULL)
-			inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
-		it->done_current_ep = 1;
-	}
-	if (it->stcb == NULL) {
-		/* run the per instance function */
-		it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
-	}
-	SCTP_INP_RUNLOCK(it->inp);
-	if ((inp_skip) || it->stcb == NULL) {
-		if (it->function_inp_end != NULL) {
-			inp_skip = (*it->function_inp_end) (it->inp,
-			    it->pointer,
-			    it->val);
-		}
-		goto no_stcb;
-	}
-	if ((it->stcb) &&
-	    (it->stcb->asoc.stcb_starting_point_for_iterator == it)) {
-		it->stcb->asoc.stcb_starting_point_for_iterator = NULL;
-	}
-	while (it->stcb) {
-		SCTP_TCB_LOCK(it->stcb);
-		if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
-			/* not in the right state... keep looking */
-			SCTP_TCB_UNLOCK(it->stcb);
-			goto next_assoc;
-		}
-		/* mark the current iterator on the assoc */
-		it->stcb->asoc.stcb_starting_point_for_iterator = it;
-		/* see if we have limited out the iterator loop */
-		iteration_count++;
-		if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
-	start_timer_return:
-			/* set a timer to continue this later */
-			if (it->stcb)
-				SCTP_TCB_UNLOCK(it->stcb);
-			sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR,
-			    (struct sctp_inpcb *)it, NULL, NULL);
-			SCTP_ITERATOR_UNLOCK();
-			return;
-		}
-		/* run function on this one */
-		(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
-
-		/*
-		 * we lie here, it really needs to have its own type but
-		 * first I must verify that this won't effect things :-0
-		 */
-		if (it->no_chunk_output == 0)
-			sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
-
-		SCTP_TCB_UNLOCK(it->stcb);
-next_assoc:
-		it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
-		if (it->stcb == NULL) {
-			if (it->function_inp_end != NULL) {
-				inp_skip = (*it->function_inp_end) (it->inp,
-				    it->pointer,
-				    it->val);
-			}
-		}
-	}
-no_stcb:
-	/* done with all assocs on this endpoint, move on to next endpoint */
-	it->done_current_ep = 0;
-	SCTP_INP_WLOCK(it->inp);
-	it->inp->inp_starting_point_for_iterator = NULL;
-	SCTP_INP_WUNLOCK(it->inp);
-	if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
-		it->inp = NULL;
-	} else {
-		SCTP_INP_INFO_RLOCK();
-		it->inp = LIST_NEXT(it->inp, sctp_list);
-		SCTP_INP_INFO_RUNLOCK();
-	}
-	if (it->inp == NULL) {
-		goto done_with_iterator;
-	}
-	goto select_a_new_ep;
-}

Modified: head/sys/netinet/sctputil.c
==============================================================================
--- head/sys/netinet/sctputil.c	Sun May 16 16:54:05 2010	(r208159)
+++ head/sys/netinet/sctputil.c	Sun May 16 17:03:56 2010	(r208160)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_cc_functions.h>
+#include <netinet/sctp_bsd_addr.h>
 
 #define NUMBER_OF_MTU_SIZES 18
 
@@ -1255,7 +1256,6 @@ sctp_expand_mapping_array(struct sctp_as
 }
 
 
-#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
 static void
 sctp_iterator_work(struct sctp_iterator *it)
 {
@@ -1277,27 +1277,23 @@ done_with_iterator:
 		return;
 	}
 select_a_new_ep:
-	SCTP_INP_WLOCK(it->inp);
+	SCTP_INP_RLOCK(it->inp);
 	while (((it->pcb_flags) &&
 	    ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
 	    ((it->pcb_features) &&
 	    ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
 		/* endpoint flags or features don't match, so keep looking */
 		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
-			SCTP_INP_WUNLOCK(it->inp);
+			SCTP_INP_RUNLOCK(it->inp);
 			goto done_with_iterator;
 		}
-		SCTP_INP_WUNLOCK(it->inp);
+		SCTP_INP_RUNLOCK(it->inp);
 		it->inp = LIST_NEXT(it->inp, sctp_list);
 		if (it->inp == NULL) {
 			goto done_with_iterator;
 		}
-		SCTP_INP_WLOCK(it->inp);
+		SCTP_INP_RLOCK(it->inp);
 	}
-
-	SCTP_INP_WUNLOCK(it->inp);
-	SCTP_INP_RLOCK(it->inp);
-
 	/* now go through each assoc which is in the desired state */
 	if (it->done_current_ep == 0) {
 		if (it->function_inp != NULL)
@@ -1330,13 +1326,34 @@ select_a_new_ep:
 			/* Pause to let others grab the lock */
 			atomic_add_int(&it->stcb->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK(it->stcb);
-
 			SCTP_INP_INCR_REF(it->inp);
 			SCTP_INP_RUNLOCK(it->inp);
 			SCTP_ITERATOR_UNLOCK();
 			SCTP_ITERATOR_LOCK();
+			if (sctp_it_ctl.iterator_flags) {
+				/* We won't be staying here */
+				SCTP_INP_DECR_REF(it->inp);
+				atomic_add_int(&it->stcb->asoc.refcnt, -1);
+				if (sctp_it_ctl.iterator_flags &
+				    SCTP_ITERATOR_MUST_EXIT) {
+					goto done_with_iterator;
+				}
+				if (sctp_it_ctl.iterator_flags &
+				    SCTP_ITERATOR_STOP_CUR_IT) {
+					sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_IT;
+					goto done_with_iterator;
+				}
+				if (sctp_it_ctl.iterator_flags &
+				    SCTP_ITERATOR_STOP_CUR_INP) {
+					sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_INP;
+					goto no_stcb;
+				}
+				/* If we reach here huh? */
+				printf("Unknown it ctl flag %x\n",
+				    sctp_it_ctl.iterator_flags);
+				sctp_it_ctl.iterator_flags = 0;
+			}
 			SCTP_INP_RLOCK(it->inp);
-
 			SCTP_INP_DECR_REF(it->inp);
 			SCTP_TCB_LOCK(it->stcb);
 			atomic_add_int(&it->stcb->asoc.refcnt, -1);
@@ -1368,8 +1385,6 @@ next_assoc:
 no_stcb:
 	/* done with all assocs on this endpoint, move on to next endpoint */
 	it->done_current_ep = 0;
-	SCTP_INP_WLOCK(it->inp);
-	SCTP_INP_WUNLOCK(it->inp);
 	if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
 		it->inp = NULL;
 	} else {
@@ -1390,27 +1405,28 @@ sctp_iterator_worker(void)
 
 	/* This function is called with the WQ lock in place */
 
-	SCTP_BASE_INFO(iterator_running) = 1;
-again:
-	it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead));
+	sctp_it_ctl.iterator_running = 1;
+	sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
 	while (it) {
 		/* now lets work on this one */
-		TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr);
+		TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
 		SCTP_IPI_ITERATOR_WQ_UNLOCK();
+		CURVNET_SET(it->vn);
 		sctp_iterator_work(it);
+
+		CURVNET_RESTORE();
 		SCTP_IPI_ITERATOR_WQ_LOCK();
+		if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) {
+			sctp_it_ctl.cur_it = NULL;
+			break;
+		}
 		/* sa_ignore FREED_MEMORY */
-		it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead));
-	}
-	if (TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead))) {
-		goto again;
+		sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
 	}
-	SCTP_BASE_INFO(iterator_running) = 0;
+	sctp_it_ctl.iterator_running = 0;
 	return;
 }
 
-#endif
-
 
 static void
 sctp_handle_addr_wq(void)
@@ -1431,7 +1447,8 @@ sctp_handle_addr_wq(void)
 	}
 	LIST_INIT(&asc->list_of_work);
 	asc->cnt = 0;
-	SCTP_IPI_ITERATOR_WQ_LOCK();
+
+	SCTP_WQ_ADDR_LOCK();
 	wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq));
 	while (wi != NULL) {
 		LIST_REMOVE(wi, sctp_nxt_addr);
@@ -1439,7 +1456,8 @@ sctp_handle_addr_wq(void)
 		asc->cnt++;
 		wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq));
 	}
-	SCTP_IPI_ITERATOR_WQ_UNLOCK();
+	SCTP_WQ_ADDR_UNLOCK();
+
 	if (asc->cnt == 0) {
 		SCTP_FREE(asc, SCTP_M_ASC_IT);
 	} else {
@@ -1470,7 +1488,6 @@ sctp_timeout_handler(void *t)
 
 #endif
 	int did_output, type;
-	struct sctp_iterator *it = NULL;
 
 	tmr = (struct sctp_timer *)t;
 	inp = (struct sctp_inpcb *)tmr->ep;
@@ -1509,10 +1526,6 @@ sctp_timeout_handler(void *t)
 	}
 	/* if this is an iterator timeout, get the struct and clear inp */
 	tmr->stopped_from = 0xa003;
-	if (tmr->type == SCTP_TIMER_TYPE_ITERATOR) {
-		it = (struct sctp_iterator *)inp;
-		inp = NULL;
-	}
 	type = tmr->type;
 	if (inp) {
 		SCTP_INP_INCR_REF(inp);
@@ -1611,10 +1624,6 @@ sctp_timeout_handler(void *t)
 	case SCTP_TIMER_TYPE_ADDR_WQ:
 		sctp_handle_addr_wq();
 		break;
-	case SCTP_TIMER_TYPE_ITERATOR:
-		SCTP_STAT_INCR(sctps_timoiterator);
-		sctp_iterator_timer(it);
-		break;
 	case SCTP_TIMER_TYPE_SEND:
 		if ((stcb == NULL) || (inp == NULL)) {
 			break;
@@ -1962,15 +1971,6 @@ sctp_timer_start(int t_type, struct sctp
 		tmr = &SCTP_BASE_INFO(addr_wq_timer);
 		to_ticks = SCTP_ADDRESS_TICK_DELAY;
 		break;
-	case SCTP_TIMER_TYPE_ITERATOR:
-		{
-			struct sctp_iterator *it;
-
-			it = (struct sctp_iterator *)inp;
-			tmr = &it->tmr;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201005161703.o4GH3ug7061710>