Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 16 Jan 2010 08:23:09 +0000 (UTC)
From:      Luigi Rizzo <luigi@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r202433 - user/luigi/ipfw3-head/sys/netinet/ipfw
Message-ID:  <201001160823.o0G8N9wn039334@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: luigi
Date: Sat Jan 16 08:23:09 2010
New Revision: 202433
URL: http://svn.freebsd.org/changeset/base/202433

Log:
  import initial wf2q+ code ported by Riccardo Panicucci.
  Code not working yet.

Modified:
  user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c	Sat Jan 16 08:12:22 2010	(r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c	Sat Jan 16 08:23:09 2010	(r202433)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,7 +25,6 @@
  * SUCH DAMAGE.
  */
 
-#ifdef _KERNEL
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -35,60 +35,354 @@
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
-#else
-#include "dn_test.h"
-#endif
 #include <netinet/ip_dummynet.h>
 #include <netinet/ipfw/dn_heap.h>
 #include <netinet/ipfw/ip_dn_private.h>
 #include <netinet/ipfw/dn_sched.h>
 
+#ifndef MAX64
+#define MAX64(x,y)  (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
+#endif
+
+#ifndef MY_M
+#define MY_M    16 /* shift for fixed point arithmetic */
+#endif
+
+#ifndef isdigit
+#define isdigit(x)  ((x) >= '0' && (x) <= '9' )
+#endif
+
+/*
+ * Private information for the scheduler instance:
+ * sch_heap (key is Finish time) returns the next queue to serve
+ * ne_heap (key is Start time) stores not-eligible queues
+ * idle_heap (key=start/finish time) stores idle flows. It must
+ *	support extract-from-middle.
+ * A flow is normally only in 1 of the three heaps.
+ * XXX todo: use a more efficient data structure, e.g. a tree sorted
+ * by F with min_subtree(S) in each node
+ */
+struct wf2qp_si {
+    struct dn_heap *sch_heap;      /* top extract - key Finish  time */
+    struct dn_heap *ne_heap;   /* top extract - key Start   time */
+    struct dn_heap *idle_heap;   /* random extract - key Start=Finish time */
+    dn_key V ;                  /* virtual time */
+    int sum;
+};
+
+struct wf2qp_queue {
+    struct new_queue g;
+
+    dn_key S,F;                /* start time, finish time */
+
+    int heap_pos; /* position (index) of struct in heap */
+//     struct wf2qp_queue *next; /* next queue in the bucket */
+};
+
 /*
- * This file implements a FIFO scheduler for a single queue.
- * The queue is allocated as part of the scheduler instance,
- * and there is a single flowset is in the template which stores
- * queue-related information.
- * No parameters are used except queue sizes and management policy.
- * Enqueue and dequeue use the default library functions.
+ * This file implements a WF2Q+ scheduler as it has been in dummynet
+ * since 2000.
+ * The scheduler supports per-flow queues and has O(log N) complexity.
  */
+
 static int 
-fifo_enqueue(struct new_sch_inst *si, struct new_queue *q, struct mbuf *m)
+wf2qp_enqueue(struct new_sch_inst *_si, struct new_queue *q, struct mbuf *m)
 {
-	return dn_enqueue((struct new_queue *)(si+1), m, 0);
+    struct new_fsk *fs = q->fs;
+    struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+    struct wf2qp_queue *alg_fq;
+    uint64_t len = m->m_pkthdr.len;
+
+    int q_was_idle = 0;
+
+    if (q->mq.head == NULL)
+        q_was_idle = 1;
+
+    if (dn_enqueue(q, m, 0)) /* packet was dropped */
+        return 1;
+
+    if(!q_was_idle)
+        return 0;
+
+    /* If reach this point, queue q was idle */ 
+    alg_fq = (struct wf2qp_queue *)q;
+
+    if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
+        /* F<S means timestamps are invalid ->brand new queue. */
+        alg_fq->S = si->V;
+        si->sum += fs->fs.weight; /* Add weight of new queue. */
+    } else { /* if it was idle then it was in the idle heap */
+        heap_extract(si->idle_heap, q);
+        alg_fq->S = MAX64(alg_fq->F, si->V);
+    }
+    // XXX use div64
+    alg_fq->F = alg_fq->S + (len << MY_M) / (uint64_t)fs->fs.weight;
+
+    if (si->ne_heap->elements == 0 && si->sch_heap->elements == 0)
+        si->V = MAX64(alg_fq->S, si->V);
+
+    /*
+     * Look at eligibility. A flow is not eligibile if S>V (when
+     * this happens, it means that there is some other flow already
+     * scheduled for the same pipe, so the sch_heap cannot be
+     * empty). If the flow is not eligible we just store it in the
+     * ne_heap. Otherwise, we store in the sch_heap.
+     * Note that for all flows in sch_heap (SCH), S_i <= V,
+     * and for all flows in ne_heap (NEH), S_i > V.
+     * So when we need to compute max(V, min(S_i)) forall i in
+     * SCH+NEH, we only need to look into NEH.
+     */
+    if (DN_KEY_LT(si->V, alg_fq->S)) {
+        /* S>V means flow Not eligible. */
+        if (si->sch_heap->elements == 0)
+            printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
+        heap_insert(si->ne_heap, alg_fq->S, q);
+    }
+    else {
+        heap_insert(si->sch_heap, alg_fq->F, q);
+        /* Pipe *must* be idle. */
+#if 0
+        if ((si->sch_heap.elements != 1) && (s->numbytes >= 0))
+            printf("dummynet: OUCH! pipe should have been idle!\n");
+        DPRINTF(("dummynet: waking up pipe %d at %d\n",
+                  s->sched_nr, (int)(alg_fq->F >> MY_M)));
+#endif
+    }
+    return 0;
 }
 
 static struct mbuf *
-fifo_dequeue(struct new_sch_inst *si)
+wf2qp_dequeue(struct new_sch_inst *_si)
+{
+    /* Access scheduler instance private data */
+    struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+    
+    struct mbuf *pkt = NULL;
+    struct new_queue *qq, *q1, *q = NULL;
+
+    struct dn_heap *sch = si->sch_heap;
+    struct dn_heap *neh = si->ne_heap;
+    
+    struct wf2qp_queue *alg_fq = NULL;
+
+    uint64_t len;
+
+    /* XXX:
+     * This block in the previous version were done in the dummynet_task()
+     * function. Check if this is the appropriate function to do it.
+     */
+    if (si->idle_heap->elements > 0 &&
+		DN_KEY_LT(HEAP_TOP(si->idle_heap)->key, si->V)) {
+        qq = HEAP_TOP(si->idle_heap)->object;
+        alg_fq = (struct wf2qp_queue *) qq;
+
+        heap_extract(si->idle_heap, NULL);
+        /* Mark timestamp as invalid. */
+        /* XXX why don't delete the queue now? */
+        alg_fq->S = alg_fq->F + 1;
+        si->sum -= qq->fs->fs.weight;
+    }
+
+
+#if 0
+    if (p->if_name[0] == 0) {   /* tx clock is simulated */
+        s->numbytes += (curr_time - s->sched_time) * p->bandwidth;
+    }
+    else {
+        /* tx clock is for real, the ifq must be empty or this is a NOP. */
+        if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
+            return NULL;
+         else {
+            DPRINTF(("dummynet: pipe %d ready from %s --\n",
+                        p->pipe_nr, p->if_name));
+        }
+    }
+#endif
+    /*
+     * While we have backlogged traffic AND credit, we need to do
+     * something on the queue.
+     * XXX cannot be a 'while --
+     */
+    while ( sch->elements > 0 || neh->elements > 0) {
+        if (sch->elements > 0) {
+            /* Have some eligible pkts to send out. */
+            q = HEAP_TOP(sch)->object;
+            alg_fq = (struct wf2qp_queue *)q;
+            
+            pkt = q->mq.head;
+            
+            heap_extract(sch, NULL); /* Remove queue from heap. */
+
+            si->V += (pkt->m_pkthdr.len << MY_M) / si->sum;   /* Update V. */
+            alg_fq->S = alg_fq->F;  /* Update start time. */
+            
+            if (q->ni.length == 1)     /* Flow not backlogged any more. */
+                heap_insert(si->idle_heap, alg_fq->F, q);
+            else {
+                /* Still backlogged. */
+
+                /*
+                 * Update F and position in backlogged queue,
+                 * then put flow in ne_heap
+                 * (we will fix this later).
+                 */
+                /* len = (q->head)->m_pkthdr.len; */
+                len = ((q->mq.head)->m_nextpkt)->m_pkthdr.len;
+                alg_fq->F += (len << MY_M) / (uint64_t)q->fs->fs.weight;
+                if (DN_KEY_LEQ(alg_fq->S, si->V)) {
+                    heap_insert(neh, alg_fq->S, q);
+                }
+                else {
+                    heap_insert(sch, alg_fq->F, q);
+                }
+            }
+        }
+        /*
+         * Now compute V = max(V, min(S_i)). Remember that all elements
+         * in sch have by definition S_i <= V so if sch is not empty,
+         * V is surely the max and we must not update it. Conversely,
+         * if sch is empty we only need to look at neh.
+         */
+        if (sch->elements == 0 && neh->elements > 0)
+            si->V = MAX64(si->V, HEAP_TOP(neh)->key);
+        /* Move from neh to sch any packets that have become eligible */
+        while (neh->elements > 0 && DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
+            qq = HEAP_TOP(neh)->object;
+            alg_fq = (struct wf2qp_queue *)qq;
+            heap_extract(neh, NULL);
+            heap_insert(sch, alg_fq->F, qq);
+        }
+#if 0
+        if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
+            s->numbytes = -1;   /* Mark not ready for I/O. */ /* WARNING*/
+            debMes("  %s f\n", __FUNCTION__);
+            break;
+        }
+#endif
+        if (pkt != NULL)
+            return dn_dequeue(q);
+
+    } /* end of while */
+
+    if (sch->elements == 0 && neh->elements == 0 && 
+                                    si->idle_heap->elements > 0) {
+        /*
+         * No traffic and no events scheduled.
+         * We can get rid of idle-heap.
+         */
+        int i;
+        int elem = si->idle_heap->elements;
+
+        for (i = 0; i < elem; i++) {
+            /* XXX NOTE: before the heap changed, this block extracted
+             *           all elements
+             */
+            q1 = HEAP_TOP(si->idle_heap)->object;
+            heap_extract(si->idle_heap, NULL);
+            alg_fq = (struct wf2qp_queue *)q1;
+
+            alg_fq->F = 0;
+            alg_fq->S = alg_fq->F + 1;
+            /* XXX: why don't call delete queue? */
+        }
+        si->sum = 0;
+        si->V = 0;
+//         si->idle_heap.elements = 0;
+    }
+    /*
+     * If we are getting clocks from dummynet (not a real interface) and
+     * If we are under credit, schedule the next ready event.
+     * Also fix the delivery time of the last packet.
+     */
+
+#if 0
+    if (p->if_name[0]==0 && s->numbytes < 0)
+    ...
+#endif
+    return NULL;
+}
+
+static int
+wf2qp_new_sched(struct new_sch_inst *_si)
+{
+    struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+    int ofs = offsetof(struct wf2qp_queue, heap_pos);
+
+    /* only idle-heap supports extract from middle */
+    if (heap_init(si->idle_heap, 16, ofs) ||
+        heap_init(si->sch_heap, 16, -1) ||
+        heap_init(si->ne_heap, 16, -1)) {
+	heap_free(si->ne_heap);
+	heap_free(si->sch_heap);
+	heap_free(si->idle_heap);
+	return ENOMEM;
+    }
+    return 0;
+}
+
+static int
+wf2qp_free_sched(struct new_sch_inst *_si)
+{
+    struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+
+    heap_free(si->sch_heap);
+    heap_free(si->ne_heap);
+    heap_free(si->idle_heap);
+
+    return 0;
+}
+
+static int
+wf2qp_new_queue (struct new_queue *q)
 {
-	return dn_dequeue((struct new_queue *)(si + 1));
+    struct wf2qp_queue *tmp = (struct wf2qp_queue *) q;
+
+    q->ni.oid.subtype = DN_SCHED_WF2QP;
+
+    tmp->S = tmp->F + 1;    /* hack - mark timestamp as invalid. */
+
+    return 0;
 }
 
 static int
-wf2qp_new_sched(struct new_sch_inst *si)
+wf2qp_free_queue(struct new_queue *q)
 {
-	/* This scheduler instance contains the queue */
-	struct new_queue *q = (struct new_queue *)(si + 1);
+    struct wf2qp_si *si = (struct wf2qp_si *)q->si + 1;
+    struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+    
+    /* If the queue was valid, decrement the sum value
+     * else this was done by dequeue()
+     */
+    if(alg_fq->S != alg_fq->F + 1)
+        si->sum -= q->fs->fs.weight;
 
-        set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
-	q->si = si;
-	q->fs = si->sched->fs;
-	return 0;
+    return 0;
 }
 
 /*
- * FIFO scheduler descriptor
- * contains the type of the scheduler, the name, the size of extra
- * data structures, and function pointers.
+ * WF2Q+ scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the various
+ * structures and function pointers. If a function is not implemented,
+ * the pointer is initialized to NULL
  */
-static struct dn_sched fifo_desc = {
-	.type = DN_SCHED_WF2QP,
-	.name = "WF2QP",
-
-	.sch_inst_len = sizeof(struct new_queue),
-
-	.enqueue = fifo_enqueue,
-	.dequeue = fifo_dequeue,
-	.new_sched = wf2qp_new_sched,
+static struct dn_sched wf2qp_desc = {
+    .type = DN_SCHED_WF2QP,
+    .name = "WF2Q+",
+    .flags = DN_MULTIQUEUE,
+
+    .sch_inst_len = sizeof(struct wf2qp_si),
+    .queue_len = sizeof(struct wf2qp_queue),
+
+    .enqueue = wf2qp_enqueue,
+    .dequeue = wf2qp_dequeue,
+
+    .new_sched = wf2qp_new_sched,
+    .free_sched = wf2qp_free_sched,
+    
+    .new_queue = wf2qp_new_queue,
+    .free_queue = wf2qp_free_queue,
+
 };
 
-DECLARE_DNSCHED_MODULE(dn_wf2qp, &fifo_desc);
+
+DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Sat Jan 16 08:12:22 2010	(r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Sat Jan 16 08:23:09 2010	(r202433)
@@ -1,6 +1,5 @@
 /*-
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
@@ -305,9 +304,6 @@ extra_bits(struct mbuf *m, struct new_sc
 	return bits;
 }
 
-
-
-
 /*
  * Send traffic from a scheduler instance due by 'now'.
  * Return a pointer to the head of the queue.
@@ -338,7 +334,7 @@ serve_sched(struct mq *q, struct new_sch
 	while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
 		uint64_t len_scaled;
 		done++;
-		len_scaled = bw == 0 ? 0 : hz *
+		len_scaled = (bw == 0) ? 0 : hz *
 		    (m->m_pkthdr.len * 8 + extra_bits(m, s));
 		si->credit -= len_scaled;
 		/* Move packet in the delay line */
@@ -355,7 +351,7 @@ serve_sched(struct mq *q, struct new_sch
 	} else {
 		dn_key t;
 		KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
-		t = (bw - 1 - si->credit) / bw;
+		t = div64(bw - 1 - si->credit, bw);
 		if (m)
 			dn_tag_get(m)->output_time += t;
 		si->kflags |= DN_ACTIVE;
@@ -617,17 +613,16 @@ dummynet_io(struct mbuf **m0, int dir, s
 	}
 
 	/* compute the initial allowance */
-    {
-	struct new_pipe *pipe = &fs->sched->pipe;
-        si->credit = dn_cfg.io_fast ? pipe->bandwidth : 0;
-        if (pipe->burst) {
-		uint64_t burst = (now - si->idle_time) *
-                                    pipe->bandwidth;
-		if (burst > pipe->burst)
-			burst = pipe->burst;
+	{
+	    struct new_pipe *p = &fs->sched->pipe;
+	    si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+	    if (p->burst) {
+		uint64_t burst = (now - si->idle_time) * p->bandwidth;
+		if (burst > p->burst)
+			burst = p->burst;
 		si->credit += burst;
-        }
-    }
+	    }
+	}
 	/* pass through scheduler and delay line */
 	m = serve_sched(NULL, si, now);
 

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Sat Jan 16 08:12:22 2010	(r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Sat Jan 16 08:23:09 2010	(r202433)
@@ -447,14 +447,9 @@ fsk_destroy_list(struct new_fsk_head *h,
 	struct new_fsk *fs;
 
 	while ((fs = SLIST_FIRST(h))) {
-		/* remember if the flowset is dying */
-		printf("%s unlink flowset %d\n",
-			__FUNCTION__, fs->fs.fs_nr);
 		SLIST_REMOVE_HEAD(h, sch_chain);
 		if (fs == int_fs) {
-			/* free the internal flowset.
-			 * XXX maybe do it same as others
-			 */
+			/* free the internal flowset. */
 			free(fs, M_DUMMYNET);
 			dn_cfg.fsk_count--;
 			continue;
@@ -633,7 +628,6 @@ static void
 update_fs(struct new_schk *s)
 {
 	struct new_fsk *fs, *tmp;
-	printf("%s XXX chech be implemented\n", __FUNCTION__);
 	SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
 		if (s->sch.sched_nr != fs->fs.fs_nr) {
 			printf("fs %d still unlinked\n", fs->fs.fs_nr);
@@ -645,10 +639,6 @@ update_fs(struct new_schk *s)
 		fs->sched = s;
 		SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
 	}
-#if 0 // XXX to be completed
-	scan the children of s and see if they still apply.
-	scan fsunlinked and link all schedulers to s;
-#endif
 }
 
 /*
@@ -825,7 +815,10 @@ again: /* run twice, for wfq and fifo */
 		printf("sched %d type changed from %s to %s"
 			" let it drain and reallocate\n",
 			i, s->fp->name, fp->name);
-		/* XXX detach flowsets */
+		/* mark delete so it won't be matched.
+		 * XXX todo: make it die when its queues are done.
+		 * XXX otherwise do a real delete.
+		 */
 		s->kflags = DN_DELETE;
 		notify_fs = 1;
 		goto again;
@@ -879,7 +872,6 @@ config_profile(struct new_profile *pf, s
 	if (i <= 0 || i >= DN_MAX_ID)
 		return EINVAL;
 	/* XXX other sanity checks */
-
 	DUMMYNET_LOCK();
 	s = locate_scheduler(i);
 
@@ -1118,10 +1110,8 @@ dummynet_get(struct sockopt *sopt)
 		a.flags = to_copy;
 		a.type = DN_SCH;
 		dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
-#if 0 // XXX temporarily disable
 		a.type = DN_FS;
 		dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
-#endif
 	}
 	DUMMYNET_UNLOCK();
 	error = sooptcopyout(sopt, start, buf - start);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001160823.o0G8N9wn039334>