Date: Sat, 16 Jan 2010 08:23:09 +0000 (UTC) From: Luigi Rizzo <luigi@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r202433 - user/luigi/ipfw3-head/sys/netinet/ipfw Message-ID: <201001160823.o0G8N9wn039334@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: luigi Date: Sat Jan 16 08:23:09 2010 New Revision: 202433 URL: http://svn.freebsd.org/changeset/base/202433 Log: import initial wf2q+ code ported by Riccardo Panicucci. Code not working yet. Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Sat Jan 16 08:12:22 2010 (r202432) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Sat Jan 16 08:23:09 2010 (r202433) @@ -1,5 +1,6 @@ /* * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa + * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa * All rights reserved * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,6 @@ * SUCH DAMAGE. */ -#ifdef _KERNEL #include <sys/malloc.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -35,60 +35,354 @@ #include <netinet/in.h> #include <netinet/ip_var.h> /* ipfw_rule_ref */ #include <netinet/ip_fw.h> /* flow_id */ -#else -#include "dn_test.h" -#endif #include <netinet/ip_dummynet.h> #include <netinet/ipfw/dn_heap.h> #include <netinet/ipfw/ip_dn_private.h> #include <netinet/ipfw/dn_sched.h> +#ifndef MAX64 +#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) +#endif + +#ifndef MY_M +#define MY_M 16 /* shift for fixed point arithmetic */ +#endif + +#ifndef isdigit +#define isdigit(x) ((x) >= '0' && (x) <= '9' ) +#endif + +/* + * Private information for the scheduler instance: + * sch_heap (key is Finish time) returns the next queue to serve + * ne_heap (key is Start time) stores not-eligible queues + * idle_heap (key=start/finish time) stores idle flows. It must + * support extract-from-middle. + * A flow is normally only in 1 of the three heaps. + * XXX todo: use a more efficient data structure, e.g. a tree sorted + * by F with min_subtree(S) in each node + */ +struct wf2qp_si { + struct dn_heap *sch_heap; /* top extract - key Finish time */ + struct dn_heap *ne_heap; /* top extract - key Start time */ + struct dn_heap *idle_heap; /* random extract - key Start=Finish time */ + dn_key V ; /* virtual time */ + int sum; +}; + +struct wf2qp_queue { + struct new_queue g; + + dn_key S,F; /* start time, finish time */ + + int heap_pos; /* position (index) of struct in heap */ +// struct wf2qp_queue *next; /* next queue in the bucket */ +}; + /* - * This file implements a FIFO scheduler for a single queue. - * The queue is allocated as part of the scheduler instance, - * and there is a single flowset is in the template which stores - * queue-related information. - * No parameters are used except queue sizes and management policy. - * Enqueue and dequeue use the default library functions. + * This file implements a WF2Q+ scheduler as it has been in dummynet + * since 2000. + * The scheduler supports per-flow queues and has O(log N) complexity. */ + static int -fifo_enqueue(struct new_sch_inst *si, struct new_queue *q, struct mbuf *m) +wf2qp_enqueue(struct new_sch_inst *_si, struct new_queue *q, struct mbuf *m) { - return dn_enqueue((struct new_queue *)(si+1), m, 0); + struct new_fsk *fs = q->fs; + struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); + struct wf2qp_queue *alg_fq; + uint64_t len = m->m_pkthdr.len; + + int q_was_idle = 0; + + if (q->mq.head == NULL) + q_was_idle = 1; + + if (dn_enqueue(q, m, 0)) /* packet was dropped */ + return 1; + + if(!q_was_idle) + return 0; + + /* If reach this point, queue q was idle */ + alg_fq = (struct wf2qp_queue *)q; + + if (DN_KEY_LT(alg_fq->F, alg_fq->S)) { + /* F<S means timestamps are invalid ->brand new queue. */ + alg_fq->S = si->V; + si->sum += fs->fs.weight; /* Add weight of new queue. */ + } else { /* if it was idle then it was in the idle heap */ + heap_extract(si->idle_heap, q); + alg_fq->S = MAX64(alg_fq->F, si->V); + } + // XXX use div64 + alg_fq->F = alg_fq->S + (len << MY_M) / (uint64_t)fs->fs.weight; + + if (si->ne_heap->elements == 0 && si->sch_heap->elements == 0) + si->V = MAX64(alg_fq->S, si->V); + + /* + * Look at eligibility. A flow is not eligibile if S>V (when + * this happens, it means that there is some other flow already + * scheduled for the same pipe, so the sch_heap cannot be + * empty). If the flow is not eligible we just store it in the + * ne_heap. Otherwise, we store in the sch_heap. + * Note that for all flows in sch_heap (SCH), S_i <= V, + * and for all flows in ne_heap (NEH), S_i > V. + * So when we need to compute max(V, min(S_i)) forall i in + * SCH+NEH, we only need to look into NEH. + */ + if (DN_KEY_LT(si->V, alg_fq->S)) { + /* S>V means flow Not eligible. */ + if (si->sch_heap->elements == 0) + printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); + heap_insert(si->ne_heap, alg_fq->S, q); + } + else { + heap_insert(si->sch_heap, alg_fq->F, q); + /* Pipe *must* be idle. */ +#if 0 + if ((si->sch_heap.elements != 1) && (s->numbytes >= 0)) + printf("dummynet: OUCH! pipe should have been idle!\n"); + DPRINTF(("dummynet: waking up pipe %d at %d\n", + s->sched_nr, (int)(alg_fq->F >> MY_M))); +#endif + } + return 0; } static struct mbuf * -fifo_dequeue(struct new_sch_inst *si) +wf2qp_dequeue(struct new_sch_inst *_si) +{ + /* Access scheduler instance private data */ + struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); + + struct mbuf *pkt = NULL; + struct new_queue *qq, *q1, *q = NULL; + + struct dn_heap *sch = si->sch_heap; + struct dn_heap *neh = si->ne_heap; + + struct wf2qp_queue *alg_fq = NULL; + + uint64_t len; + + /* XXX: + * This block in the previous version were done in the dummynet_task() + * function. Check if this is the appropriate function to do it. + */ + if (si->idle_heap->elements > 0 && + DN_KEY_LT(HEAP_TOP(si->idle_heap)->key, si->V)) { + qq = HEAP_TOP(si->idle_heap)->object; + alg_fq = (struct wf2qp_queue *) qq; + + heap_extract(si->idle_heap, NULL); + /* Mark timestamp as invalid. */ + /* XXX why don't delete the queue now? */ + alg_fq->S = alg_fq->F + 1; + si->sum -= qq->fs->fs.weight; + } + + +#if 0 + if (p->if_name[0] == 0) { /* tx clock is simulated */ + s->numbytes += (curr_time - s->sched_time) * p->bandwidth; + } + else { + /* tx clock is for real, the ifq must be empty or this is a NOP. */ + if (p->ifp && p->ifp->if_snd.ifq_head != NULL) + return NULL; + else { + DPRINTF(("dummynet: pipe %d ready from %s --\n", + p->pipe_nr, p->if_name)); + } + } +#endif + /* + * While we have backlogged traffic AND credit, we need to do + * something on the queue. + * XXX cannot be a 'while -- + */ + while ( sch->elements > 0 || neh->elements > 0) { + if (sch->elements > 0) { + /* Have some eligible pkts to send out. */ + q = HEAP_TOP(sch)->object; + alg_fq = (struct wf2qp_queue *)q; + + pkt = q->mq.head; + + heap_extract(sch, NULL); /* Remove queue from heap. */ + + si->V += (pkt->m_pkthdr.len << MY_M) / si->sum; /* Update V. */ + alg_fq->S = alg_fq->F; /* Update start time. */ + + if (q->ni.length == 1) /* Flow not backlogged any more. */ + heap_insert(si->idle_heap, alg_fq->F, q); + else { + /* Still backlogged. */ + + /* + * Update F and position in backlogged queue, + * then put flow in ne_heap + * (we will fix this later). + */ + /* len = (q->head)->m_pkthdr.len; */ + len = ((q->mq.head)->m_nextpkt)->m_pkthdr.len; + alg_fq->F += (len << MY_M) / (uint64_t)q->fs->fs.weight; + if (DN_KEY_LEQ(alg_fq->S, si->V)) { + heap_insert(neh, alg_fq->S, q); + } + else { + heap_insert(sch, alg_fq->F, q); + } + } + } + /* + * Now compute V = max(V, min(S_i)). Remember that all elements + * in sch have by definition S_i <= V so if sch is not empty, + * V is surely the max and we must not update it. Conversely, + * if sch is empty we only need to look at neh. + */ + if (sch->elements == 0 && neh->elements > 0) + si->V = MAX64(si->V, HEAP_TOP(neh)->key); + /* Move from neh to sch any packets that have become eligible */ + while (neh->elements > 0 && DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) { + qq = HEAP_TOP(neh)->object; + alg_fq = (struct wf2qp_queue *)qq; + heap_extract(neh, NULL); + heap_insert(sch, alg_fq->F, qq); + } +#if 0 + if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */ + s->numbytes = -1; /* Mark not ready for I/O. */ /* WARNING*/ + debMes(" %s f\n", __FUNCTION__); + break; + } +#endif + if (pkt != NULL) + return dn_dequeue(q); + + } /* end of while */ + + if (sch->elements == 0 && neh->elements == 0 && + si->idle_heap->elements > 0) { + /* + * No traffic and no events scheduled. + * We can get rid of idle-heap. + */ + int i; + int elem = si->idle_heap->elements; + + for (i = 0; i < elem; i++) { + /* XXX NOTE: before the heap changed, this block extracted + * all elements + */ + q1 = HEAP_TOP(si->idle_heap)->object; + heap_extract(si->idle_heap, NULL); + alg_fq = (struct wf2qp_queue *)q1; + + alg_fq->F = 0; + alg_fq->S = alg_fq->F + 1; + /* XXX: why don't call delete queue? */ + } + si->sum = 0; + si->V = 0; +// si->idle_heap.elements = 0; + } + /* + * If we are getting clocks from dummynet (not a real interface) and + * If we are under credit, schedule the next ready event. + * Also fix the delivery time of the last packet. + */ + +#if 0 + if (p->if_name[0]==0 && s->numbytes < 0) + ... +#endif + return NULL; +} + +static int +wf2qp_new_sched(struct new_sch_inst *_si) +{ + struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); + int ofs = offsetof(struct wf2qp_queue, heap_pos); + + /* only idle-heap supports extract from middle */ + if (heap_init(si->idle_heap, 16, ofs) || + heap_init(si->sch_heap, 16, -1) || + heap_init(si->ne_heap, 16, -1)) { + heap_free(si->ne_heap); + heap_free(si->sch_heap); + heap_free(si->idle_heap); + return ENOMEM; + } + return 0; +} + +static int +wf2qp_free_sched(struct new_sch_inst *_si) +{ + struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); + + heap_free(si->sch_heap); + heap_free(si->ne_heap); + heap_free(si->idle_heap); + + return 0; +} + +static int +wf2qp_new_queue (struct new_queue *q) { - return dn_dequeue((struct new_queue *)(si + 1)); + struct wf2qp_queue *tmp = (struct wf2qp_queue *) q; + + q->ni.oid.subtype = DN_SCHED_WF2QP; + + tmp->S = tmp->F + 1; /* hack - mark timestamp as invalid. */ + + return 0; } static int -wf2qp_new_sched(struct new_sch_inst *si) +wf2qp_free_queue(struct new_queue *q) { - /* This scheduler instance contains the queue */ - struct new_queue *q = (struct new_queue *)(si + 1); + struct wf2qp_si *si = (struct wf2qp_si *)q->si + 1; + struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; + + /* If the queue was valid, decrement the sum value + * else this was done by dequeue() + */ + if(alg_fq->S != alg_fq->F + 1) + si->sum -= q->fs->fs.weight; - set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q)); - q->si = si; - q->fs = si->sched->fs; - return 0; + return 0; } /* - * FIFO scheduler descriptor - * contains the type of the scheduler, the name, the size of extra - * data structures, and function pointers. + * WF2Q+ scheduler descriptor + * contains the type of the scheduler, the name, the size of the various + * structures and function pointers. If a function is not implemented, + * the pointer is initialized to NULL */ -static struct dn_sched fifo_desc = { - .type = DN_SCHED_WF2QP, - .name = "WF2QP", - - .sch_inst_len = sizeof(struct new_queue), - - .enqueue = fifo_enqueue, - .dequeue = fifo_dequeue, - .new_sched = wf2qp_new_sched, +static struct dn_sched wf2qp_desc = { + .type = DN_SCHED_WF2QP, + .name = "WF2Q+", + .flags = DN_MULTIQUEUE, + + .sch_inst_len = sizeof(struct wf2qp_si), + .queue_len = sizeof(struct wf2qp_queue), + + .enqueue = wf2qp_enqueue, + .dequeue = wf2qp_dequeue, + + .new_sched = wf2qp_new_sched, + .free_sched = wf2qp_free_sched, + + .new_queue = wf2qp_new_queue, + .free_queue = wf2qp_free_queue, + }; -DECLARE_DNSCHED_MODULE(dn_wf2qp, &fifo_desc); + +DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc); Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Sat Jan 16 08:12:22 2010 (r202432) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Sat Jan 16 08:23:09 2010 (r202433) @@ -1,6 +1,5 @@ /*- - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa * All rights reserved * * Redistribution and use in source and binary forms, with or without @@ -305,9 +304,6 @@ extra_bits(struct mbuf *m, struct new_sc return bits; } - - - /* * Send traffic from a scheduler instance due by 'now'. * Return a pointer to the head of the queue. @@ -338,7 +334,7 @@ serve_sched(struct mq *q, struct new_sch while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) { uint64_t len_scaled; done++; - len_scaled = bw == 0 ? 0 : hz * + len_scaled = (bw == 0) ? 0 : hz * (m->m_pkthdr.len * 8 + extra_bits(m, s)); si->credit -= len_scaled; /* Move packet in the delay line */ @@ -355,7 +351,7 @@ serve_sched(struct mq *q, struct new_sch } else { dn_key t; KASSERT (bw > 0, ("bw=0 and credit<0 ?")); - t = (bw - 1 - si->credit) / bw; + t = div64(bw - 1 - si->credit, bw); if (m) dn_tag_get(m)->output_time += t; si->kflags |= DN_ACTIVE; @@ -617,17 +613,16 @@ dummynet_io(struct mbuf **m0, int dir, s } /* compute the initial allowance */ - { - struct new_pipe *pipe = &fs->sched->pipe; - si->credit = dn_cfg.io_fast ? pipe->bandwidth : 0; - if (pipe->burst) { - uint64_t burst = (now - si->idle_time) * - pipe->bandwidth; - if (burst > pipe->burst) - burst = pipe->burst; + { + struct new_pipe *p = &fs->sched->pipe; + si->credit = dn_cfg.io_fast ? p->bandwidth : 0; + if (p->burst) { + uint64_t burst = (now - si->idle_time) * p->bandwidth; + if (burst > p->burst) + burst = p->burst; si->credit += burst; - } - } + } + } /* pass through scheduler and delay line */ m = serve_sched(NULL, si, now); Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Sat Jan 16 08:12:22 2010 (r202432) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Sat Jan 16 08:23:09 2010 (r202433) @@ -447,14 +447,9 @@ fsk_destroy_list(struct new_fsk_head *h, struct new_fsk *fs; while ((fs = SLIST_FIRST(h))) { - /* remember if the flowset is dying */ - printf("%s unlink flowset %d\n", - __FUNCTION__, fs->fs.fs_nr); SLIST_REMOVE_HEAD(h, sch_chain); if (fs == int_fs) { - /* free the internal flowset. - * XXX maybe do it same as others - */ + /* free the internal flowset. */ free(fs, M_DUMMYNET); dn_cfg.fsk_count--; continue; @@ -633,7 +628,6 @@ static void update_fs(struct new_schk *s) { struct new_fsk *fs, *tmp; - printf("%s XXX chech be implemented\n", __FUNCTION__); SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { if (s->sch.sched_nr != fs->fs.fs_nr) { printf("fs %d still unlinked\n", fs->fs.fs_nr); @@ -645,10 +639,6 @@ update_fs(struct new_schk *s) fs->sched = s; SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); } -#if 0 // XXX to be completed - scan the children of s and see if they still apply. - scan fsunlinked and link all schedulers to s; -#endif } /* @@ -825,7 +815,10 @@ again: /* run twice, for wfq and fifo */ printf("sched %d type changed from %s to %s" " let it drain and reallocate\n", i, s->fp->name, fp->name); - /* XXX detach flowsets */ + /* mark delete so it won't be matched. + * XXX todo: make it die when its queues are done. + * XXX otherwise do a real delete. + */ s->kflags = DN_DELETE; notify_fs = 1; goto again; @@ -879,7 +872,6 @@ config_profile(struct new_profile *pf, s if (i <= 0 || i >= DN_MAX_ID) return EINVAL; /* XXX other sanity checks */ - DUMMYNET_LOCK(); s = locate_scheduler(i); @@ -1118,10 +1110,8 @@ dummynet_get(struct sockopt *sopt) a.flags = to_copy; a.type = DN_SCH; dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); -#if 0 // XXX temporarily disable a.type = DN_FS; dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); -#endif } DUMMYNET_UNLOCK(); error = sooptcopyout(sopt, start, buf - start);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001160823.o0G8N9wn039334>