From owner-svn-src-user@FreeBSD.ORG Thu Jan 21 13:00:28 2010 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D57C3106568D; Thu, 21 Jan 2010 13:00:28 +0000 (UTC) (envelope-from luigi@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C440D8FC15; Thu, 21 Jan 2010 13:00:28 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o0LD0SVQ033366; Thu, 21 Jan 2010 13:00:28 GMT (envelope-from luigi@svn.freebsd.org) Received: (from luigi@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o0LD0SGo033358; Thu, 21 Jan 2010 13:00:28 GMT (envelope-from luigi@svn.freebsd.org) Message-Id: <201001211300.o0LD0SGo033358@svn.freebsd.org> From: Luigi Rizzo Date: Thu, 21 Jan 2010 13:00:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r202746 - in user/luigi/ipfw3-head: sbin/ipfw sys/netinet sys/netinet/ipfw X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 21 Jan 2010 13:00:28 -0000 Author: luigi Date: Thu Jan 21 13:00:28 2010 New Revision: 202746 URL: http://svn.freebsd.org/changeset/base/202746 Log: - remove stale data structures - remove old, incorrect documentation and add the correct one; - adjust the handling of masks. When both flow_mask and sched_mask are present, we must do the initial grouping by (flow_mask|sched_mask); otherwise a queue might end up assigned to two different schedulers. 
TODO: find better names for 'things' -- especially the naming of queue-related structures is very confusing. Modified: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Modified: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c ============================================================================== --- user/luigi/ipfw3-head/sbin/ipfw/dummynet.c Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sbin/ipfw/dummynet.c Thu Jan 21 13:00:28 2010 (r202746) @@ -87,12 +87,12 @@ static struct _s_x dummynet_params[] = { #define O_NEXT(p, len) ((void *)(char *)(p) + len) static void -oid_fill(struct dn_id *oid, int len, int type) +oid_fill(struct dn_id *oid, int len, int type, uintptr_t id) { oid->len = len; oid->type = type; oid->subtype = 0; - oid->id = 0; + oid->id = id; } /* make room in the buffer and move the pointer forward */ @@ -100,7 +100,7 @@ static void * o_next(struct dn_id **o, int len, int type) { struct dn_id *ret = *o; - oid_fill(ret, len, type); + oid_fill(ret, len, type, 0); *o = O_NEXT(*o, len); return ret; } @@ -379,12 +379,15 @@ list_pipes(struct dn_id *oid, struct dn_ int ipfw_delete_pipe(int pipe_or_queue, int i) { - struct dn_id oid; - oid_fill(&oid, sizeof(oid), DN_CMD_DELETE); - oid.subtype = (co.do_pipe == 1) ? DN_PIPE : + struct { + struct dn_id oid; + uint32_t a[1]; /* more if we want a list */ + } cmd; + oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION); + cmd.oid.subtype = (co.do_pipe == 1) ? DN_PIPE : ( (co.do_pipe == 2) ? 
DN_FS : DN_SCH); - oid.id = i; - i = do_cmd(IP_DUMMYNET3, &oid, oid.len); + cmd.a[0] = i; + i = do_cmd(IP_DUMMYNET3, &cmd, cmd.oid.len); if (i) { i = 1; warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); @@ -750,13 +753,18 @@ ipfw_config_pipe(int ac, char **av) struct new_pipe *p = NULL; struct new_fs *fs = NULL; struct new_profile *pf = NULL; - struct new_cmd *cmd = NULL; struct ipfw_flow_id *mask = NULL; - int lmax = sizeof(*cmd); /* always present */ + int lmax; int _foo = 0, *flags = &_foo; - /* worst case: 2 schedulers, 1 profile, 1 pipe, 1 flowset */ - lmax += 2*sizeof(*sch) + 2*sizeof(*p) + sizeof(*fs) + sizeof(*pf); + /* + * allocate space for 1 header, + * 1 scheduler, 1 pipe, 1 flowset, 1 profile + */ + lmax = sizeof(struct dn_id); /* command header */ + lmax += sizeof(struct new_sch) + sizeof(struct new_pipe) + + sizeof(struct new_fs) + + sizeof(struct new_profile); av++; ac--; /* Pipe number */ @@ -769,8 +777,9 @@ ipfw_config_pipe(int ac, char **av) if (buf == NULL) { errx(1, "no memory for pipe buffer"); } - cmd = o_next(&buf, sizeof(*cmd), DN_CMD_CONFIGURE); - cmd->entries = 0; /* no explicit arguments */ + /* all commands start with a 'CONFIGURE' and a version */ + o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIGURE); + base->id = DN_API_VERSION; switch (co.do_pipe) { case 1: @@ -1213,7 +1222,7 @@ void dummynet_flush(void) { struct dn_id oid; - oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH); + oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION); do_cmd(IP_DUMMYNET3, &oid, oid.len); } @@ -1225,7 +1234,7 @@ dummynet_list(int ac, char *av[], int sh oid.type = DN_CMD_GET; oid.len = l; - oid.id = 0; + oid.id = DN_API_VERSION; switch (co.do_pipe) { case 1: oid.subtype = DN_PIPE; /* list pipe */ @@ -1237,7 +1246,6 @@ dummynet_list(int ac, char *av[], int sh oid.subtype = DN_SCH; /* list sched */ break; } - /* XXX we could use oid.id for the filter */ ret = do_cmd(-IP_DUMMYNET3, &oid, (uintptr_t)&l); // printf("%s returns %d need %d\n", __FUNCTION__, ret, 
oid.id); if (ret != 0 || oid.id <= sizeof(oid)) Modified: user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Thu Jan 21 13:00:28 2010 (r202746) @@ -36,14 +36,21 @@ * Setsockopt() and getsockopt() pass a batch of objects, each * of them starting with a "struct dn_id" which should fully identify * the object and its relation with others in the sequence. - * objects in a batch of requests. - * This struct store in the type field an identifier of the type of object - * passed (for example a pipe, a scheduler...). The subtype - * field contains more detail info, if needed. + * The first object in each request should have + * type= DN_CMD_*, id = DN_API_VERSION. + * For other objects, type and subtype specify the object, len indicates + * the total length including the header, and 'id' identifies the specific + * object. + * + * Most objects are numbered with an identifier in the range 1..65535. + * DN_MAX_ID indicates the first value outside the range. 
*/ +#define DN_API_VERSION 12500000 +#define DN_MAX_ID 0x10000 + struct dn_id { - uint16_t len; /* total len including this header */ + uint16_t len; /* total obj len including this header */ uint8_t type; uint8_t subtype; uintptr_t id; /* generic id or pointer */ @@ -63,10 +70,10 @@ enum { DN_QUEUE, DN_DELAY_LINE, DN_PROFILE, - DN_NI, /* new_inst */ + DN_NI, /* struct new_inst */ //DN_FS_EXT, //DN_QUEUE_EXT, - DN_TEXT, /* subtype is the object */ + DN_TEXT, /* opaque text is the object */ DN_CMD_CONFIGURE, /* objects follow */ DN_CMD_DELETE, /* subtype + list of entries */ DN_CMD_GET, /* subtype + list of entries */ @@ -82,36 +89,12 @@ enum { /* subtype for schedulers, flowse }; enum { /* user flags */ - DN_HAVE_MASK = 0x0001, - DN_NOERROR = 0x0002, - DN_QSIZE_BYTES = 0x0008, + DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */ + DN_NOERROR = 0x0002, /* do not report errors */ + DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */ + DN_HAS_PROFILE = 0x0010, /* a pipe has a profile */ DN_IS_RED = 0x0020, DN_IS_GENTLE_RED= 0x0040, -#if 0 -#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ -#endif -}; - -typedef uint64_t dn_key; - -struct new_cmd { /* header for all sockopt */ - struct dn_id oid; - int entries; - uint32_t data[0]; /* actually, entries elements */ -}; - -/* A delay profile is attached to a pipe */ -#define ED_MAX_SAMPLES_NO 1024 -struct new_profile { - struct dn_id oid; - /* fields to simulate a delay profile */ -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int pipe_nr; - int loss_level; - int bandwidth; - int samples_no; - int samples[ED_MAX_SAMPLES_NO]; /* this has actually samples_no slots */ }; /* @@ -187,77 +170,66 @@ struct new_sch { }; -/* - * "queue N" and "pipe N" accept 1<=N<=65535. 
- * So valid names are from 1 to DN_MAXID-1 - */ -#define DN_MAX_ID 0x10000 +/* A delay profile is attached to a pipe */ +#define ED_MAX_SAMPLES_NO 1024 +struct new_profile { + struct dn_id oid; + /* fields to simulate a delay profile */ +#define ED_MAX_NAME_LEN 32 + char name[ED_MAX_NAME_LEN]; + int pipe_nr; + int loss_level; + int bandwidth; + int samples_no; /* actual length of samples[] */ + int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ +}; -/* - * The maximum hash table size for queues (unused ?) - */ -#define DN_MAX_HASH_SIZE 65536 /* - * Overall structure of dummynet (with WF2Q+): + * Overall structure of dummynet In dummynet, packets are selected with the firewall rules, and passed -to two different objects: PIPE or QUEUE. +to two different objects: PIPE or QUEUE (bad name). -A QUEUE is just a queue with configurable size and queue management -policy. It is also associated with a mask (to discriminate among -different flows), a weight (used to give different shares of the -bandwidth to different flows) and a "pipe", which essentially -supplies the transmit clock for all queues associated with that -pipe. - -A PIPE emulates a fixed-bandwidth link, whose bandwidth is -configurable. The "clock" for a pipe can come from either an -internal timer, or from the transmit interrupt of an interface. -A pipe is also associated with one (or more, if masks are used) -queue, where all packets for that pipe are stored. - -The bandwidth available on the pipe is shared by the queues -associated with that pipe (only one in case the packet is sent -to a PIPE) according to the WF2Q+ scheduling algorithm and the -configured weights. - -In general, incoming packets are stored in the appropriate queue, -which is then placed into one of a few heaps managed by a scheduler -to decide when the packet should be extracted. -The scheduler (a function called dummynet()) is run at every timer -tick, and grabs queues from the head of the heaps when they are -ready for processing. 
+A QUEUE defines a classifier, which groups packets into flows +according to a 'mask', puts them into independent queues (one +per flow) with configurable size and queue management policy, +and passes flows to a scheduler: + + (flow_mask|sched_mask) sched_mask + +---------+ weight Wx +-------------+ + | |->-[flow]-->--| |-+ + -->--| QUEUE x | ... | | | + | |->-[flow]-->--| SCHEDuler N | | + +---------+ | | | + ... | +--[LINK N]-->-- + +---------+ weight Wy | | +--[LINK N]-->-- + | |->-[flow]-->--| | | + -->--| QUEUE y | ... | | | + | |->-[flow]-->--| | | + +---------+ +-------------+ | + +-------------+ + +Many QUEUE objects can connect to the same scheduler, each +QUEUE object can have its own set of parameters. + +In turn, the SCHEDuler 'forks' multiple instances according +to a 'sched_mask', each instance manages its own set of queues +and transmits on a private instance of a configurable LINK. + +A PIPE is a simplified version of the above, where there +is no flow_mask, and each scheduler instance handles a single queue. There are three data structures definining a pipe and associated queues: + dn_pipe, which contains the main configuration parameters related to delay and bandwidth; - + dn_flow_set, which contains WF2Q+ configuration, flow - masks, plr and RED configuration; - + dn_flow_queue, which is the per-flow queue (containing the packets) - -Multiple dn_flow_set can be linked to the same pipe, and multiple -dn_flow_queue can be linked to the same dn_flow_set. -All data structures are linked in a linear list which is used for -housekeeping purposes. - -During configuration, we create and initialize the dn_flow_set -and dn_pipe structures (a dn_pipe also contains a dn_flow_set). - -At runtime: packets are sent to the appropriate dn_flow_set (either -WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), -which in turn dispatches them to the appropriate dn_flow_queue -(created dynamically according to the masks). 
- -The transmit clock for fixed rate flows (ready_event()) selects the -dn_flow_queue to be used to transmit the next packet. For WF2Q, -wfq_ready_event() extract a pipe which in turn selects the right -flow using a number of heaps defined into the pipe itself. + + dn_flowset, which contains flow masks, weights and queue + parameters; + + dn_flow, which contains the queue status (flow id, statistics) * */ - #endif /* _IP_DUMMYNET_H */ Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Thu Jan 21 13:00:28 2010 (r202746) @@ -62,13 +62,13 @@ struct wf2qp_si { struct dn_heap sch_heap; /* top extract - key Finish time */ struct dn_heap ne_heap; /* top extract - key Start time */ struct dn_heap idle_heap; /* random extract - key Start=Finish time */ - dn_key V ; /* virtual time */ + uint64_t V; /* virtual time */ uint32_t sum; /* sum of weights */ }; struct wf2qp_queue { - dn_key S,F; /* start time, finish time */ - int heap_pos; /* position (index) of struct in heap */ + uint64_t S, F; /* start time, finish time */ + int32_t heap_pos; /* position (index) of struct in heap */ }; /* Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Thu Jan 21 13:00:28 2010 (r202746) @@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$"); * We keep a private variable for the simulation time, but we could * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) */ -static dn_key curr_time = 0 ; /* current simulation time */ +static uint64_t curr_time = 0; /* current simulation time */ struct dn_parms dn_cfg = { 
.pipe_slot_limit = 100, /* Foot shooting limit for pipe queues. */ @@ -176,7 +176,7 @@ struct dn_pkt_tag { /* second part, dummynet specific */ int dn_dir; /* action when packet comes out.*/ /* see ip_fw_private.h */ - dn_key output_time; /* when the pkt is due for delivery*/ + uint64_t output_time; /* when the pkt is due for delivery*/ struct ifnet *ifp; /* interface, for ip_output */ struct _ip6dn_args ip6opt; /* XXX ipv6 options */ }; @@ -279,7 +279,7 @@ drop: * Runs under scheduler lock. */ static void -transmit_event(struct mq *q, struct delay_line *dline, dn_key now) +transmit_event(struct mq *q, struct delay_line *dline, uint64_t now) { struct mbuf *m; struct dn_pkt_tag *pkt = NULL; @@ -327,7 +327,7 @@ extra_bits(struct mbuf *m, struct new_sc * Return a pointer to the head of the queue. */ static struct mbuf * -serve_sched(struct mq *q, struct new_sch_inst *si, dn_key now) +serve_sched(struct mq *q, struct new_sch_inst *si, uint64_t now) { struct mq def_q; struct new_schk *s = si->sched; @@ -367,7 +367,7 @@ serve_sched(struct mq *q, struct new_sch if (si->credit >= 0) { si->idle_time = now; } else { - dn_key t; + uint64_t t; KASSERT (bw > 0, ("bw=0 and credit<0 ?")); t = div64(bw - 1 - si->credit, bw); if (m) @@ -611,9 +611,8 @@ dummynet_io(struct mbuf **m0, int dir, s if (si == NULL) goto dropit; /* - * If the support multiple queues, find the right one + * If the scheduler supports multiple queues, find the right one * (otherwise it will be ignored by enqueue). 
- * We cannot pass si as an argument :( */ if (fs->sched->fp->flags & DN_MULTIQUEUE) { q = ipdn_q_find(fs, si, &(fwa->f_id)); Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h Thu Jan 21 13:00:28 2010 (r202746) @@ -150,6 +150,9 @@ struct delay_line { * The kernel side of a flowset. It is linked in a hash table * of flowsets, and in a list of children of their parent scheduler. * qht is either the queue or (if HAVE_MASK) a hash table queues. + * Note that the mask to use is the (flow_mask|sched_mask), which + * changes as we attach/detach schedulers. So we store it here. + * * XXX If we want to add scheduler-specific parameters, we need to * put them in external storage because the scheduler may not be * available when the fsk is created. @@ -158,6 +161,8 @@ struct new_fsk { /* kernel side of a flo struct new_fs fs; SLIST_ENTRY(new_fsk) fsk_next; /* hash chain list */ + struct ipfw_flow_id fsk_mask; + /* hash table of queues, or just single queue */ struct dn_ht *_qht; struct new_schk *sched; /* Sched we are linked to */ @@ -220,8 +225,8 @@ struct new_sch_inst { int kflags; /* DN_ACTIVE */ int64_t credit; /* bits I can transmit (more or less). 
*/ - dn_key sched_time; /* time pipe was scheduled in ready_heap */ - dn_key idle_time; /* start of scheduler instance idle time */ + uint64_t sched_time; /* time pipe was scheduled in ready_heap */ + uint64_t idle_time; /* start of scheduler instance idle time */ }; /* kernel-side flags */ Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Thu Jan 21 12:18:29 2010 (r202745) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Thu Jan 21 13:00:28 2010 (r202746) @@ -142,6 +142,33 @@ flow_id_mask(struct ipfw_flow_id *mask, return id; } +/* computes an OR of two masks, result in dst and also returned */ +static struct ipfw_flow_id * +flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) +{ + int is_v6 = IS_IP6_FLOW_ID(dst); + + dst->dst_port |= src->dst_port; + dst->src_port |= src->src_port; + dst->proto |= src->proto; + dst->flags = 0; /* we don't care about this one */ + if (is_v6) { +#define OR_MASK(_d, _s) \ + (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ + (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ + (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ + (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; + OR_MASK(&dst->dst_ip6, &src->dst_ip6); + OR_MASK(&dst->src_ip6, &src->src_ip6); +#undef OR_MASK + dst->flow_id6 |= src->flow_id6; + } else { + dst->dst_ip |= src->dst_ip; + dst->src_ip |= src->src_ip; + } + return dst; +} + /* XXX we may want a better hash function */ static uint32_t flow_id_hash(struct ipfw_flow_id *id) @@ -338,7 +365,7 @@ ipdn_q_find(struct new_fsk *fs, struct n return NULL; } masked_id = *id; - flow_id_mask(&fs->fs.flow_mask, &masked_id); + flow_id_mask(&fs->fsk_mask, &masked_id); return dn_ht_find(fs->_qht, (uintptr_t)&masked_id, DNHT_INSERT, &template); } else { @@ -839,6 +866,10 @@ fsk_attach(struct 
new_fsk *fs, struct ne SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); if (s->fp->new_fsk) s->fp->new_fsk(fs); + /* XXX compute fsk_mask */ + fs->fsk_mask = fs->fs.flow_mask; + if (fs->sched->sch.flags & DN_HAVE_MASK) + flow_id_or(&fs->fsk_mask, &fs->sched->sch.sched_mask); if (!fs->_qht) return; D("XXX TODO requeue from fs %d to sch %d", @@ -1221,7 +1252,8 @@ dummynet_flush(void) } /* - * Main handler for configuration. Rules of the game: + * Main handler for configuration. We are guaranteed to be called + * with an oid which is at least a dn_id. * - the first object is the command (config, delete, flush, ...) * - config_pipe must be issued after the corresponding config_sched * - parameters (DN_TXT) for an object must preceed the object @@ -1234,10 +1266,13 @@ do_config(void *p, int l) int err = 0, err2 = 0; struct dn_id *arg = NULL; - /* XXX TODO require the first block to be a 'CONFIGURE' - * or at least carry with a version number - */ - for (o = p; l >= sizeof(*o); o = next) { + o = p; + if (o->id != DN_API_VERSION) { + D("invalid api version got %d need %d", + o->id, DN_API_VERSION); + return EINVAL; + } + for (; l >= sizeof(*o); o = next) { struct dn_id *prev = arg; if (o->len < sizeof(*o) || l < o->len) { D("bad len o->len %d len %d", o->len, l); @@ -1451,7 +1486,7 @@ ip_dn_ctl(struct sockopt *sopt) break; } l = sopt->sopt_valsize; - if (l < 0 || l > 12000) { + if (l < sizeof(struct dn_id) || l > 12000) { D("argument len %d invalid", l); break; }