Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 9 Apr 2009 12:46:00 +0000 (UTC)
From:      Luigi Rizzo <luigi@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r190865 - in head: sbin/ipfw sys/netinet
Message-ID:  <200904091246.n39Ck05i004627@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: luigi
Date: Thu Apr  9 12:46:00 2009
New Revision: 190865
URL: http://svn.freebsd.org/changeset/base/190865

Log:
  Add emulation of delay profiles, which lets you model various
  types of MAC overheads such as preambles, link level retransmissions
  and more.
  
  Note: this commit changes the userland/kernel ABI for pipes
  (but not for ordinary firewall rules) so you need to rebuild
  kernel and /sbin/ipfw to use dummynet features.
  
  Please check the manpage for details on the new feature.
  
  The MFC would be trivial but it breaks the ABI, so it will
  be postponed until after 7.2 is released.
  
  Interested users are welcome to apply the patch manually
  to their RELENG_7 tree.
  
  Work supported by the European Commission, Projects Onelab and
  Onelab2 (contract 224263).

Modified:
  head/sbin/ipfw/dummynet.c
  head/sbin/ipfw/ipfw.8
  head/sbin/ipfw/ipfw2.h
  head/sys/netinet/ip_dummynet.c
  head/sys/netinet/ip_dummynet.h

Modified: head/sbin/ipfw/dummynet.c
==============================================================================
--- head/sbin/ipfw/dummynet.c	Thu Apr  9 12:20:37 2009	(r190864)
+++ head/sbin/ipfw/dummynet.c	Thu Apr  9 12:46:00 2009	(r190865)
@@ -69,6 +69,7 @@ static struct _s_x dummynet_params[] = {
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
+	{ "profile",		TOK_PIPE_PROFILE},
 	{ "dummynet-params",	TOK_NULL },
 	{ NULL, 0 }	/* terminator */
 };
@@ -248,6 +249,19 @@ print_flowset_parms(struct dn_flow_set *
 	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
 }
 
+static void
+print_extra_delay_parms(struct dn_pipe *p, char *prefix)
+{
+	double loss;
+	if (p->samples_no <= 0)
+		return;
+
+	loss = p->loss_level;
+	loss /= p->samples_no;
+	printf("%s profile: name \"%s\" loss %f samples %d\n",
+		prefix, p->name, loss, p->samples_no);
+}
+
 void
 ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
 {
@@ -296,6 +310,9 @@ ipfw_list_pipes(void *data, uint nbytes,
 
 		sprintf(prefix, "%05d: %s %4d ms ",
 		    p->pipe_nr, buf, p->delay);
+
+		print_extra_delay_parms(p, prefix);
+
 		print_flowset_parms(&(p->fs), prefix);
 		if (co.verbose)
 			printf("   V %20llu\n", align_uint64(&p->V) >> MY_M);
@@ -346,9 +363,284 @@ ipfw_delete_pipe(int pipe_or_queue, int 
 	return i;
 }
 
+/*
+ * Code to parse delay profiles.
+ *
+ * Some link types introduce extra delays in the transmission
+ * of a packet, e.g. because of MAC level framing, contention on
+ * the use of the channel, MAC level retransmissions and so on.
+ * From our point of view, the channel is effectively unavailable
+ * for this extra time, which is constant or variable depending
+ * on the link type. Additionally, packets may be dropped after this
+ * time (e.g. on a wireless link after too many retransmissions).
+ * We can model the additional delay with an empirical curve
+ * that represents its distribution.
+ *
+ *	cumulative probability
+ *	1.0 ^
+ *	    |
+ *	L   +-- loss-level          x
+ *	    |                 ******
+ *	    |                *
+ *	    |           *****
+ *	    |          *
+ *	    |        **
+ *	    |       *                         
+ *	    +-------*------------------->
+ *			delay
+ *
+ * The empirical curve may have both vertical and horizontal lines.
+ * Vertical lines represent constant delay for a range of
+ * probabilities; horizontal lines correspond to a discontinuity
+ * in the delay distribution: the pipe will use the largest delay
+ * for a given probability.
+ * 
+ * To pass the curve to dummynet, we must store the parameters
+ * in a file as described below, and issue the command
+ *
+ *      ipfw pipe <n> config ... bw XXX profile <filename> ...
+ *
+ * The file format is the following, with whitespace acting as
+ * a separator and '#' indicating the beginning of a comment:
+ *
+ *	samples N
+ *		the number of samples used in the internal
+ *		representation (2..1024; default 100);
+ *
+ *	loss-level L 
+ *		The probability above which packets are lost.
+ *               (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
+ *
+ *	name identifier
+ *		An optional name (listed by "ipfw pipe show")
+ *		to identify the distribution;
+ *
+ *	"delay prob" | "prob delay"
+ *		One of these two lines is mandatory and defines
+ *		the format of the following lines with data points.
+ *
+ *	XXX YYY
+ *		2 or more lines representing points in the curve,
+ *		with either delay or probability first, according
+ *		to the chosen format.
+ *		The unit for delay is milliseconds.
+ *
+ * Data points do not need to be ordered or equal to the number
+ * specified in the "samples" line. ipfw will sort and interpolate
+ * the curve as needed.
+ *
+ * Example of a profile file:
+ 
+        name    bla_bla_bla
+        samples 100
+        loss-level    0.86
+        prob    delay
+        0       200	# minimum overhead is 200ms
+        0.5     200
+        0.5     300
+        0.8     1000
+        0.9     1300
+        1       1300
+ 
+ * Internally, we will convert the curve to a fixed number of
+ * samples, and when it is time to transmit a packet we will
+ * model the extra delay as extra bits in the packet.
+ *
+ */
+
+#define ED_MAX_LINE_LEN	256+ED_MAX_NAME_LEN
+#define ED_TOK_SAMPLES	"samples"
+#define ED_TOK_LOSS	"loss-level"
+#define ED_TOK_NAME	"name"
+#define ED_TOK_DELAY	"delay"
+#define ED_TOK_PROB	"prob"
+#define ED_SEPARATORS	" \t\n"
+#define ED_MIN_SAMPLES_NO	2
+
+/*
+ * returns 1 if s is a non-negative number, with at most one '.'
+ */
+static int
+is_valid_number(const char *s)
+{
+	int i, dots_found = 0;
+	int len = strlen(s);
+
+	for (i = 0; i<len; ++i)
+		if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1))
+			return 0;
+	return 1;
+}
+
+struct point {
+	double prob;
+	double delay;
+};
+
+int
+compare_points(const void *vp1, const void *vp2)
+{
+	const struct point *p1 = vp1;
+	const struct point *p2 = vp2;
+	double res = 0;
+
+	res = p1->prob - p2->prob;
+	if (res == 0)
+		res = p1->delay - p2->delay;
+	if (res < 0)
+		return -1;
+	else if (res > 0)
+		return 1;
+	else
+		return 0;
+}
+
+#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
+
+static void
+load_extra_delays(const char *filename, struct dn_pipe *p)
+{
+	char    line[ED_MAX_LINE_LEN];
+	FILE    *f;
+	int     lineno = 0;
+	int     i;
+
+	int     samples = -1;
+	double  loss = -1.0;
+	char    profile_name[ED_MAX_NAME_LEN];
+	int     delay_first = -1;
+	int     do_points = 0;
+	struct point    points[ED_MAX_SAMPLES_NO];
+	int     points_no = 0;
+
+	profile_name[0] = '\0';
+	f = fopen(filename, "r");
+	if (f == NULL)
+		err(EX_UNAVAILABLE, "fopen: %s", filename);
+
+	while (fgets(line, ED_MAX_LINE_LEN, f)) {         /* read commands */
+		char *s, *cur = line, *name = NULL, *arg = NULL;
+
+		++lineno;
+
+		/* parse the line */
+		while (cur) {
+			s = strsep(&cur, ED_SEPARATORS);
+			if (s == NULL || *s == '#')
+				break;
+			if (*s == '\0')
+				continue;
+			if (arg)
+				errx(ED_EFMT("too many arguments"));
+			if (name == NULL)
+				name = s;
+			else
+				arg = s;
+		}
+		if (name == NULL)	/* empty line */
+			continue;
+		if (arg == NULL)
+			errx(ED_EFMT("missing arg for %s"), name);
+
+		if (!strcasecmp(name, ED_TOK_SAMPLES)) {
+		    if (samples > 0)
+			errx(ED_EFMT("duplicate ``samples'' line"));
+		    if (atoi(arg) <=0)
+			errx(ED_EFMT("invalid number of samples"));
+		    samples = atoi(arg);
+		    if (samples>ED_MAX_SAMPLES_NO)
+			    errx(ED_EFMT("too many samples, maximum is %d"),
+				ED_MAX_SAMPLES_NO);
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_LOSS)) {
+		    if (loss != -1.0)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    if (!is_valid_number(arg))
+			errx(ED_EFMT("invalid %s"), arg);
+		    loss = atof(arg);
+		    if (loss > 1)
+			errx(ED_EFMT("%s greater than 1.0"), name);
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_NAME)) {
+		    if (profile_name[0] != '\0')
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    strncpy(profile_name, arg, sizeof(profile_name) - 1);
+		    profile_name[sizeof(profile_name)-1] = '\0';
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_DELAY)) {
+		    if (do_points)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    delay_first = 1;
+		    do_points = 1;
+		} else if (!strcasecmp(name, ED_TOK_PROB)) {
+		    if (do_points)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    delay_first = 0;
+		    do_points = 1;
+		} else if (do_points) {
+		    if (!is_valid_number(name) || !is_valid_number(arg))
+			errx(ED_EFMT("invalid point found"));
+		    if (delay_first) {
+			points[points_no].delay = atof(name);
+			points[points_no].prob = atof(arg);
+		    } else {
+			points[points_no].delay = atof(arg);
+			points[points_no].prob = atof(name);
+		    }
+		    if (points[points_no].prob > 1.0)
+			errx(ED_EFMT("probability greater than 1.0"));
+		    ++points_no;
+		} else {
+		    errx(ED_EFMT("unrecognised command '%s'"), name);
+		}
+	}
+
+	if (samples == -1) {
+	    warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES);
+	    samples = 100;
+	}
+
+	if (loss == -1.0) {
+	    warnx("'%s' not found, assuming no loss", ED_TOK_LOSS);
+	    loss = 1;
+	}
+
+	/* make sure that there are enough points. */
+	if (points_no < ED_MIN_SAMPLES_NO)
+	    errx(ED_EFMT("too few samples, need at least %d"),
+		ED_MIN_SAMPLES_NO);
+
+	qsort(points, points_no, sizeof(struct point), compare_points);
+
+	/* interpolation */
+	for (i = 0; i<points_no-1; ++i) {
+	    double y1 = points[i].prob * samples;
+	    double x1 = points[i].delay;
+	    double y2 = points[i+1].prob * samples;
+	    double x2 = points[i+1].delay;
+
+	    int index = y1;
+	    int stop = y2;
+
+	    if (x1 == x2) {
+		for (; index<stop; ++index)
+		    p->samples[index] = x1;
+	    } else {
+		double m = (y2-y1)/(x2-x1);
+		double c = y1 - m*x1;
+		for (; index<stop ; ++index)
+		    p->samples[index] = (index - c)/m;
+	    }
+	}
+	p->samples_no = samples;
+	p->loss_level = loss * samples;
+	strncpy(p->name, profile_name, sizeof(p->name));
+}
+
 void
 ipfw_config_pipe(int ac, char **av)
 {
+	int samples[ED_MAX_SAMPLES_NO];
 	struct dn_pipe p;
 	int i;
 	char *end;
@@ -611,6 +903,15 @@ end_mask:
 			ac--; av++;
 			break;
 
+		case TOK_PIPE_PROFILE:
+			if (co.do_pipe != 1)
+			    errx(EX_DATAERR, "extra delay only valid for pipes");
+			NEED1("extra delay needs the file name\n");
+			p.samples = &samples[0];
+			load_extra_delays(av[0], &p);
+			--ac; ++av;
+			break;
+
 		default:
 			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
 		}
@@ -620,6 +921,9 @@ end_mask:
 			errx(EX_DATAERR, "pipe_nr must be > 0");
 		if (p.delay > 10000)
 			errx(EX_DATAERR, "delay must be < 10000");
+		if (p.samples_no > 0 && p.bandwidth == 0)
+			errx(EX_DATAERR,
+				"profile requires a bandwidth limit");
 	} else { /* co.do_pipe == 2, queue */
 		if (p.fs.parent_nr == 0)
 			errx(EX_DATAERR, "pipe must be > 0");
@@ -713,7 +1017,18 @@ end_mask:
 			weight *= 1 - w_q;
 		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
 	}
-	i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
+	if (p.samples_no <= 0) {
+		i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
+	} else {
+		struct dn_pipe_max pm;
+		int len = sizeof(pm);
+
+		memcpy(&pm.pipe, &p, sizeof(pm.pipe));
+		memcpy(&pm.samples, samples, sizeof(pm.samples));
+
+		i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
+	}
+
 	if (i)
 		err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
 }

Modified: head/sbin/ipfw/ipfw.8
==============================================================================
--- head/sbin/ipfw/ipfw.8	Thu Apr  9 12:20:37 2009	(r190864)
+++ head/sbin/ipfw/ipfw.8	Thu Apr  9 12:46:00 2009	(r190865)
@@ -1,7 +1,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 5, 2009
+.Dd April 9, 2009
 .Dt IPFW 8
 .Os
 .Sh NAME
@@ -1942,6 +1942,80 @@ with
 to reduce
 the granularity to 1ms or less).
 Default value is 0, meaning no delay.
+.Pp
+.It Cm profile Ar filename
+A file specifying the additional overhead incurred in the transmission
+of a packet on the link.
+.Pp
+Some link types introduce extra delays in the transmission
+of a packet, e.g. because of MAC level framing, contention on
+the use of the channel, MAC level retransmissions and so on.
+From our point of view, the channel is effectively unavailable
+for this extra time, which is constant or variable depending
+on the link type. Additionally, packets may be dropped after this
+time (e.g. on a wireless link after too many retransmissions).
+We can model the additional delay with an empirical curve
+that represents its distribution.
+.Bd -literal -offset indent
+      cumulative probability
+      1.0 ^
+          |
+      L   +-- loss-level          x
+          |                 ******
+          |                *
+          |           *****
+          |          *
+          |        **
+          |       *
+          +-------*------------------->
+                      delay
+.Ed
+The empirical curve may have both vertical and horizontal lines.
+Vertical lines represent constant delay for a range of
+probabilities.
+Horizontal lines correspond to a discontinuity in the delay
+distribution: the pipe will use the largest delay for a
+given probability.
+.Pp
+The file format is the following, with whitespace acting as
+a separator and '#' indicating the beginning of a comment:
+.Bl -tag -width indent
+.It Cm samples Ar N
+the number of samples used in the internal
+representation (2..1024; default 100);
+.It Cm loss-level Ar L
+The probability above which packets are lost.
+(0.0 <= L <= 1.0, default 1.0 i.e. no loss);
+.It Cm name Ar identifier
+An optional name (listed by "ipfw pipe show")
+to identify the distribution;
+.It Cm "delay prob" | "prob delay"
+One of these two lines is mandatory and defines
+the format of the following lines with data points.
+.It Ar XXX Ar YYY
+2 or more lines representing points in the curve,
+with either delay or probability first, according
+to the chosen format.
+The unit for delay is milliseconds.
+Data points do not need to be ordered or equal to the number
+specified in the "samples" line. ipfw will sort and interpolate
+the curve as needed.
+.El
+.Pp
+Example of a profile file:
+.Bd -literal -offset indent
+name    bla_bla_bla
+samples 100
+loss-level    0.86
+prob    delay
+0       200	# minimum overhead is 200ms
+0.5     200
+0.5     300
+0.8     1000
+0.9     1300
+1       1300
+#configuration file end
+.Ed
 .El
 .Pp
 The following parameters can be configured for a queue:
@@ -2917,10 +2991,17 @@ API based upon code written by
 .An Daniel Boulet
 for BSDI.
 .Pp
+Dummynet was introduced by Luigi Rizzo in 1997-1998.
+.Pp
 Some early work (1999-2000) on the
 .Nm dummynet
 traffic shaper supported by Akamba Corp.
 .Pp
+The ipfw core (ipfw2) was completely redesigned and
+reimplemented by Luigi Rizzo in summer 2002. Further
+actions and
+options have been added by various developers over the years.
+.Pp
 .An -nosplit
 In-kernel NAT support written by
 .An Paolo Pisati Aq piso@FreeBSD.org
@@ -2933,6 +3014,10 @@ support has been developed by
 The primary developers and maintainers are David Hayes and Jason But.
 For further information visit:
 .Aq http://www.caia.swin.edu.au/urp/SONATA
+.Pp
+Delay profiles have been developed by Alessandro Cerri and
+Luigi Rizzo, supported by the
+European Commission within Projects Onelab and Onelab2.
 .Sh BUGS
 The syntax has grown over the years and sometimes it might be confusing.
 Unfortunately, backward compatibility prevents cleaning up mistakes

Modified: head/sbin/ipfw/ipfw2.h
==============================================================================
--- head/sbin/ipfw/ipfw2.h	Thu Apr  9 12:20:37 2009	(r190864)
+++ head/sbin/ipfw/ipfw2.h	Thu Apr  9 12:46:00 2009	(r190865)
@@ -153,6 +153,7 @@ enum tokens {
 	TOK_MASK,
 	TOK_BW,
 	TOK_DELAY,
+	TOK_PIPE_PROFILE,
 	TOK_RED,
 	TOK_GRED,
 	TOK_DROPTAIL,

Modified: head/sys/netinet/ip_dummynet.c
==============================================================================
--- head/sys/netinet/ip_dummynet.c	Thu Apr  9 12:20:37 2009	(r190864)
+++ head/sys/netinet/ip_dummynet.c	Thu Apr  9 12:46:00 2009	(r190865)
@@ -519,14 +519,64 @@ transmit_event(struct dn_pipe *pipe, str
 	}
 }
 
+#define div64(a, b)	((int64_t)(a) / (int64_t)(b))
+#define DN_TO_DROP	0xffff
 /*
- * the following macro computes how many ticks we have to wait
- * before being able to transmit a packet. The credit is taken from
- * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
- */
-#define SET_TICKS(_m, q, p)	\
-    ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \
-    p->bandwidth;
+ * Compute how many ticks we have to wait before being able to send
+ * a packet. This is computed as the "wire time" for the packet
+ * (length + extra bits), minus the credit available, scaled to ticks.
+ * Check that the result is not negative (it could be if we have
+ * too much leftover credit in q->numbytes).
+ */
+static inline dn_key
+set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p)
+{
+	int64_t ret;
+
+	ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz
+		- q->numbytes + p->bandwidth - 1 , p->bandwidth);
+#if 0
+	printf("%s %d extra_bits %d numb %d ret %d\n",
+		__FUNCTION__, __LINE__,
+		(int)(q->extra_bits & 0xffffffff),
+		(int)(q->numbytes & 0xffffffff),
+		(int)(ret & 0xffffffff));
+#endif
+	if (ret < 0)
+		ret = 0;
+	return ret;
+}
+
+/*
+ * Convert the additional MAC overheads/delays into an equivalent
+ * number of bits for the given data rate. The samples are in milliseconds
+ * so we need to divide by 1000.
+ */
+static dn_key
+compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p)
+{
+	int index;
+	dn_key extra_bits;
+
+	if (!p->samples || p->samples_no == 0)
+		return 0;
+	index  = random() % p->samples_no;
+	extra_bits = ((dn_key)p->samples[index] * p->bandwidth) / 1000;
+	if (index >= p->loss_level) {
+		struct dn_pkt_tag *dt = dn_tag_get(pkt);
+		if (dt)
+			dt->dn_dir = DN_TO_DROP;
+	}
+	return extra_bits;
+}
+
+static void
+free_pipe(struct dn_pipe *p)
+{
+	if (p->samples)
+		free(p->samples, M_DUMMYNET);
+	free(p, M_DUMMYNET);
+}
 
 /*
  * extract pkt from queue, compute output time (could be now)
@@ -585,12 +635,16 @@ ready_event(struct dn_flow_queue *q, str
 	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
 	while ((pkt = q->head) != NULL) {
 		int len = pkt->m_pkthdr.len;
-		int len_scaled = p->bandwidth ? len * 8 * hz : 0;
+		dn_key len_scaled = p->bandwidth ? len*8*hz
+			+ q->extra_bits*hz
+			: 0;
 
-		if (len_scaled > q->numbytes)
+		if (DN_KEY_GT(len_scaled, q->numbytes))
 			break;
 		q->numbytes -= len_scaled;
 		move_pkt(pkt, q, p, len);
+		if (q->head)
+			q->extra_bits = compute_extra_bits(q->head, p);
 	}
 	/*
 	 * If we have more packets queued, schedule next ready event
@@ -600,7 +654,7 @@ ready_event(struct dn_flow_queue *q, str
 	 * ticks to go for the finish time of the packet.
 	 */
 	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
-		dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
+		dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */
 
 		q->sched_time = curr_time;
 		heap_insert(&ready_heap, curr_time + t, (void *)q);
@@ -933,6 +987,12 @@ dummynet_send(struct mbuf *m)
 		case DN_TO_ETH_OUT:
 			ether_output_frame(pkt->ifp, m);
 			break;
+
+		case DN_TO_DROP:
+			/* drop the packet after some time */
+			m_freem(m);
+			break;
+
 		default:
 			printf("dummynet: bad switch %d!\n", pkt->dn_dir);
 			m_freem(m);
@@ -1367,8 +1427,10 @@ dummynet_io(struct mbuf **m0, int dir, s
 		/* Fixed-rate queue: just insert into the ready_heap. */
 		dn_key t = 0;
 
-		if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes)
-			t = SET_TICKS(m, q, pipe);
+		if (pipe->bandwidth) {
+			q->extra_bits = compute_extra_bits(m, pipe);
+			t = set_ticks(m, q, pipe);
+		}
 		q->sched_time = curr_time;
 		if (t == 0)		/* Must process it now. */
 			ready_event(q, &head, &tail);
@@ -1555,7 +1617,7 @@ dummynet_flush(void)
 		SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) {
 			SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next);
 			purge_pipe(pipe);
-			free(pipe, M_DUMMYNET);
+			free_pipe(pipe);
 		}
 	DUMMYNET_UNLOCK();
 }
@@ -1775,11 +1837,38 @@ config_pipe(struct dn_pipe *p)
 		pipe->delay = p->delay;
 		set_fs_parms(&(pipe->fs), pfs);
 
+		/* Handle changes in the delay profile. */
+		if (p->samples_no > 0) {
+			if (pipe->samples_no != p->samples_no) {
+				if (pipe->samples != NULL)
+					free(pipe->samples, M_DUMMYNET);
+				pipe->samples =
+				    malloc(p->samples_no*sizeof(dn_key),
+					M_DUMMYNET, M_NOWAIT | M_ZERO);
+				if (pipe->samples == NULL) {
+					DUMMYNET_UNLOCK();
+					printf("dummynet: no memory "
+						"for new samples\n");
+					return (ENOMEM);
+				}
+				pipe->samples_no = p->samples_no;
+			}
+
+			strncpy(pipe->name,p->name,sizeof(pipe->name));
+			pipe->loss_level = p->loss_level;
+			for (i = 0; i<pipe->samples_no; ++i)
+				pipe->samples[i] = p->samples[i];
+		} else if (pipe->samples != NULL) {
+			free(pipe->samples, M_DUMMYNET);
+			pipe->samples = NULL;
+			pipe->samples_no = 0;
+		}
+
 		if (pipe->fs.rq == NULL) {	/* a new pipe */
 			error = alloc_hash(&(pipe->fs), pfs);
 			if (error) {
 				DUMMYNET_UNLOCK();
-				free(pipe, M_DUMMYNET);
+				free_pipe(pipe);
 				return (error);
 			}
 			SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
@@ -1957,7 +2046,7 @@ delete_pipe(struct dn_pipe *p)
 	pipe_remove_from_heap(&wfq_ready_heap, pipe);
 	DUMMYNET_UNLOCK();
 
-	free(pipe, M_DUMMYNET);
+	free_pipe(pipe);
     } else { /* this is a WF2Q queue (dn_flow_set) */
 	struct dn_flow_set *fs;
 
@@ -2095,6 +2184,7 @@ dummynet_get(struct sockopt *sopt)
 		pipe_bp->fs.next.sle_next = NULL;
 		pipe_bp->fs.pipe = NULL;
 		pipe_bp->fs.rq = NULL;
+		pipe_bp->samples = NULL;
 
 		bp += sizeof(*pipe) ;
 		bp = dn_copy_set(&(pipe->fs), bp);
@@ -2127,7 +2217,8 @@ static int
 ip_dn_ctl(struct sockopt *sopt)
 {
     int error = 0 ;
-    struct dn_pipe *p, tmp_pipe;
+    struct dn_pipe *p;
+    struct dn_pipe_max tmp_pipe;	/* pipe + large buffer */
 
     error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
     if (error)
@@ -2159,15 +2250,18 @@ ip_dn_ctl(struct sockopt *sopt)
 	break ;
 
     case IP_DUMMYNET_CONFIGURE :
-	p = &tmp_pipe ;
-	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
+	p = (struct dn_pipe *)&tmp_pipe ;
+	error = sooptcopyin(sopt, p, sizeof(tmp_pipe), sizeof *p);
 	if (error)
 	    break ;
+	if (p->samples_no > 0)
+	    p->samples = &tmp_pipe.samples[0];
+
 	error = config_pipe(p);
 	break ;
 
     case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
-	p = &tmp_pipe ;
+	p = (struct dn_pipe *)&tmp_pipe ;
 	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
 	if (error)
 	    break ;

Modified: head/sys/netinet/ip_dummynet.h
==============================================================================
--- head/sys/netinet/ip_dummynet.h	Thu Apr  9 12:20:37 2009	(r190864)
+++ head/sys/netinet/ip_dummynet.h	Thu Apr  9 12:46:00 2009	(r190865)
@@ -204,7 +204,18 @@ struct dn_flow_queue {
     struct mbuf *head, *tail ;	/* queue of packets */
     u_int len ;
     u_int len_bytes ;
-    u_long numbytes ;		/* credit for transmission (dynamic queues) */
+
+    /*
+     * When we emulate MAC overheads, or channel unavailability due
+     * to other traffic on a shared medium, we augment the packet at
+     * the head of the queue with an 'extra_bits' field representing
+     * the additional delay the packet will be subject to:
+     *		extra_bits = bw*unavailable_time.
+     * With large bandwidth and large delays, extra_bits (and also numbytes)
+     * can become very large, so better play safe and use 64 bit
+     */
+    dn_key numbytes ;		/* credit for transmission (dynamic queues) */
+    dn_key extra_bits;		/* extra bits simulating unavailable channel */
 
     u_int64_t tot_pkts ;	/* statistics counters	*/
     u_int64_t tot_bytes ;
@@ -252,6 +263,7 @@ struct dn_flow_set {
 #define DN_IS_GENTLE_RED	0x0004
 #define DN_QSIZE_IS_BYTES	0x0008	/* queue size is measured in bytes */
 #define DN_NOERROR		0x0010	/* do not report ENOBUFS on drops  */
+#define	DN_HAS_PROFILE		0x0020	/* the pipe has a delay profile. */
 #define DN_IS_PIPE		0x4000
 #define DN_IS_QUEUE		0x8000
 
@@ -324,7 +336,9 @@ struct dn_pipe {		/* a pipe */
 
     dn_key V ;			/* virtual time */
     int sum;			/* sum of weights of all active sessions */
-    int numbytes;		/* bits I can transmit (more or less). */
+
+    /* Same as in dn_flow_queue, numbytes can become large */
+    dn_key numbytes;		/* bits I can transmit (more or less). */
 
     dn_key sched_time ;		/* time pipe was scheduled in ready_heap */
 
@@ -337,7 +351,25 @@ struct dn_pipe {		/* a pipe */
     int ready ; /* set if ifp != NULL and we got a signal from it */
 
     struct dn_flow_set fs ; /* used with fixed-rate flows */
+
+    /* fields to simulate a delay profile */
+
+#define ED_MAX_NAME_LEN		32
+    char name[ED_MAX_NAME_LEN];
+    int loss_level;
+    int samples_no;
+    int *samples;
 };
+
+/* dn_pipe_max is used to pass pipe configuration from userland to
+ * kernel space and back
+ */
+#define ED_MAX_SAMPLES_NO	1024
+struct dn_pipe_max {
+	struct dn_pipe pipe;
+	int samples[ED_MAX_SAMPLES_NO];
+};
+
 SLIST_HEAD(dn_pipe_head, dn_pipe);
 
 #ifdef _KERNEL



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200904091246.n39Ck05i004627>