From owner-svn-src-all@FreeBSD.ORG Sun Jun 1 07:28:26 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 81FC790C; Sun, 1 Jun 2014 07:28:26 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 6E1132FA8; Sun, 1 Jun 2014 07:28:26 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s517SQs3099949; Sun, 1 Jun 2014 07:28:26 GMT (envelope-from hiren@svn.freebsd.org) Received: (from hiren@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s517SPQ1099942; Sun, 1 Jun 2014 07:28:25 GMT (envelope-from hiren@svn.freebsd.org) Message-Id: <201406010728.s517SPQ1099942@svn.freebsd.org> From: Hiren Panchasara Date: Sun, 1 Jun 2014 07:28:25 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r266941 - in head: sbin/ipfw sys/netinet sys/netpfil/ipfw X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 01 Jun 2014 07:28:26 -0000 Author: hiren Date: Sun Jun 1 07:28:24 2014 New Revision: 266941 URL: http://svnweb.freebsd.org/changeset/base/266941 Log: ECN marking implementation for dummynet. Changes include both DCTCP and RFC 3168 ECN marking methodology. 
DCTCP draft: http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00 Submitted by: Midori Kato (aoimidori27@gmail.com) Worked with: Lars Eggert (lars@netapp.com) Reviewed by: luigi, hiren Modified: head/sbin/ipfw/dummynet.c head/sbin/ipfw/ipfw.8 head/sbin/ipfw/ipfw2.h head/sys/netinet/ip_dummynet.h head/sys/netpfil/ipfw/ip_dn_glue.c head/sys/netpfil/ipfw/ip_dn_io.c head/sys/netpfil/ipfw/ip_dummynet.c Modified: head/sbin/ipfw/dummynet.c ============================================================================== --- head/sbin/ipfw/dummynet.c Sun Jun 1 05:02:48 2014 (r266940) +++ head/sbin/ipfw/dummynet.c Sun Jun 1 07:28:24 2014 (r266941) @@ -56,6 +56,7 @@ static struct _s_x dummynet_params[] = { { "sched_mask", TOK_SCHED_MASK }, { "flow_mask", TOK_FLOW_MASK }, { "droptail", TOK_DROPTAIL }, + { "ecn", TOK_ECN }, { "red", TOK_RED }, { "gred", TOK_GRED }, { "bw", TOK_BW }, @@ -239,7 +240,7 @@ print_flowset_parms(struct dn_fs *fs, ch else plr[0] = '\0'; - if (fs->flags & DN_IS_RED) /* RED parameters */ + if (fs->flags & DN_IS_RED) { /* RED parameters */ sprintf(red, "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", (fs->flags & DN_IS_GENTLE_RED) ? 
'G' : ' ', @@ -247,7 +248,9 @@ print_flowset_parms(struct dn_fs *fs, ch fs->min_th, fs->max_th, 1.0 * fs->max_p / (double)(1 << SCALE_RED)); - else + if (fs->flags & DN_IS_ECN) + strncat(red, " (ecn)", 6); + } else sprintf(red, "droptail"); if (prefix[0]) { @@ -1046,13 +1049,17 @@ end_mask: } if ((end = strsep(&av[0], "/"))) { double max_p = strtod(end, NULL); - if (max_p > 1 || max_p <= 0) - errx(EX_DATAERR, "0 < max_p <= 1"); + if (max_p > 1 || max_p < 0) + errx(EX_DATAERR, "0 <= max_p <= 1"); fs->max_p = (int)(max_p * (1 << SCALE_RED)); } ac--; av++; break; + case TOK_ECN: + fs->flags |= DN_IS_ECN; + break; + case TOK_DROPTAIL: NEED(fs, "droptail is only for flowsets"); fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED); @@ -1175,13 +1182,20 @@ end_mask: errx(EX_DATAERR, "2 <= queue size <= %ld", limit); } + if ((fs->flags & DN_IS_ECN) && !(fs->flags & DN_IS_RED)) + errx(EX_USAGE, "enable red/gred for ECN"); + if (fs->flags & DN_IS_RED) { size_t len; int lookup_depth, avg_pkt_size; - if (fs->min_th >= fs->max_th) + if (!(fs->flags & DN_IS_ECN) && (fs->min_th >= fs->max_th)) errx(EX_DATAERR, "min_th %d must be < than max_th %d", fs->min_th, fs->max_th); + else if ((fs->flags & DN_IS_ECN) && (fs->min_th > fs->max_th)) + errx(EX_DATAERR, "min_th %d must be =< than max_th %d", + fs->min_th, fs->max_th); + if (fs->max_th == 0) errx(EX_DATAERR, "max_th must be > 0"); Modified: head/sbin/ipfw/ipfw.8 ============================================================================== --- head/sbin/ipfw/ipfw.8 Sun Jun 1 05:02:48 2014 (r266940) +++ head/sbin/ipfw/ipfw.8 Sun Jun 1 07:28:24 2014 (r266941) @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 25, 2012 +.Dd May 31, 2014 .Dt IPFW 8 .Os .Sh NAME @@ -2441,22 +2441,23 @@ and control the maximum lengths that can be specified. .Pp .It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p +[ecn] Make use of the RED (Random Early Detection) queue management algorithm. 
.Ar w_q and .Ar max_p are floating -point numbers between 0 and 1 (0 not included), while +point numbers between 0 and 1 (inclusive), while .Ar min_th and .Ar max_th are integer numbers specifying thresholds for queue management (thresholds are computed in bytes if the queue has been defined in bytes, in slots otherwise). -The +The two parameters can also be of the same value if needed. The .Nm dummynet -also supports the gentle RED variant (gred). -Three +also supports the gentle RED variant (gred) and ECN (Explicit Congestion +Notification) as optional. Three .Xr sysctl 8 variables can be used to control the RED behaviour: .Bl -tag -width indent Modified: head/sbin/ipfw/ipfw2.h ============================================================================== --- head/sbin/ipfw/ipfw2.h Sun Jun 1 05:02:48 2014 (r266940) +++ head/sbin/ipfw/ipfw2.h Sun Jun 1 07:28:24 2014 (r266941) @@ -165,6 +165,7 @@ enum tokens { TOK_BURST, TOK_RED, TOK_GRED, + TOK_ECN, TOK_DROPTAIL, TOK_PROTO, /* dummynet tokens */ Modified: head/sys/netinet/ip_dummynet.h ============================================================================== --- head/sys/netinet/ip_dummynet.h Sun Jun 1 05:02:48 2014 (r266940) +++ head/sys/netinet/ip_dummynet.h Sun Jun 1 07:28:24 2014 (r266941) @@ -104,6 +104,7 @@ enum { /* user flags */ DN_HAS_PROFILE = 0x0010, /* a link has a profile */ DN_IS_RED = 0x0020, DN_IS_GENTLE_RED= 0x0040, + DN_IS_ECN = 0x0080, DN_PIPE_CMD = 0x1000, /* pipe config... */ }; Modified: head/sys/netpfil/ipfw/ip_dn_glue.c ============================================================================== --- head/sys/netpfil/ipfw/ip_dn_glue.c Sun Jun 1 05:02:48 2014 (r266940) +++ head/sys/netpfil/ipfw/ip_dn_glue.c Sun Jun 1 07:28:24 2014 (r266941) @@ -83,6 +83,7 @@ struct dn_flow_set { #define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ #define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */ #define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. 
*/ +#define DNOLD_IS_ECN 0x0040 #define DNOLD_IS_PIPE 0x4000 #define DNOLD_IS_QUEUE 0x8000 @@ -338,6 +339,8 @@ convertflags2new(int src) dst |= DN_IS_RED; if (src & DNOLD_IS_GENTLE_RED) dst |= DN_IS_GENTLE_RED; + if (src & DNOLD_IS_ECN) + dst |= DN_IS_ECN; if (src & DNOLD_HAS_PROFILE) dst |= DN_HAS_PROFILE; Modified: head/sys/netpfil/ipfw/ip_dn_io.c ============================================================================== --- head/sys/netpfil/ipfw/ip_dn_io.c Sun Jun 1 05:02:48 2014 (r266940) +++ head/sys/netpfil/ipfw/ip_dn_io.c Sun Jun 1 07:28:24 2014 (r266941) @@ -367,6 +367,8 @@ red_drops (struct dn_queue *q, int len) return (0); /* accept packet */ } if (q->avg >= fs->max_th) { /* average queue >= max threshold */ + if (fs->fs.flags & DN_IS_ECN) + return (1); if (fs->fs.flags & DN_IS_GENTLE_RED) { /* * According to Gentle-RED, if avg is greater than @@ -382,6 +384,8 @@ red_drops (struct dn_queue *q, int len) return (1); } } else if (q->avg > fs->min_th) { + if (fs->fs.flags & DN_IS_ECN) + return (1); /* * We compute p_b using the linear dropping function * p_b = c_1 * avg - c_2 @@ -414,6 +418,70 @@ red_drops (struct dn_queue *q, int len) } /* + * ECN/ECT Processing (partially adopted from altq) + */ +static int +ecn_mark(struct mbuf* m) +{ + struct ip *ip; + ip = mtod(m, struct ip *); + + switch (ip->ip_v) { + case IPVERSION: + { + u_int8_t otos; + int sum; + + if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) + return (0); /* not-ECT */ + if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + return (1); /* already marked */ + + /* + * ecn-capable but not marked, + * mark CE and update checksum + */ + otos = ip->ip_tos; + ip->ip_tos |= IPTOS_ECN_CE; + /* + * update checksum (from RFC1624) + * HC' = ~(~HC + ~m + m') + */ + sum = ~ntohs(ip->ip_sum) & 0xffff; + sum += (~otos & 0xffff) + ip->ip_tos; + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); /* add carry */ + ip->ip_sum = htons(~sum & 0xffff); + return (1); + } +#ifdef INET6 + case 
(IPV6_VERSION >> 4): + { + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + u_int32_t flowlabel; + + flowlabel = ntohl(ip6->ip6_flow); + if ((flowlabel >> 28) != 6) + return (0); /* version mismatch! */ + if ((flowlabel & (IPTOS_ECN_MASK << 20)) == + (IPTOS_ECN_NOTECT << 20)) + return (0); /* not-ECT */ + if ((flowlabel & (IPTOS_ECN_MASK << 20)) == + (IPTOS_ECN_CE << 20)) + return (1); /* already marked */ + /* + * ecn-capable but not marked, mark CE + */ + flowlabel |= (IPTOS_ECN_CE << 20); + ip6->ip6_flow = htonl(flowlabel); + return (1); + } +#endif + } + return (0); +} + +/* * Enqueue a packet in q, subject to space and queue management policy * (whose parameters are in q->fs). * Update stats for the queue and the scheduler. @@ -444,8 +512,10 @@ dn_enqueue(struct dn_queue *q, struct mb goto drop; if (f->plr && random() < f->plr) goto drop; - if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) - goto drop; + if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) { + if (!(f->flags & DN_IS_ECN) || !ecn_mark(m)) + goto drop; + } if (f->flags & DN_QSIZE_BYTES) { if (q->ni.len_bytes > f->qsize) goto drop; @@ -457,14 +527,14 @@ dn_enqueue(struct dn_queue *q, struct mb q->ni.len_bytes += len; ni->length++; ni->len_bytes += len; - return 0; + return (0); drop: io_pkt_drop++; q->ni.drops++; ni->drops++; FREE_PKT(m); - return 1; + return (1); } /* Modified: head/sys/netpfil/ipfw/ip_dummynet.c ============================================================================== --- head/sys/netpfil/ipfw/ip_dummynet.c Sun Jun 1 05:02:48 2014 (r266940) +++ head/sys/netpfil/ipfw/ip_dummynet.c Sun Jun 1 07:28:24 2014 (r266941) @@ -1071,7 +1071,10 @@ config_red(struct dn_fsk *fs) fs->min_th = SCALE(fs->fs.min_th); fs->max_th = SCALE(fs->fs.max_th); - fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th); + if (fs->fs.max_th == fs->fs.min_th) + fs->c_1 = fs->max_p; + else + fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th); fs->c_2 = 
SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); if (fs->fs.flags & DN_IS_GENTLE_RED) {