From owner-svn-src-all@FreeBSD.ORG Fri Feb 10 20:38:45 2012 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 7BFFB1065670; Fri, 10 Feb 2012 20:38:45 +0000 (UTC) (envelope-from tuexen@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 658CC8FC08; Fri, 10 Feb 2012 20:38:45 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q1AKcj40093298; Fri, 10 Feb 2012 20:38:45 GMT (envelope-from tuexen@svn.freebsd.org) Received: (from tuexen@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q1AKcjdY093293; Fri, 10 Feb 2012 20:38:45 GMT (envelope-from tuexen@svn.freebsd.org) Message-Id: <201202102038.q1AKcjdY093293@svn.freebsd.org> From: Michael Tuexen Date: Fri, 10 Feb 2012 20:38:45 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r231431 - stable/8/sys/netinet X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for "user" and "projects"\)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 10 Feb 2012 20:38:45 -0000 Author: tuexen Date: Fri Feb 10 20:38:44 2012 New Revision: 231431 URL: http://svn.freebsd.org/changeset/base/231431 Log: MFC r221460: Implement Resource Pooling V2 and an MPTCP-like congestion control. Based on a patch received from Martin Becke.
Modified: stable/8/sys/netinet/sctp.h stable/8/sys/netinet/sctp_cc_functions.c stable/8/sys/netinet/sctp_sysctl.h stable/8/sys/netinet/sctp_usrreq.c Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/boot/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/e1000/ (props changed) Modified: stable/8/sys/netinet/sctp.h ============================================================================== --- stable/8/sys/netinet/sctp.h Fri Feb 10 20:36:17 2012 (r231430) +++ stable/8/sys/netinet/sctp.h Fri Feb 10 20:38:44 2012 (r231431) @@ -265,6 +265,13 @@ struct sctp_paramhdr { #define SCTP_CC_OPT_USE_DCCC_ECN 0x00002001 #define SCTP_CC_OPT_STEADY_STEP 0x00002002 +#define SCTP_CMT_OFF 0 +#define SCTP_CMT_BASE 1 +#define SCTP_CMT_RPV1 2 +#define SCTP_CMT_RPV2 3 +#define SCTP_CMT_MPTCP 4 +#define SCTP_CMT_MAX SCTP_CMT_MPTCP + /* RS - Supported stream scheduling modules for pluggable * stream scheduling */ Modified: stable/8/sys/netinet/sctp_cc_functions.c ============================================================================== --- stable/8/sys/netinet/sctp_cc_functions.c Fri Feb 10 20:36:17 2012 (r231430) +++ stable/8/sys/netinet/sctp_cc_functions.c Fri Feb 10 20:38:44 2012 (r231431) @@ -47,6 +47,10 @@ #include __FBSDID("$FreeBSD$"); +#define SHIFT_MPTCP_MULTI_N 40 +#define SHIFT_MPTCP_MULTI_Z 16 +#define SHIFT_MPTCP_MULTI 8 + static void sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { @@ -67,7 +71,8 @@ sctp_set_initial_cc_param(struct sctp_tc cwnd_in_mtu = assoc->max_burst; net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; } - if (stcb->asoc.sctp_cmt_on_off == 2) { + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { /* In case of resource pooling initialize appropriately */ net->cwnd /= 
assoc->numnets; if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) { @@ -91,14 +96,23 @@ sctp_cwnd_update_after_fr(struct sctp_tc { struct sctp_nets *net; uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; /* MT FIXME: Don't compute this over and over again */ t_ssthresh = 0; t_cwnd = 0; - if (asoc->sctp_cmt_on_off == 2) { + t_ucwnd_sbw = 0; + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { t_ssthresh += net->ssthresh; t_cwnd += net->cwnd; + if (net->lastsa > 0) { + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa; + } + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; } } /*- @@ -119,11 +133,37 @@ sctp_cwnd_update_after_fr(struct sctp_tc struct sctp_tmit_chunk *lchk; int old_cwnd = net->cwnd; - if (asoc->sctp_cmt_on_off == 2) { - net->ssthresh = (uint32_t) (((uint64_t) 4 * - (uint64_t) net->mtu * - (uint64_t) net->ssthresh) / - (uint64_t) t_ssthresh); + if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) || + (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) { + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + + } + if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) { + uint32_t srtt; + + srtt = net->lastsa; + /* + * lastsa>>3; we don't need + * to devide ... 
+ */ + if (srtt == 0) { + srtt = 1; + } + /* + * Short Version => Equal to + * Contel Version MBe + */ + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ ; + } if ((net->cwnd > t_cwnd / 2) && (net->ssthresh < net->cwnd - t_cwnd / 2)) { net->ssthresh = net->cwnd - t_cwnd / 2; @@ -629,14 +669,47 @@ sctp_cwnd_update_after_sack_common(struc struct sctp_nets *net; int old_cwnd; uint32_t t_ssthresh, t_cwnd, incr; + uint64_t t_ucwnd_sbw; + uint64_t t_path_mptcp; + uint64_t mptcp_like_alpha; + uint32_t srtt; + uint64_t max_path; /* MT FIXME: Don't compute this over and over again */ t_ssthresh = 0; t_cwnd = 0; - if (stcb->asoc.sctp_cmt_on_off == 2) { + t_ucwnd_sbw = 0; + t_path_mptcp = 0; + mptcp_like_alpha = 1; + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) { + max_path = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { t_ssthresh += net->ssthresh; t_cwnd += net->cwnd; + /* lastsa>>3; we don't need to devide ... 
*/ + srtt = net->lastsa; + if (srtt > 0) { + uint64_t tmp; + + t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt; + t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) / + (((uint64_t) net->mtu) * (uint64_t) srtt); + tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) / + ((uint64_t) net->mtu * (uint64_t) (srtt * srtt)); + if (tmp > max_path) { + max_path = tmp; + } + } + } + if (t_ucwnd_sbw == 0) { + t_ucwnd_sbw = 1; + } + if (t_path_mptcp > 0) { + mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp); + } else { + mptcp_like_alpha = 1; } } /******************************/ @@ -818,10 +891,11 @@ sctp_cwnd_update_after_sack_common(struc if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { - old_cwnd = net->cwnd; - if (stcb->asoc.sctp_cmt_on_off == 2) { - uint32_t limit; + uint32_t limit; + old_cwnd = net->cwnd; + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: limit = (uint32_t) (((uint64_t) net->mtu * (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * (uint64_t) net->ssthresh) / @@ -835,11 +909,56 @@ sctp_cwnd_update_after_sack_common(struc if (incr == 0) { incr = 1; } - } else { + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... 
+ */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; + } + limit = (uint32_t) (((uint64_t) net->mtu * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + incr = (uint32_t) (((uint64_t) net->net_ack * + (uint64_t) net->cwnd) / + ((uint64_t) srtt * t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr > limit) { + incr = limit; + } + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + limit = (uint32_t) (((uint64_t) net->mtu * + mptcp_like_alpha * + (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >> + SHIFT_MPTCP_MULTI); + incr = (uint32_t) (((uint64_t) net->net_ack * + mptcp_like_alpha) >> + SHIFT_MPTCP_MULTI); + if (incr > limit) { + incr = limit; + } + if (incr > net->net_ack) { + incr = net->net_ack; + } + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: incr = net->net_ack; if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) { incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable); } + break; } net->cwnd += incr; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { @@ -868,15 +987,44 @@ sctp_cwnd_update_after_sack_common(struc (net->partial_bytes_acked >= net->cwnd)) { net->partial_bytes_acked -= net->cwnd; old_cwnd = net->cwnd; - if (asoc->sctp_cmt_on_off == 2) { + switch (asoc->sctp_cmt_on_off) { + case SCTP_CMT_RPV1: incr = (uint32_t) (((uint64_t) net->mtu * (uint64_t) net->ssthresh) / (uint64_t) t_ssthresh); if (incr == 0) { incr = 1; } - } else { + break; + case SCTP_CMT_RPV2: + /* + * lastsa>>3; we don't need + * to divide ... 
+ */ + srtt = net->lastsa; + if (srtt == 0) { + srtt = 1; + } + incr = (uint32_t) ((uint64_t) net->mtu * + (uint64_t) net->cwnd / + ((uint64_t) srtt * + t_ucwnd_sbw)); + /* INCREASE FACTOR */ + if (incr == 0) { + incr = 1; + } + break; + case SCTP_CMT_MPTCP: + incr = (uint32_t) ((mptcp_like_alpha * + (uint64_t) net->cwnd) >> + SHIFT_MPTCP_MULTI); + if (incr > net->mtu) { + incr = net->mtu; + } + break; + default: incr = net->mtu; + break; } net->cwnd += incr; SDT_PROBE(sctp, cwnd, net, ack, @@ -926,21 +1074,49 @@ sctp_cwnd_update_after_timeout(struct sc { int old_cwnd = net->cwnd; uint32_t t_ssthresh, t_cwnd; + uint64_t t_ucwnd_sbw; /* MT FIXME: Don't compute this over and over again */ t_ssthresh = 0; t_cwnd = 0; - if (stcb->asoc.sctp_cmt_on_off == 2) { + if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) || + (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) { struct sctp_nets *lnet; + uint32_t srtt; + t_ucwnd_sbw = 0; TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) { t_ssthresh += lnet->ssthresh; t_cwnd += lnet->cwnd; + srtt = lnet->lastsa; + /* lastsa>>3; we don't need to divide ... */ + if (srtt > 0) { + t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt; + } + } + if (t_ucwnd_sbw < 1) { + t_ucwnd_sbw = 1; + } + if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) { + net->ssthresh = (uint32_t) (((uint64_t) 4 * + (uint64_t) net->mtu * + (uint64_t) net->ssthresh) / + (uint64_t) t_ssthresh); + } else { + uint64_t cc_delta; + + srtt = net->lastsa; + /* lastsa>>3; we don't need to divide ... 
*/ + if (srtt == 0) { + srtt = 1; + } + cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2; + if (cc_delta < t_cwnd) { + net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta); + } else { + net->ssthresh = net->mtu; + } } - net->ssthresh = (uint32_t) (((uint64_t) 4 * - (uint64_t) net->mtu * - (uint64_t) net->ssthresh) / - (uint64_t) t_ssthresh); if ((net->cwnd > t_cwnd / 2) && (net->ssthresh < net->cwnd - t_cwnd / 2)) { net->ssthresh = net->cwnd - t_cwnd / 2; Modified: stable/8/sys/netinet/sctp_sysctl.h ============================================================================== --- stable/8/sys/netinet/sctp_sysctl.h Fri Feb 10 20:36:17 2012 (r231430) +++ stable/8/sys/netinet/sctp_sysctl.h Fri Feb 10 20:38:44 2012 (r231431) @@ -336,9 +336,9 @@ struct sctp_sysctl { /* cmt_on_off: CMT on/off flag */ #define SCTPCTL_CMT_ON_OFF_DESC "CMT settings" -#define SCTPCTL_CMT_ON_OFF_MIN 0 -#define SCTPCTL_CMT_ON_OFF_MAX 2 -#define SCTPCTL_CMT_ON_OFF_DEFAULT 0 +#define SCTPCTL_CMT_ON_OFF_MIN SCTP_CMT_OFF +#define SCTPCTL_CMT_ON_OFF_MAX SCTP_CMT_MAX +#define SCTPCTL_CMT_ON_OFF_DEFAULT SCTP_CMT_OFF /* EY - nr_sack_on_off: NR_SACK on/off flag */ #define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag" Modified: stable/8/sys/netinet/sctp_usrreq.c ============================================================================== --- stable/8/sys/netinet/sctp_usrreq.c Fri Feb 10 20:36:17 2012 (r231430) +++ stable/8/sys/netinet/sctp_usrreq.c Fri Feb 10 20:38:44 2012 (r231431) @@ -2989,18 +2989,22 @@ sctp_setopt(struct socket *so, int optna SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - stcb->asoc.sctp_cmt_on_off = av->assoc_value; - if (stcb->asoc.sctp_cmt_on_off > 2) { - stcb->asoc.sctp_cmt_on_off = 2; + if (av->assoc_value > SCTP_CMT_MAX) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + stcb->asoc.sctp_cmt_on_off = av->assoc_value; } SCTP_TCB_UNLOCK(stcb); } 
else { - SCTP_INP_WLOCK(inp); - inp->sctp_cmt_on_off = av->assoc_value; - if (inp->sctp_cmt_on_off > 2) { - inp->sctp_cmt_on_off = 2; + if (av->assoc_value > SCTP_CMT_MAX) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + SCTP_INP_WLOCK(inp); + inp->sctp_cmt_on_off = av->assoc_value; + SCTP_INP_WUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);