Date: Thu, 11 Feb 2010 15:03:56 +0000 (UTC)
From: Lawrence Stewart <lstewart@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r203779 - projects/tcp_cc_head/sys/netinet
Message-ID: <201002111503.o1BF3uUq007997@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: lstewart
Date: Thu Feb 11 15:03:56 2010
New Revision: 203779
URL: http://svn.freebsd.org/changeset/base/203779

Log:
  Import David's integration work on the vegas and hd CC algorithms.

Modified:
  projects/tcp_cc_head/sys/netinet/cc_hd.c
  projects/tcp_cc_head/sys/netinet/cc_vegas.c

Modified: projects/tcp_cc_head/sys/netinet/cc_hd.c
==============================================================================
--- projects/tcp_cc_head/sys/netinet/cc_hd.c Thu Feb 11 14:45:00 2010 (r203778)
+++ projects/tcp_cc_head/sys/netinet/cc_hd.c Thu Feb 11 15:03:56 2010 (r203779)
@@ -1,11 +1,14 @@
 /*-
  * Copyright (c) 2009-2010
  * Swinburne University of Technology, Melbourne, Australia
+ * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * All rights reserved.
  *
  * This software was developed at the Centre for Advanced Internet
  * Architectures, Swinburne University, by David Hayes and Lawrence Stewart,
- * made possible in part by a grant from the Cisco University Research Program
- * Fund at Community Foundation Silicon Valley.
+ * made possible in part by grants from the FreeBSD Foundation and
+ * Cisco University Research Program Fund at Community Foundation
+ * Silicon Valley.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -43,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/module.h> @@ -56,64 +60,87 @@ __FBSDID("$FreeBSD$"); #include <netinet/cc.h> #include <netinet/cc_module.h> +#include <netinet/h_ertt.h>i +#include <netinet/helper.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #define CAST_PTR_INT(X) (*((int*)(X))) +int hd_mod_init(void); +void hd_pre_fr(struct tcpcb *tp, struct tcphdr *th); +void hd_post_fr(struct tcpcb *tp, struct tcphdr *th); +void hd_ack_received(struct tcpcb *tp, struct tcphdr *th); + struct cc_algo hd_cc_algo = { .name = "hd", .mod_init = hd_mod_init, - .pre_fr = hd_pre_fr, - .post_fr = hd_post_fr, + .ack_received = hd_ack_received + /* the rest behaves as newreno */ + /* XXXLAS: Need to explicitly initialise to newreno funcs in mod_init */ }; +static VNET_DEFINE(uint32_t, hd_qthresh); +static VNET_DEFINE(uint32_t, hd_qmin); +static VNET_DEFINE(uint32_t, hd_pmax); +static VNET_DEFINE(int, ertt_id); + +#define V_hd_qthresh VNET(hd_qthresh) +#define V_hd_qmin VNET(hd_qmin) +#define V_hd_pmax VNET(hd_pmax) +#define V_ertt_id VNET(ertt_id) + static int hd_qthresh_handler(SYSCTL_HANDLER_ARGS) { - INIT_VNET_INET(TD_TO_VNET(req->td)); - int error, new; - - new = V_delaycc_queue_thresh; - error = sysctl_handle_int(oidp, &new, 0, req); - if (error == 0 && req->newptr) { - if (1000*new < hz) /* if less than kernel tick rate */ - error = EINVAL; - else - V_delaycc_queue_thresh = new*hz/1000; /* number of kernel ticks */ + if (req->newptr != NULL) { + if (CAST_PTR_INT(req->newptr) < 1 || CAST_PTR_INT(req->newptr) < V_hd_qmin) + return (EINVAL); } - return (error); + return sysctl_handle_int(oidp, arg1, arg2, req); + + /* INIT_VNET_INET(TD_TO_VNET(req->td)); */ + /* int 
error, new; */ + + /* new = V_hd_qthresh; */ + /* error = sysctl_handle_int(oidp, &new, 0, req); */ + /* if (error == 0 && req->newptr) { */ + /* if (new*hz < 1000) /\* if less than kernel tick rate *\/ */ + /* error = EINVAL; */ + /* else */ + /* V_hd_qthresh = new*hz/1000; /\* number of kernel ticks *\/ */ + /* } */ + /* return (error); */ } static int hd_qmin_handler(SYSCTL_HANDLER_ARGS) { - if(req-V>newptr != NULL) { - if(CAST_PTR_INT(req->newptr) < 1) + if (req->newptr != NULL) { + if (CAST_PTR_INT(req->newptr) > V_hd_qthresh) return (EINVAL); } - return sysctl_handle_int(oidp, arg1, arg2, req); -} + /* INIT_VNET_INET(TD_TO_VNET(req->td)); */ + /* int error, new; */ -static int -hd_pmax_handler(SYSCTL_HANDLER_ARGS) -{ - if(req->newptr != NULL) { - if(CAST_PTR_INT(req->newptr) == 0 || - CAST_PTR_INT(req->newptr) > 100) - return (EINVAL); - } - - return sysctl_handle_int(oidp, arg1, arg2, req); + /* new = V_hd_qmin; */ + /* error = sysctl_handle_int(oidp, &new, 0, req); */ + /* if (error == 0 && req->newptr) { */ + /* if (1000*new < hz) /\* if less than kernel tick rate *\/ */ + /* error = EINVAL; */ + /* else */ + /* V_hd_qmin = new*hz/1000; /\* number of kernel ticks *\/ */ + /* } */ + /* return (error); */ } static int -hd_wnd_backoff_handler(SYSCTL_HANDLER_ARGS) +hd_pmax_handler(SYSCTL_HANDLER_ARGS) { if(req->newptr != NULL) { if(CAST_PTR_INT(req->newptr) == 0 || @@ -124,120 +151,73 @@ hd_wnd_backoff_handler(SYSCTL_HANDLER_AR return sysctl_handle_int(oidp, arg1, arg2, req); } -/* Modifications to impliment the Hamilton delay based congestion control - algorithm -- David Hayes The key differences between delay_tcp_congestion_exp - and tcp_congestion_exp are: - - 1. instead of ssthresh being set to half cwnd, it is set to: - - delta * minrtt/rtt * cwnd. - - The basic back off factor is the ratio between the current measured rtt - and the lowest measured rtt. To ensure a good minrtt measurment, this - is modified by 0 < delta < 1 . 
Delta is the window_backoff_modifier/100. - (see D.Leith, R.Shorten, J.Heffner, L.Dunn, F.Baker - Delay-based AIMD Congestion Control, Proc. PFLDnet 2007) - - 2. snd_cwnd = snd_ssthresh. Since no packet has been lost - the normal fast recovery mechanism is not necessary. -*/ -/* invbeta*8 for interger arithmetic */ -static void inline -beta_tcp_congestion_exp(struct tcpcb *tp, int invbeta) -{ - u_int win; - if (invbeta < 8 || invbeta > 16) - invbeta=16; /* for safety, must reduce but not by more than 1/2 */ - - win = min(tp->snd_wnd, tp->snd_cwnd) * 8/ - invbeta / tp->t_maxseg; - if (win < 2) - win = 2; - tp->snd_ssthresh = win * tp->t_maxseg; - tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; - tp->snd_cwnd = tp->snd_ssthresh; - ENTER_DELAYRATERECOVERY(tp); -} /* Hamilto backoff function (see reference below) */ static int inline prob_backoff_func(int Qdly, int maxQdly) { - int p; - if (Qdly < V_delaycc_queue_thresh) - p = INT_MAX / 100 * V_delaycc_pmax - / (V_delaycc_queue_thresh - V_delaycc_queue_min) - * (Qdly - V_delaycc_queue_min); - else - if (Qdly > V_delaycc_queue_thresh) - p = INT_MAX / 100 * V_delaycc_pmax - / (maxQdly - V_delaycc_queue_thresh) - * (maxQdly - Qdly); - else - p = INT_MAX / 100 * V_delaycc_pmax; - return(p); + int p; + if (Qdly < V_hd_qthresh) + p = INT_MAX / 100 * V_hd_pmax + / (V_hd_qthresh - V_hd_qmin) + * (Qdly - V_hd_qmin); + else + if (Qdly > V_hd_qthresh) + p = INT_MAX / 100 * V_hd_pmax + / (maxQdly - V_hd_qthresh) + * (maxQdly - Qdly); + else + p = INT_MAX / 100 * V_hd_pmax; + return(p); } -/* half cwnd backoff - David Hayes */V +/* half cwnd backoff */ +/* XXXLAS: I don't think we need this. 
*/ static void inline -tcp_congestion_exp(struct tcpcb *tp) +hd_congestion_exp(struct tcpcb *tp) { - u_int win, decr; - win = tp->snd_cwnd/tp->t_maxseg; - decr = win>>2; - win -= decr; - if (win < 2) - win = 2; - tp->snd_ssthresh = win * tp->t_maxseg; - tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; - tp->snd_cwnd = tp->snd_ssthresh; - ENTER_DELAYRATERECOVERY(tp); -} - -/* Hamilton delay based congestion control detection and response - David Hayes*/ -void -hamilton_delay_congestion(struct tcpcb *tp) + u_int win, decr; + win = min(tp->snd_wnd, tp->snd_cwnd) / tp->t_maxseg; + decr = win>>1; + win -= decr; + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + tp->t_flags |= TF_ECN_SND_CWR; + tp->snd_cwnd = tp->snd_ssthresh; +} + +/* Hamilton delay based congestion control detection and response */ + void +hd_ack_received(struct tcpcb *tp, struct tcphdr *th) { - if (!IN_DELAYRATERECOVERY(tp) && !IN_FASTRECOVERY(tp)) { - struct enhanced_timing *e_t; - e_t = &tp->e_t; - - - if (e_t->rtt && e_t->minrtt && V_delaycc_window_backoff_modifier - && (V_delaycc_queue_thresh > 0)) { - int Qdly = e_t->rtt - e_t->minrtt; - if (mod_tests & HD_ProbBackoff && (Qdly > V_delaycc_queue_min)) { - /* based on algorithm developed at the Hamilton Institute, Ireland - See Lukasz Budzisz, Rade Stanojevic, Robert Shorton and Fred Baker, - "A stratagy for fair coexistence of loss and delay-based congestion - control algorithms", to be published IEEE Communication Letters 2009 */ - int p; - p = prob_backoff_func(Qdly, e_t->maxrtt - e_t->minrtt); - if (random() < p) { - tcp_congestion_exp(tp); /* halve cwnd */ - } - } else { - /* test for congestion using measured rtt as an indicator */ - if ((e_t->rtt - e_t->minrtt) > V_delaycc_queue_thresh) { - /* 8 factor to add precision */ - int invbeta = e_t->rtt *800 / e_t->minrtt / V_delaycc_window_backoff_modifier; - 
beta_tcp_congestion_exp(tp, invbeta); + + struct ertt *e_t = (struct ertt *)get_helper_dblock(tp->dblocks, + tp->n_dblocks, V_ertt_id); + + if (e_t->rtt && e_t->minrtt && (V_hd_qthresh > 0)) { + int Qdly = e_t->rtt - e_t->minrtt; + if (Qdly > V_hd_qmin) { + /* based on algorithm developed at the Hamilton Institute, Ireland + See Lukasz Budzisz, Rade Stanojevic, Robert Shorton and Fred Baker, + "A stratagy for fair coexistence of loss and delay-based congestion + control algorithms", to be published IEEE Communication Letters 2009 */ + int p; + p = prob_backoff_func(Qdly, e_t->maxrtt - e_t->minrtt); + if (random() < p) { + hd_congestion_exp(tp); /* halve cwnd */ + } + } } - } - } - } } int hd_mod_init(void) { - hd_cc_algo.ack_received = newreno_cc_algo.ack_received; + V_ertt_id = get_helper_id("ertt"); return (0); } @@ -245,24 +225,20 @@ SYSCTL_DECL(_net_inet_tcp_cc_hd); SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, hd, CTLFLAG_RW, NULL, "Hamilton delay-based congestion control related settings"); -SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, window_backoff_modifier, - CTLTYPE_UINT|CTLFLAG_RW, &hd_wnd_backoff_modifier, 0, - &hd_wnd_backoff_handler, "IU", - "percentage - When Hamilton delay based congestion control is used, this sets the percent modification to the multiplicative decrease factor"); - -SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, queue_threshold - CTLTYPE_UINT|CTLFLAG_RW, &hd_qthresh, 0, +SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, queue_threshold, + CTLTYPE_UINT|CTLFLAG_RW, &V_hd_qthresh, 20, &hd_qthresh_handler, "IU", - "Entered in milliseconds, but converted to kernel ticks - When Hamilton delay based congestion control is used, this sets the queueing congestion threshold"); + "Queueing congestion threshold in ticks"); -SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, pmax - CTLTYPE_UINT|CTLFLAG_RW, &hd_pmax, 0, +SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, pmax, + CTLTYPE_UINT|CTLFLAG_RW, &V_hd_pmax, 5, &hd_pmax_handler, "IU", - "percentage - When Hamilton delay based 
congestion control is used, this sets the minimum queueing delay for the probabilistic backoff function"); + "Per packet maximum backoff probability as a percentage"); -SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, queue_min - CTLTYPE_UINT|CTLFLAG_RW, &hd_qmin, 0, +SYSCTL_OID(_net_inet_tcp_cc_hd, OID_AUTO, queue_min, + CTLTYPE_UINT|CTLFLAG_RW, &V_hd_qmin, 5, &hd_qmin_handler, "IU", - "Entered in milliseconds, but converted to kernel ticks - When Hamilton delay based congestion control is used, this sets the minimum queueing delay for the probabilistic backoff function"); + "Minimum queueing delay threshold in ticks"); DECLARE_CC_MODULE(hd, &hd_cc_algo); +MODULE_DEPEND(hd, ertt, 1, 1, 1); Modified: projects/tcp_cc_head/sys/netinet/cc_vegas.c ============================================================================== --- projects/tcp_cc_head/sys/netinet/cc_vegas.c Thu Feb 11 14:45:00 2010 (r203778) +++ projects/tcp_cc_head/sys/netinet/cc_vegas.c Thu Feb 11 15:03:56 2010 (r203779) @@ -1,11 +1,14 @@ /*- * Copyright (c) 2009-2010 * Swinburne University of Technology, Melbourne, Australia + * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * All rights reserved. * * This software was developed at the Centre for Advanced Internet * Architectures, Swinburne University, by David Hayes and Lawrence Stewart, - * made possible in part by a grant from the Cisco University Research Program - * Fund at Community Foundation Silicon Valley. + * made possible in part by grants from the FreeBSD Foundation and + * Cisco University Research Program Fund at Community Foundation + * Silicon Valley. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -56,6 +59,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/cc.h> #include <netinet/cc_module.h> +#include <netinet/h_ertt.h> +#include <netinet/helper.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> @@ -63,26 +68,42 @@ __FBSDID("$FreeBSD$"); #define CAST_PTR_INT(X) (*((int*)(X))) int vegas_mod_init(void); -void vegas_pre_fr(struct tcpcb *tp, struct tcphdr *th); -void vegas_post_fr(struct tcpcb *tp, struct tcphdr *th); +int vegas_cb_init(struct tcpcb *tp); +void vegas_cb_destroy(struct tcpcb *tp); +void vegas_ack_received(struct tcpcb *tp, struct tcphdr *th); +void vegas_conn_init(struct tcpcb *tp); + +struct vegas { + int rtt_ctr; /*counts rtts for vegas slow start */ +}; + +MALLOC_DECLARE(M_VEGAS); +MALLOC_DEFINE(M_VEGAS, "vegas data", + "Per connection data required for the VEGAS congestion algorithm"); /* function pointers for various hooks into the TCP stack */ struct cc_algo vegas_cc_algo = { .name = "vegas", .mod_init = vegas_mod_init, - .pre_fr = vegas_pre_fr, - .post_fr = vegas_post_fr, + .ack_received = vegas_ack_received, + .cb_init = vegas_cb_init, + .cb_destroy = vegas_cb_destroy + /* newreno fastrecovery and timout mechanisms are used in this implementation */ }; -static uint32_t vegas_alpha = 1; -static uint32_t vegas_beta = 3; +static VNET_DEFINE(uint32_t, vegas_alpha); +static VNET_DEFINE(uint32_t, vegas_beta); +static VNET_DEFINE(int, ertt_id); +#define V_vegas_alpha VNET(vegas_alpha) +#define V_vegas_beta VNET(vegas_beta) +#define V_ertt_id VNET(ertt_id) static int vegas_alpha_handler(SYSCTL_HANDLER_ARGS) { if(req->newptr != NULL) { if(CAST_PTR_INT(req->newptr) < 1 || - CAST_PTR_INT(req->newptr) > vegas_beta) + CAST_PTR_INT(req->newptr) > V_vegas_beta) return (EINVAL); } @@ -94,7 +115,7 @@ vegas_beta_handler(SYSCTL_HANDLER_ARGS) { if(req->newptr != NULL) { 
if(CAST_PTR_INT(req->newptr) < 1 || - CAST_PTR_INT(req->newptr) <= vegas_alpha) + CAST_PTR_INT(req->newptr) < V_vegas_alpha) return (EINVAL); } @@ -102,38 +123,76 @@ vegas_beta_handler(SYSCTL_HANDLER_ARGS) } void -vegas_post_fr(struct tcpcb *tp, struct tcphdr *th) +vegas_ack_received(struct tcpcb *tp, struct tcphdr *th) { -/* struct enhanced_timing *e_t; - struct rateinfo *r_i; - e_t = &tp->e_t; - r_i = &e_t->r_i; - - if (!IN_FASTRECOVERY(tp) && (tp->e_t.flags & DRCC_NEW_MEASUREMENT)) { - - long diff = r_i->expected_tx_rate - r_i->actual_tx_rate; - if (diff < V_ratecc_vegas_alpha*tp->t_maxseg/e_t->minrtt) - tp->snd_cwnd = min(tp->snd_cwnd + tp->t_maxseg, TCP_MAXWIN<<tp->snd_scale); - else if (diff > V_ratecc_vegas_beta*tp->t_maxseg/e_t->minrtt) - tp->snd_cwnd = max(2*tp->t_maxseg,tp->snd_cwnd-tp->t_maxseg); - - e_t->flags &= ~DRCC_NEW_MEASUREMENT; + struct ertt *e_t = (struct ertt *) get_helper_dblock(tp->dblocks, + tp->n_dblocks, V_ertt_id); + struct vegas *vegas_data = CC_DATA(tp); + long expected_tx_rate, actual_tx_rate; + + if (!IN_FASTRECOVERY(tp) && (e_t->flags & ERTT_NEW_MEASUREMENT)) { + + expected_tx_rate = e_t->marked_snd_cwnd/e_t->minrtt; + actual_tx_rate = e_t->bytes_tx_in_marked_rtt/e_t->markedpkt_rtt; + + long ndiff = (expected_tx_rate - actual_tx_rate)*e_t->minrtt/tp->t_maxseg; + + + if (ndiff < V_vegas_alpha) + if (tp->snd_cwnd < tp->snd_ssthresh) { + vegas_data->rtt_ctr += 1; + if (vegas_data->rtt_ctr > 1) { + newreno_cc_algo.ack_received(tp, th); /* reno slow start every second RTT */ + vegas_data->rtt_ctr = 0; + } + } else { + tp->snd_cwnd = min(tp->snd_cwnd + tp->t_maxseg, TCP_MAXWIN<<tp->snd_scale); + } + else if (ndiff > V_vegas_beta) { + tp->snd_cwnd = max(2*tp->t_maxseg,tp->snd_cwnd-tp->t_maxseg); + if (tp->snd_cwnd < tp->snd_ssthresh) + tp->snd_ssthresh = tp->snd_cwnd; /* exit slow start */ + e_t->flags &= ~ERTT_NEW_MEASUREMENT; + } } -*/ } +/* Create struct to store VEGAS specific data */ +int +vegas_cb_init(struct tcpcb *tp) +{ + struct 
vegas *vegas_data; + + vegas_data = malloc(sizeof(struct vegas), M_VEGAS, M_NOWAIT); + + if (vegas_data == NULL) + return (ENOMEM); + + vegas_data->rtt_ctr = 1; + + CC_DATA(tp) = vegas_data; + + return (0); +} + +/* + * Free the struct used to store VEGAS specific data for the specified + * TCP control block. + */ void -vegas_pre_fr(struct tcpcb *tp, struct tcphdr *th) +vegas_cb_destroy(struct tcpcb *tp) { - //EXIT_RATE_AVOID(tp); - //EXIT_DELAYRATERECOVERY(tp); + if (CC_DATA(tp) != NULL) + free(CC_DATA(tp), M_VEGAS); } int vegas_mod_init(void) { - vegas_cc_algo.ack_received = newreno_cc_algo.ack_received; + V_vegas_alpha = 1; + V_vegas_beta = 3; + V_ertt_id = get_helper_id("ertt"); return (0); } @@ -142,13 +201,14 @@ SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, "VEGAS related settings"); SYSCTL_OID(_net_inet_tcp_cc_vegas, OID_AUTO, vegas_alpha, - CTLTYPE_UINT|CTLFLAG_RW, &vegas_alpha, 0, + CTLTYPE_UINT|CTLFLAG_RW, &V_vegas_alpha, 0, &vegas_alpha_handler, "IU", "vegas alpha parameter - Entered in terms of number \"buffers\" (0 < alpha < beta)"); SYSCTL_OID(_net_inet_tcp_cc_vegas, OID_AUTO, vegas_beta, - CTLTYPE_UINT|CTLFLAG_RW, &vegas_beta, 0, + CTLTYPE_UINT|CTLFLAG_RW, &V_vegas_beta, 0, &vegas_beta_handler, "IU", "vegas beta parameter - Entered in terms of number \"buffers\" (0 < alpha < beta)"); DECLARE_CC_MODULE(vegas, &vegas_cc_algo); +MODULE_DEPEND(vegas, ertt, 1, 1, 1);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201002111503.o1BF3uUq007997>