Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Feb 2005 07:20:17 GMT
From:      Dan Nelson <dnelson@allantgroup.com>
To:        freebsd-bugs@FreeBSD.org
Subject:   Re: kern/75122: [PATCH] Incorrect inflight bandwidth calculation on first packet
Message-ID:  <200502080720.j187KGr5038302@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR kern/75122; it has been noted by GNATS.

From: Dan Nelson <dnelson@allantgroup.com>
To: Uwe Doering <gemini@geminix.org>
Cc: FreeBSD-gnats-submit@FreeBSD.org, freebsd-bugs@FreeBSD.org
Subject: Re: kern/75122: [PATCH] Incorrect inflight bandwidth calculation on	first packet
Date: Tue, 8 Feb 2005 01:15:22 -0600

 In the last episode (Dec 22), Dan Nelson said:
 > I am pretty sure only useful acks get to this point.  I've changed that
 > bit of code to just be
 > 
 > +       /*
 > +        * Ignore pure window update acks.
 > +        */
 > +       if (ack_seq == tp->t_bw_rtseq)
 > +               return;
 > 
 > and added another check for negative sequences that increments a sysctl
 > counter.  So far netstat -s has counted 567 out-of-order packets but my
 > counter is still at 0.
 
 And after 40 days of uptime on my mail/squid servers, the counter is
 still at zero, so the code works perfectly for me.  It'd be nice if
 this was committed before 5.4.  Final patch follows:
 
 Index: tcp_hostcache.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/tcp_hostcache.c,v
 retrieving revision 1.7.2.1
 diff -u -p -r1.7.2.1 tcp_hostcache.c
 --- tcp_hostcache.c	31 Jan 2005 23:26:36 -0000	1.7.2.1
 +++ tcp_hostcache.c	8 Feb 2005 07:09:31 -0000
 @@ -175,7 +175,7 @@ SYSCTL_INT(_net_inet_tcp_hostcache, OID_
       &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
  
  SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
 -	CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
 +	CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
  	sysctl_tcp_hc_list, "A", "List of all hostcache entries");
  
  
 @@ -676,7 +676,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
  				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
  			    msec(hc_entry->rmx_rttvar *
  				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
 -			    hc_entry->rmx_bandwidth * 8,
 +			    hc_entry->rmx_bandwidth,
  			    hc_entry->rmx_cwnd,
  			    hc_entry->rmx_sendpipe,
  			    hc_entry->rmx_recvpipe,
 Index: tcp_subr.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/tcp_subr.c,v
 retrieving revision 1.201.2.12
 diff -u -p -r1.201.2.12 tcp_subr.c
 --- tcp_subr.c	31 Jan 2005 23:26:36 -0000	1.201.2.12
 +++ tcp_subr.c	8 Feb 2005 07:09:33 -0000
 @@ -628,7 +632,6 @@ tcp_newtcpcb(inp)
  	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  	tp->t_rcvtime = ticks;
 -	tp->t_bw_rtttime = ticks;
  	/*
  	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
  	 * because the socket may be bound to an IPv6 wildcard address,
 @@ -1934,9 +1937,10 @@ tcp_twrespond(struct tcptw *tw, int flag
  void
  tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
  {
 -	u_long bw;
 +	u_long bw, avgbw;
  	u_long bwnd;
  	int save_ticks;
 +	int delta_ticks;
  
  	INP_LOCK_ASSERT(tp->t_inpcb);
  
 @@ -1951,29 +1955,50 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
  	}
  
  	/*
 +	  * Validate the delta time.  If a connection is new or has been idle
 +	  * a long time we have to reset the bandwidth calculator.
 +	  */
 +	save_ticks = ticks;
 +	delta_ticks = save_ticks - tp->t_bw_rtttime;
 +	if (tp->t_bw_rtttime == 0 || delta_ticks < 0 || delta_ticks > hz * 10) {
 +		tp->t_bw_rtttime = ticks;
 +		tp->t_bw_rtseq = ack_seq;
 +		if (tp->snd_bandwidth == 0)
 +			tp->snd_bandwidth = tcp_inflight_min;
 +		return;
 +	}
 +	if (delta_ticks == 0)
 +		return;
 +	
 +	/*
 +	 * Ignore pure window update acks.
 +	 */
 +	if (ack_seq == tp->t_bw_rtseq)
 +		return;
 +
 +	/* Sanity check */
 +	if (SEQ_LT(ack_seq, tp->t_bw_rtseq)) {
 +		if (tcp_inflight_debug > 0) {
 +			printf ("%p invalid sequence: %u vs %u",
 +				tp, ack_seq, tp->t_bw_rtseq);
 +		}
 +	}
 +	
 +	/*
  	 * Figure out the bandwidth.  Due to the tick granularity this
  	 * is a very rough number and it MUST be averaged over a fairly
  	 * long period of time.  XXX we need to take into account a link
  	 * that is not using all available bandwidth, but for now our
  	 * slop will ramp us up if this case occurs and the bandwidth later
  	 * increases.
 -	 *
 -	 * Note: if ticks rollover 'bw' may wind up negative.  We must
 -	 * effectively reset t_bw_rtttime for this case.
  	 */
 -	save_ticks = ticks;
 -	if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
 -		return;
  
 -	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
 -	    (save_ticks - tp->t_bw_rtttime);
 +	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / delta_ticks;
 +
  	tp->t_bw_rtttime = save_ticks;
  	tp->t_bw_rtseq = ack_seq;
 -	if (tp->t_bw_rtttime == 0 || (int)bw < 0)
 -		return;
 -	bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
  
 -	tp->snd_bandwidth = bw;
 +	avgbw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
  
  	/*
  	 * Calculate the semi-static bandwidth delay product, plus two maximal
 @@ -2004,22 +2030,25 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
  	 *	    no other choice.
  	 */
  #define USERTT	((tp->t_srtt + tp->t_rttbest) / 2)
 -	bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
 +	bwnd = (int64_t)avgbw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
  #undef USERTT
  
  	if (tcp_inflight_debug > 0) {
  		static int ltime;
  		if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
  			ltime = ticks;
 -			printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
 +			printf("%p bw %lu(%lu,%lu) rttbest %d srtt %d bwnd %ld\n",
  			    tp,
 -			    bw,
 +			    avgbw, tp->snd_bandwidth, bw,
  			    tp->t_rttbest,
  			    tp->t_srtt,
  			    bwnd
  			);
  		}
  	}
 +	
 +	tp->snd_bandwidth = avgbw;
 +	
  	if ((long)bwnd < tcp_inflight_min)
  		bwnd = tcp_inflight_min;
  	if (bwnd > tcp_inflight_max)
 Index: tcp_usrreq.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
 retrieving revision 1.107.2.1
 diff -u -p -r1.107.2.1 tcp_usrreq.c
 --- tcp_usrreq.c	31 Jan 2005 23:26:37 -0000	1.107.2.1
 +++ tcp_usrreq.c	8 Feb 2005 07:09:34 -0000
 @@ -864,7 +864,6 @@ tcp_connect(tp, nam, td)
  	tp->t_state = TCPS_SYN_SENT;
  	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
  	tp->iss = tcp_new_isn(tp);
 -	tp->t_bw_rtseq = tp->iss;
  	tcp_sendseqinit(tp);
  
  	/*
 @@ -959,7 +958,6 @@ tcp6_connect(tp, nam, td)
  	tp->t_state = TCPS_SYN_SENT;
  	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
  	tp->iss = tcp_new_isn(tp);
 -	tp->t_bw_rtseq = tp->iss;
  	tcp_sendseqinit(tp);
  
  	/*
 
 
 -- 
 	Dan Nelson
 	dnelson@allantgroup.com



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200502080720.j187KGr5038302>