mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-16 10:20:30 +00:00
A number of performance-reducing flaws fixed based on comments
from Larry Peterson &co. at Arizona: - Header prediction for ACKs did not exclude Fast Retransmit/Recovery. - srtt calculation tended to get ``stuck'' and could never decrease when below 8. It still can't, but the scaling factors are adjusted so that this artifact does not cause as bad an effect on the RTO value as it used to. The paper also points out the incr/8 error that has been long since fixed, and the problems with ACKing frequency resulting from the use of options which I suspect to be fixed already as well (as part of the T/TCP work). Obtained from: Brakmo & Peterson, ``Performance Problems in BSD4.4 TCP''
This commit is contained in:
parent
90c9787151
commit
233e8c18e8
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=14753
@ -31,7 +31,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
|
||||
* $Id: tcp_input.c,v 1.37 1996/02/26 21:47:10 guido Exp $
|
||||
* $Id: tcp_input.c,v 1.38 1996/03/11 15:13:29 davidg Exp $
|
||||
*/
|
||||
|
||||
#ifndef TUBA_INCLUDE
|
||||
@ -492,7 +492,8 @@ tcp_input(m, iphlen)
|
||||
if (ti->ti_len == 0) {
|
||||
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
|
||||
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
|
||||
tp->snd_cwnd >= tp->snd_wnd) {
|
||||
tp->snd_cwnd >= tp->snd_wnd &&
|
||||
tp->t_dupacks < tcprexmtthresh) {
|
||||
/*
|
||||
* this is a pure ack for outstanding data.
|
||||
*/
|
||||
@ -1257,7 +1258,7 @@ tcp_input(m, iphlen)
|
||||
* If the congestion window was inflated to account
|
||||
* for the other side's cached packets, retract it.
|
||||
*/
|
||||
if (tp->t_dupacks > tcprexmtthresh &&
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
@ -1819,6 +1820,7 @@ tcp_xmit_timer(tp, rtt)
|
||||
register struct tcpcb *tp;
|
||||
short rtt;
|
||||
{
|
||||
#ifdef notdef
|
||||
register short delta;
|
||||
|
||||
tcpstat.tcps_rttupdated++;
|
||||
@ -1858,6 +1860,50 @@ tcp_xmit_timer(tp, rtt)
|
||||
tp->t_srtt = rtt << TCP_RTT_SHIFT;
|
||||
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
|
||||
}
|
||||
#else /* Peterson paper */
|
||||
register int delta;
|
||||
|
||||
tcpstat.tcps_rttupdated++;
|
||||
tp->t_rttupdated++;
|
||||
if (tp->t_srtt != 0) {
|
||||
/*
|
||||
* srtt is stored as fixed point with 5 bits after the
|
||||
* binary point (i.e., scaled by 32). The following magic
|
||||
* is equivalent to the smoothing algorithm in rfc793 with
|
||||
* an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
|
||||
* point). Adjust rtt to origin 0.
|
||||
*/
|
||||
delta = ((rtt - 1) << TCP_DELTA_SHIFT)
|
||||
- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
|
||||
|
||||
if ((tp->t_srtt += delta) <= 0)
|
||||
tp->t_srtt = 1;
|
||||
|
||||
/*
|
||||
* We accumulate a smoothed rtt variance (actually, a
|
||||
* smoothed mean difference), then set the retransmit
|
||||
* timer to smoothed rtt + 4 times the smoothed variance.
|
||||
* rttvar is stored as fixed point with 4 bits after the
|
||||
* binary point (scaled by 16). The following is
|
||||
* equivalent to rfc793 smoothing with an alpha of .75
|
||||
* (rttvar = rttvar*3/4 + |delta| / 4). This replaces
|
||||
* rfc793's wired-in beta.
|
||||
*/
|
||||
if (delta < 0)
|
||||
delta = -delta;
|
||||
delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
|
||||
if ((tp->t_rttvar += delta) <= 0)
|
||||
tp->t_rttvar = 1;
|
||||
} else {
|
||||
/*
|
||||
* No rtt measurement yet - use the unsmoothed rtt.
|
||||
* Set the variance to half the rtt (so our first
|
||||
* retransmit happens at 3*rtt).
|
||||
*/
|
||||
tp->t_srtt = rtt << TCP_RTT_SHIFT;
|
||||
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
|
||||
}
|
||||
#endif
|
||||
tp->t_rtt = 0;
|
||||
tp->t_rxtshift = 0;
|
||||
|
||||
@ -1872,8 +1918,13 @@ tcp_xmit_timer(tp, rtt)
|
||||
* statistical, we have to test that we don't drop below
|
||||
* the minimum feasible timer (which is 2 ticks).
|
||||
*/
|
||||
#ifdef notdef
|
||||
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
|
||||
tp->t_rttmin, TCPTV_REXMTMAX);
|
||||
#else /* Peterson */
|
||||
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
|
||||
max(tp->t_rttmin, TCPTV_MIN + rtt - 1), TCPTV_REXMTMAX);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We received an ack for a packet that wasn't retransmitted;
|
||||
|
@ -31,7 +31,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
|
||||
* $Id: tcp_input.c,v 1.37 1996/02/26 21:47:10 guido Exp $
|
||||
* $Id: tcp_input.c,v 1.38 1996/03/11 15:13:29 davidg Exp $
|
||||
*/
|
||||
|
||||
#ifndef TUBA_INCLUDE
|
||||
@ -492,7 +492,8 @@ tcp_input(m, iphlen)
|
||||
if (ti->ti_len == 0) {
|
||||
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
|
||||
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
|
||||
tp->snd_cwnd >= tp->snd_wnd) {
|
||||
tp->snd_cwnd >= tp->snd_wnd &&
|
||||
tp->t_dupacks < tcprexmtthresh) {
|
||||
/*
|
||||
* this is a pure ack for outstanding data.
|
||||
*/
|
||||
@ -1257,7 +1258,7 @@ tcp_input(m, iphlen)
|
||||
* If the congestion window was inflated to account
|
||||
* for the other side's cached packets, retract it.
|
||||
*/
|
||||
if (tp->t_dupacks > tcprexmtthresh &&
|
||||
if (tp->t_dupacks >= tcprexmtthresh &&
|
||||
tp->snd_cwnd > tp->snd_ssthresh)
|
||||
tp->snd_cwnd = tp->snd_ssthresh;
|
||||
tp->t_dupacks = 0;
|
||||
@ -1819,6 +1820,7 @@ tcp_xmit_timer(tp, rtt)
|
||||
register struct tcpcb *tp;
|
||||
short rtt;
|
||||
{
|
||||
#ifdef notdef
|
||||
register short delta;
|
||||
|
||||
tcpstat.tcps_rttupdated++;
|
||||
@ -1858,6 +1860,50 @@ tcp_xmit_timer(tp, rtt)
|
||||
tp->t_srtt = rtt << TCP_RTT_SHIFT;
|
||||
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
|
||||
}
|
||||
#else /* Peterson paper */
|
||||
register int delta;
|
||||
|
||||
tcpstat.tcps_rttupdated++;
|
||||
tp->t_rttupdated++;
|
||||
if (tp->t_srtt != 0) {
|
||||
/*
|
||||
* srtt is stored as fixed point with 5 bits after the
|
||||
* binary point (i.e., scaled by 32). The following magic
|
||||
* is equivalent to the smoothing algorithm in rfc793 with
|
||||
* an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
|
||||
* point). Adjust rtt to origin 0.
|
||||
*/
|
||||
delta = ((rtt - 1) << TCP_DELTA_SHIFT)
|
||||
- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
|
||||
|
||||
if ((tp->t_srtt += delta) <= 0)
|
||||
tp->t_srtt = 1;
|
||||
|
||||
/*
|
||||
* We accumulate a smoothed rtt variance (actually, a
|
||||
* smoothed mean difference), then set the retransmit
|
||||
* timer to smoothed rtt + 4 times the smoothed variance.
|
||||
* rttvar is stored as fixed point with 4 bits after the
|
||||
* binary point (scaled by 16). The following is
|
||||
* equivalent to rfc793 smoothing with an alpha of .75
|
||||
* (rttvar = rttvar*3/4 + |delta| / 4). This replaces
|
||||
* rfc793's wired-in beta.
|
||||
*/
|
||||
if (delta < 0)
|
||||
delta = -delta;
|
||||
delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
|
||||
if ((tp->t_rttvar += delta) <= 0)
|
||||
tp->t_rttvar = 1;
|
||||
} else {
|
||||
/*
|
||||
* No rtt measurement yet - use the unsmoothed rtt.
|
||||
* Set the variance to half the rtt (so our first
|
||||
* retransmit happens at 3*rtt).
|
||||
*/
|
||||
tp->t_srtt = rtt << TCP_RTT_SHIFT;
|
||||
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
|
||||
}
|
||||
#endif
|
||||
tp->t_rtt = 0;
|
||||
tp->t_rxtshift = 0;
|
||||
|
||||
@ -1872,8 +1918,13 @@ tcp_xmit_timer(tp, rtt)
|
||||
* statistical, we have to test that we don't drop below
|
||||
* the minimum feasible timer (which is 2 ticks).
|
||||
*/
|
||||
#ifdef notdef
|
||||
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
|
||||
tp->t_rttmin, TCPTV_REXMTMAX);
|
||||
#else /* Peterson */
|
||||
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
|
||||
max(tp->t_rttmin, TCPTV_MIN + rtt - 1), TCPTV_REXMTMAX);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We received an ack for a packet that wasn't retransmitted;
|
||||
|
@ -31,7 +31,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
|
||||
* $Id: tcp_var.h,v 1.29 1996/02/26 21:47:13 guido Exp $
|
||||
* $Id: tcp_var.h,v 1.30 1996/02/27 15:12:53 bde Exp $
|
||||
*/
|
||||
|
||||
#ifndef _NETINET_TCP_VAR_H_
|
||||
@ -191,10 +191,18 @@ struct rmxp_tao {
|
||||
* and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
|
||||
* binary point, and is smoothed with an ALPHA of 0.75.
|
||||
*/
|
||||
#ifdef notdef
|
||||
#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */
|
||||
#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */
|
||||
#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */
|
||||
#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */
|
||||
#else
|
||||
#define TCP_RTT_SCALE 32 /* multiplier for srtt; 5 bits frac. */
|
||||
#define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. */
|
||||
#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */
|
||||
#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */
|
||||
#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The initial retransmission should happen at rtt + 4 * rttvar.
|
||||
@ -206,11 +214,25 @@ struct rmxp_tao {
|
||||
* 1.5 tick we need. But, because the bias is
|
||||
* statistical, we have to test that we don't drop below
|
||||
* the minimum feasible timer (which is 2 ticks).
|
||||
#ifdef notdef
|
||||
* This macro assumes that the value of TCP_RTTVAR_SCALE
|
||||
* is the same as the multiplier for rttvar.
|
||||
#else
|
||||
* This version of the macro adapted from a paper by Lawrence
|
||||
* Brakmo and Larry Peterson which outlines a problem caused
|
||||
* by insufficient precision in the original implementation,
|
||||
* which results in inappropriately large RTO values for very
|
||||
* fast networks.
|
||||
#endif
|
||||
*/
|
||||
#ifdef notdef
|
||||
#define TCP_REXMTVAL(tp) \
|
||||
(((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar)
|
||||
#else
|
||||
/*
 * Retransmit timeout in ticks: smoothed rtt + 4 * smoothed variance.
 *
 * t_srtt is scaled by 2^TCP_RTT_SHIFT and t_rttvar by 2^TCP_RTTVAR_SHIFT.
 * t_srtt is first reduced to a scale of 2^TCP_DELTA_SHIFT; adding the
 * unshifted t_rttvar at that scale weights the variance by
 * 2^(TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT), i.e. 4 with the values defined
 * above.  The final shift converts the sum to whole ticks.
 *
 * Binary + binds tighter than >>, so the grouping is spelled out with
 * parentheses.  Note that (tp) is evaluated twice.
 */
#define TCP_REXMTVAL(tp) \
	((((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
|
||||
#endif
|
||||
|
||||
/* XXX
|
||||
* We want to avoid doing m_pullup on incoming packets but that
|
||||
|
Loading…
Reference in New Issue
Block a user