Mirror of https://git.FreeBSD.org/src.git
Implement TCP bandwidth delay product window limiting, similar to (but
not meant to duplicate) TCP/Vegas.  Add four sysctls and default the
implementation to 'off'.

    net.inet.tcp.inflight_enable    enable algorithm (defaults to 0=off)
    net.inet.tcp.inflight_debug     debugging (defaults to 1=on)
    net.inet.tcp.inflight_min       minimum window limit
    net.inet.tcp.inflight_max       maximum window limit

MFC after:	1 week
parent fecfd395b0
commit 1fcc99b5de

Notes (svn2git, 2020-12-20):
    svn path=/head/; revision=102017
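The four knobs are plain integer sysctls, so once this change is in the
kernel they can be poked from userland.  Below is a minimal sketch of a
test program (hypothetical, not part of this commit) that enables the
algorithm and reads back the current window bounds via sysctlbyname(3);
it assumes the default values listed above.

	/*
	 * inflight.c -- hypothetical test utility, not from this commit.
	 * Build on FreeBSD with: cc -o inflight inflight.c
	 */
	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		int enable = 1;
		int minwin, maxwin;
		size_t len;

		/* net.inet.tcp.inflight_enable defaults to 0; turn it on. */
		if (sysctlbyname("net.inet.tcp.inflight_enable", NULL, NULL,
		    &enable, sizeof(enable)) == -1)
			err(1, "inflight_enable");

		/* Read back the clamp range the algorithm will honor. */
		len = sizeof(minwin);
		if (sysctlbyname("net.inet.tcp.inflight_min", &minwin, &len,
		    NULL, 0) == -1)
			err(1, "inflight_min");
		len = sizeof(maxwin);
		if (sysctlbyname("net.inet.tcp.inflight_max", &maxwin, &len,
		    NULL, 0) == -1)
			err(1, "inflight_max");

		printf("inflight window clamped to [%d, %d]\n", minwin, maxwin);
		return (0);
	}
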
sys/netinet/tcp_input.c:

@@ -991,6 +991,7 @@ tcp_input(m, off0)
 				    SEQ_GT(th->th_ack, tp->t_rtseq))
 					tcp_xmit_timer(tp,
 					    ticks - tp->t_rtttime);
+				tcp_xmit_bandwidth_limit(tp, th->th_ack);
 				acked = th->th_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
@@ -1810,6 +1811,7 @@ tcp_input(m, off0)
 		tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
 	else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
 		tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+	tcp_xmit_bandwidth_limit(tp, th->th_ack);

 	/*
 	 * If all outstanding data is acked, stop retransmit
@@ -2438,6 +2440,8 @@ tcp_xmit_timer(tp, rtt)
 		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
 		if ((tp->t_rttvar += delta) <= 0)
 			tp->t_rttvar = 1;
+		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
+			tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	} else {
 		/*
 		 * No rtt measurement yet - use the unsmoothed rtt.
@@ -2446,6 +2450,7 @@ tcp_xmit_timer(tp, rtt)
 		 */
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
@@ -2573,6 +2578,7 @@ tcp_mss(tp, offer)
 		if (rt->rt_rmx.rmx_locks & RTV_RTT)
 			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
 		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
+		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
 		tcpstat.tcps_usedrtt++;
 		if (rt->rt_rmx.rmx_rttvar) {
 			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /

sys/netinet/tcp_output.c:

@@ -168,6 +168,7 @@ tcp_output(struct tcpcb *tp)
 	sendalot = 0;
 	off = tp->snd_nxt - tp->snd_una;
 	win = min(tp->snd_wnd, tp->snd_cwnd);
+	win = min(win, tp->snd_bwnd);

 	flags = tcp_outflags[tp->t_state];
 	/*

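The tcp_output() change is the entire enforcement path: snd_bwnd simply
becomes a third clamp on the send window, alongside the peer's advertised
window and the congestion window.  A standalone restatement of the
resulting window selection (hypothetical helper, not in the commit):

	/*
	 * Hypothetical sketch of the window selection in tcp_output()
	 * after this change; not part of the commit.
	 */
	#include <stdio.h>

	unsigned long
	effective_send_window(unsigned long snd_wnd, unsigned long snd_cwnd,
	    unsigned long snd_bwnd)
	{
		unsigned long win;

		win = snd_wnd < snd_cwnd ? snd_wnd : snd_cwnd;
		if (snd_bwnd < win)	/* the new bandwidth-delay clamp */
			win = snd_bwnd;
		return (win);
	}

	int
	main(void)
	{
		/* 64K advertised, 32K congestion window, 15088-byte bwnd. */
		printf("%lu\n", effective_send_window(65535, 32768, 15088));
		return (0);
	}
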
sys/netinet/tcp_subr.c:

@@ -146,6 +146,27 @@ static int tcp_isn_reseed_interval = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
     &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret");

+/*
+ * TCP bandwidth limiting sysctls.  Note that the default lower bound of
+ * 1024 exists only for debugging.  A good production default would be
+ * something like 6100.
+ */
+static int tcp_inflight_enable = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_enable, CTLFLAG_RW,
+    &tcp_inflight_enable, 0, "Enable automatic TCP inflight data limiting");
+
+static int tcp_inflight_debug = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_debug, CTLFLAG_RW,
+    &tcp_inflight_debug, 0, "Debug TCP inflight calculations");
+
+static int tcp_inflight_min = 1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_min, CTLFLAG_RW,
+    &tcp_inflight_min, 0, "Lower-bound for TCP inflight window");
+
+static int tcp_inflight_max = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_max, CTLFLAG_RW,
+    &tcp_inflight_max, 0, "Upper-bound for TCP inflight window");
+
 static void	tcp_cleartaocache(void);
 static struct inpcb *tcp_notify(struct inpcb *, int);

@@ -566,8 +587,10 @@ tcp_newtcpcb(inp)
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
+	tp->t_bw_rtttime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
@@ -1531,3 +1554,138 @@ static void
 tcp_cleartaocache()
 {
 }
+
+/*
+ * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
+ *
+ * This code attempts to calculate the bandwidth-delay product as a
+ * means of determining the optimal window size to maximize bandwidth,
+ * minimize RTT, and avoid the over-allocation of buffers on interfaces and
+ * routers.  This code also does a fairly good job keeping RTTs in check
+ * across slow links like modems.  We implement an algorithm which is very
+ * similar to (but not meant to be) TCP/Vegas.  The code operates on the
+ * transmitter side of a TCP connection and so only affects the transmit
+ * side of the connection.
+ *
+ * BACKGROUND:  TCP makes no provision for the management of buffer space
+ * at the end points or at the intermediate routers and switches.  A TCP
+ * stream, whether using NewReno or not, will eventually buffer as
+ * many packets as it is able and the only reason this typically works is
+ * due to the fairly small default buffers made available for a connection
+ * (typically 16K or 32K).  As machines use larger windows and/or window
+ * scaling it is now fairly easy for even a single TCP connection to blow-out
+ * all available buffer space not only on the local interface, but on
+ * intermediate routers and switches as well.  NewReno makes a misguided
+ * attempt to 'solve' this problem by waiting for an actual failure to occur,
+ * then backing off, then steadily increasing the window again until another
+ * failure occurs, ad-infinitum.  This results in terrible oscillation that
+ * is only made worse as network loads increase and the idea of intentionally
+ * blowing out network buffers is, frankly, a terrible way to manage network
+ * resources.
+ *
+ * It is far better to limit the transmit window prior to the failure
+ * condition being achieved.  There are two general ways to do this:  First
+ * you can 'scan' through different transmit window sizes and locate the
+ * point where the RTT stops increasing, indicating that you have filled the
+ * pipe, then scan backwards until you note that RTT stops decreasing, then
+ * repeat ad-infinitum.  This method works in principle but has severe
+ * implementation issues due to RTT variances, timer granularity, and
+ * instability in the algorithm which can lead to many false positives and
+ * create oscillations as well as interact badly with other TCP streams
+ * implementing the same algorithm.
+ *
+ * The second method is to limit the window to the bandwidth delay product
+ * of the link.  This is the method we implement.  RTT variances and our
+ * own manipulation of the congestion window, bwnd, can potentially
+ * destabilize the algorithm.  For this reason we have to stabilize the
+ * elements used to calculate the window.  We do this by using the minimum
+ * observed RTT, the long term average of the observed bandwidth, and
+ * by adding two segments worth of slop.  It isn't perfect but it is able
+ * to react to changing conditions and gives us a very stable basis on
+ * which to extend the algorithm.
+ */
+void
+tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
+{
+	u_long bw;
+	u_long bwnd;
+	int save_ticks;
+
+	/*
+	 * If inflight_enable is disabled in the middle of a tcp connection,
+	 * make sure snd_bwnd is effectively disabled.
+	 */
+	if (tcp_inflight_enable == 0) {
+		tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+		tp->snd_bandwidth = 0;
+		return;
+	}
+
+	/*
+	 * Figure out the bandwidth.  Due to the tick granularity this
+	 * is a very rough number and it MUST be averaged over a fairly
+	 * long period of time.  XXX we need to take into account a link
+	 * that is not using all available bandwidth, but for now our
+	 * slop will ramp us up if this case occurs and the bandwidth later
+	 * increases.
+	 */
+	save_ticks = ticks;
+	if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
+		return;
+
+	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
+	     (save_ticks - tp->t_bw_rtttime);
+	tp->t_bw_rtttime = save_ticks;
+	tp->t_bw_rtseq = ack_seq;
+	if (tp->t_bw_rtttime == 0)
+		return;
+	bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
+
+	tp->snd_bandwidth = bw;
+
+	/*
+	 * Calculate the semi-static bandwidth delay product, plus two maximal
+	 * segments.  The additional slop puts us squarely in the sweet
+	 * spot and also handles the bandwidth run-up case.  Without the
+	 * slop we could be locking ourselves into a lower bandwidth.
+	 *
+	 * Situations Handled:
+	 *	(1) Prevents over-queueing of packets on LANs, especially on
+	 *	    high speed LANs, allowing larger TCP buffers to be
+	 *	    specified, and also does a good job preventing
+	 *	    over-queueing of packets over choke points like modems
+	 *	    (at least for the transmit side).
+	 *
+	 *	(2) Is able to handle changing network loads (bandwidth
+	 *	    drops so bwnd drops, bandwidth increases so bwnd
+	 *	    increases).
+	 *
+	 *	(3) Theoretically should stabilize in the face of multiple
+	 *	    connections implementing the same algorithm (this may need
+	 *	    a little work).
+	 */
+#define USERTT ((tp->t_srtt + tp->t_rttbest) / 2)
+	bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + 2 * tp->t_maxseg;
+
+	if (tcp_inflight_debug > 0) {
+		static int ltime;
+		if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
+			ltime = ticks;
+			printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
+			    tp, bw, tp->t_rttbest, tp->t_srtt, bwnd);
+		}
+	}
+	if ((long)bwnd < tcp_inflight_min)
+		bwnd = tcp_inflight_min;
+	if (bwnd > tcp_inflight_max)
+		bwnd = tcp_inflight_max;
+	if ((long)bwnd < tp->t_maxseg * 2)
+		bwnd = tp->t_maxseg * 2;
+	tp->snd_bwnd = bwnd;
+}

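The fixed-point units make the bwnd formula easy to misread: t_srtt and
t_rttbest are stored in ticks << TCP_RTT_SHIFT, so dividing by
(hz << TCP_RTT_SHIFT) converts bw * USERTT from bytes/sec times scaled
ticks back into plain bytes.  Here is a self-contained sketch of the same
arithmetic with illustrative numbers (assuming hz = 100 and the stock
TCP_RTT_SHIFT of 5; not kernel code):

	/*
	 * Userland sketch of the bandwidth sample, the 15/16 exponential
	 * average, and the bwnd calculation from tcp_xmit_bandwidth_limit().
	 * Numbers are illustrative only.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define HZ		100	/* assumed kernel tick rate */
	#define TCP_RTT_SHIFT	5	/* stock value from tcp_var.h */

	int
	main(void)
	{
		int64_t bw_avg = 110000;	/* long-term average, bytes/sec */
		int64_t acked = 12000;		/* bytes acked since last sample */
		int	elapsed = 10;		/* ticks since last sample (100ms) */
		int	t_srtt = 12 << TCP_RTT_SHIFT;	 /* 120ms smoothed rtt */
		int	t_rttbest = 10 << TCP_RTT_SHIFT; /* 100ms best rtt */
		int	t_maxseg = 1460;
		int64_t bw, usertt, bwnd;

		/* Instantaneous sample in bytes/sec, then 15/16 old + 1/16 new. */
		bw = acked * HZ / elapsed;	/* 120000 bytes/sec */
		bw = (bw_avg * 15 + bw) >> 4;	/* ~110625 after smoothing */

		/* USERTT averages the smoothed and best rtt, still scaled. */
		usertt = (t_srtt + t_rttbest) / 2;	/* 352 = 110ms scaled */

		/* bytes/sec * scaled ticks / (HZ << SHIFT) = bytes in flight,
		 * plus two segments of slop, exactly as in the commit. */
		bwnd = bw * usertt / ((int64_t)HZ << TCP_RTT_SHIFT)
		    + 2 * t_maxseg;

		printf("bwnd = %lld bytes (~%lld segments)\n",
		    (long long)bwnd, (long long)(bwnd / t_maxseg));
		return (0);
	}

For a 110KB/sec link with a ~110ms round trip this yields roughly 15KB in
flight, about ten segments, which is the clamp tcp_output() then applies.
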
sys/netinet/tcp_usrreq.c:

@@ -875,6 +875,7 @@ tcp_connect(tp, nam, td)
 	tp->t_state = TCPS_SYN_SENT;
 	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 	tp->iss = tcp_new_isn(tp);
+	tp->t_bw_rtseq = tp->iss;
 	tcp_sendseqinit(tp);

 	/*
@@ -961,6 +962,7 @@ tcp6_connect(tp, nam, td)
 	tp->t_state = TCPS_SYN_SENT;
 	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 	tp->iss = tcp_new_isn(tp);
+	tp->t_bw_rtseq = tp->iss;
 	tcp_sendseqinit(tp);

 	/*

sys/netinet/tcp_var.h:

@@ -124,10 +124,12 @@ struct tcpcb {

 	u_long	snd_wnd;		/* send window */
 	u_long	snd_cwnd;		/* congestion-controlled window */
+	u_long	snd_bwnd;		/* bandwidth-controlled window */
 	u_long	snd_ssthresh;		/* snd_cwnd size threshold for
 					 * slow start exponential to
 					 * linear switch
 					 */
+	u_long	snd_bandwidth;		/* calculated bandwidth or 0 */
 	tcp_seq	snd_recover;		/* for use in fast recovery */

 	u_int	t_maxopd;		/* mss plus options */
@@ -137,6 +139,9 @@ struct tcpcb {
 	int	t_rtttime;		/* round trip time */
 	tcp_seq	t_rtseq;		/* sequence number being timed */

+	int	t_bw_rtttime;		/* used for bandwidth calculation */
+	tcp_seq	t_bw_rtseq;		/* used for bandwidth calculation */
+
 	int	t_rxtcur;		/* current retransmit value (ticks) */
 	u_int	t_maxseg;		/* maximum segment size */
 	int	t_srtt;			/* smoothed round-trip time */
@@ -144,6 +149,7 @@ struct tcpcb {

 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rttmin;		/* minimum rtt allowed */
+	u_int	t_rttbest;		/* best rtt we've seen */
 	u_long	t_rttupdated;		/* number of times rtt sampled */
 	u_long	max_sndwnd;		/* largest window peer has offered */

@@ -473,6 +479,7 @@ void	 tcp_fillheaders(struct tcpcb *, void *, void *);
 struct tcpcb *
 	 tcp_timers(struct tcpcb *, int);
 void	 tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
+void	 tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
 void	 syncache_init(void);
 void	 syncache_unreach(struct in_conninfo *, struct tcphdr *);
 int	 syncache_expand(struct in_conninfo *, struct tcphdr *,