tcp: include RFC6675 IsLost() in pipe calculation

Add more accounting while processing SACK data, to
keep track of when a packet is deemed lost using
the RFC6675 guidance.

Together with PRR (RFC6972) this allows a sender to
retransmit presumed lost packets faster, and loss
recovery to complete earlier.

Reviewed By: cc, rrs, #transport
Sponsored by: NetApp, Inc.
Differential Revision: https://reviews.freebsd.org/D39299
This commit is contained in:
Richard Scheffenegger 2023-10-09 11:55:14 +02:00
parent 35b3be81f1
commit e2c6a6d29b
3 changed files with 74 additions and 8 deletions

View File

@ -4048,7 +4048,8 @@ tcp_compute_pipe(struct tcpcb *tp)
if (tp->t_fb->tfb_compute_pipe == NULL) {
return (tp->snd_max - tp->snd_una +
tp->sackhint.sack_bytes_rexmit -
tp->sackhint.sacked_bytes);
tp->sackhint.sacked_bytes -
tp->sackhint.lost_bytes);
} else {
return((*tp->t_fb->tfb_compute_pipe)(tp));
}

View File

@ -555,6 +555,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
int i, j, num_sack_blks, sack_changed;
int delivered_data, left_edge_delta;
tcp_seq loss_hiack = 0;
int loss_thresh = 0;
int loss_sblks = 0;
int notlost_bytes = 0;
INP_WLOCK_ASSERT(tptoinpcb(tp));
num_sack_blks = 0;
@ -637,6 +642,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
*/
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
tp->sackhint.sacked_bytes = 0; /* reset */
tp->sackhint.hole_bytes = 0;
}
/*
* In the while-loop below, incoming SACK blocks (sack_blocks[]) and
@ -661,13 +667,17 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
*/
if (((temp = TAILQ_LAST(&tp->snd_holes, sackhole_head)) != NULL) &&
SEQ_LEQ(tp->snd_fack,temp->end)) {
tp->sackhint.hole_bytes -= temp->end - temp->start;
temp->start = SEQ_MAX(tp->snd_fack, SEQ_MAX(tp->snd_una, th_ack));
temp->end = sblkp->start;
temp->rxmit = temp->start;
delivered_data += sblkp->end - sblkp->start;
tp->sackhint.hole_bytes += temp->end - temp->start;
KASSERT(tp->sackhint.hole_bytes >= 0,
("sackhint hole bytes >= 0"));
tp->snd_fack = sblkp->end;
sblkp--;
sack_changed = 1;
sblkp--;
} else {
/*
* Append a new SACK hole at the tail. If the
@ -678,10 +688,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
if (temp != NULL) {
delivered_data += sblkp->end - sblkp->start;
tp->sackhint.hole_bytes += temp->end - temp->start;
tp->snd_fack = sblkp->end;
sack_changed = 1;
/* Go to the previous sack block. */
sblkp--;
sack_changed = 1;
} else {
/*
* We failed to add a new hole based on the current
@ -709,11 +720,29 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
sack_changed = 1;
}
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
loss_hiack = tp->snd_fack;
/*
* Since the incoming sack blocks are sorted, we can process them
* making one sweep of the scoreboard.
*/
while (sblkp >= sack_blocks && cur != NULL) {
while (cur != NULL) {
if (!(sblkp >= sack_blocks)) {
if (((loss_sblks >= tcprexmtthresh) ||
(loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
break;
loss_thresh += loss_hiack - cur->end;
loss_hiack = cur->start;
loss_sblks++;
if (!((loss_sblks >= tcprexmtthresh) ||
(loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) {
notlost_bytes += cur->end - cur->start;
} else {
break;
}
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
if (SEQ_GEQ(sblkp->start, cur->end)) {
/*
* SACKs data beyond the current hole. Go to the
@ -727,6 +756,12 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* SACKs data before the current hole. Go to the
* previous hole.
*/
loss_thresh += loss_hiack - cur->end;
loss_hiack = cur->start;
loss_sblks++;
if (!((loss_sblks >= tcprexmtthresh) ||
(loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
notlost_bytes += cur->end - cur->start;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
@ -742,6 +777,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
delivered_data += (cur->end - cur->start);
temp = cur;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
tp->sackhint.hole_bytes -= temp->end - temp->start;
tcp_sackhole_remove(tp, temp);
/*
* The sack block may ack all or part of the
@ -752,6 +788,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
} else {
/* Move start of hole forward. */
delivered_data += (sblkp->end - cur->start);
tp->sackhint.hole_bytes -= sblkp->end - cur->start;
cur->start = sblkp->end;
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
@ -760,6 +797,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Move end of hole backward. */
delivered_data += (cur->end - sblkp->start);
tp->sackhint.hole_bytes -= cur->end - sblkp->start;
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
if ((tp->t_flags & TF_LRD) && SEQ_GEQ(cur->rxmit, cur->end))
@ -778,6 +816,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
(SEQ_MIN(temp->rxmit,
temp->end) - temp->start);
}
tp->sackhint.hole_bytes -= sblkp->end - sblkp->start;
loss_thresh += loss_hiack - temp->end;
loss_hiack = temp->start;
loss_sblks++;
if (!((loss_sblks >= tcprexmtthresh) ||
(loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
notlost_bytes += temp->end - temp->start;
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
cur->end);
@ -794,11 +839,25 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* we're done with the sack block or the sack hole.
* Accordingly, we advance one or the other.
*/
if (SEQ_LEQ(sblkp->start, cur->start))
if (SEQ_LEQ(sblkp->start, cur->start)) {
loss_thresh += loss_hiack - cur->end;
loss_hiack = cur->start;
loss_sblks++;
if (!((loss_sblks >= tcprexmtthresh) ||
(loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
notlost_bytes += cur->end - cur->start;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
else
} else {
sblkp--;
}
}
KASSERT(!(TAILQ_EMPTY(&tp->snd_holes) && (tp->sackhint.hole_bytes != 0)),
("SACK scoreboard empty, but accounting non-zero\n"));
KASSERT(notlost_bytes <= tp->sackhint.hole_bytes,
("SACK: more bytes marked notlost than in scoreboard holes"));
if (!(to->to_flags & TOF_SACK))
/*
* If this ACK did not contain any
@ -811,6 +870,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
sack_changed = 0;
tp->sackhint.delivered_data = delivered_data;
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes;
KASSERT((delivered_data >= 0), ("delivered_data < 0"));
KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0"));
return (sack_changed);
@ -845,6 +905,7 @@ tcp_free_sackholes(struct tcpcb *tp)
void
tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
{
struct sackhole *temp;
int num_segs = 1;
u_int maxseg = tcp_maxseg(tp);
@ -891,8 +952,10 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
highdata = SEQ_MIN(highdata, tp->snd_recover);
if (th->th_ack != highdata) {
tp->snd_fack = th->th_ack;
(void)tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
highdata - maxseg), highdata, NULL);
if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
highdata - maxseg), highdata, NULL)) != NULL)
tp->sackhint.hole_bytes += temp->end -
temp->start;
}
}
(void) tcp_output(tp);

View File

@ -128,6 +128,8 @@ struct sackhint {
uint32_t recover_fs; /* Flight Size at the start of Loss recovery */
uint32_t prr_delivered; /* Total bytes delivered using PRR */
uint32_t prr_out; /* Bytes sent during IN_RECOVERY */
uint32_t hole_bytes; /* current number of bytes in scoreboard holes */
uint32_t lost_bytes; /* number of rfc6675 IsLost() bytes */
};
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)