1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-10-19 02:29:40 +00:00

This change represents a substantial restructure of the way we

reassemble inbound TCP segments. The old algorithm just blindly
dropped in segments without coalescing. This meant that every
segment could take up greater and greater room on the linked list
of segments. This of course is now subject to a tighter limit (100)
of segments which in a high BDP situation will cause us to be a
lot more inefficient as we drop segments beyond 100 entries that
we receive. What this restructure does is cause the reassembly
buffer to coalesce segments, putting an emphasis on the two
common cases (which avoid walking the list of segments), i.e.
where we add to the back of the queue of segments and where we
add to the front. We also have the reassembly buffer supporting
a couple of debug options (black box logging as well as counters
for code coverage). These are compiled out by default but can
be added by uncommenting the defines.

Sponsored by:	Netflix Inc.
Differential Revision:	https://reviews.freebsd.org/D16626
This commit is contained in:
Randall Stewart 2018-08-20 12:43:18 +00:00
parent a800b45c18
commit c28440db29
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=338102
7 changed files with 884 additions and 143 deletions

View File

@ -1734,7 +1734,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_nxt == tp->snd_max &&
tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
LIST_EMPTY(&tp->t_segq) &&
SEGQ_EMPTY(tp) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
@ -2440,7 +2440,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* later; if not, do so now to pass queued data to user.
*/
if (tlen == 0 && (thflags & TH_FIN) == 0)
(void) tcp_reass(tp, (struct tcphdr *)0, 0,
(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
/* FALLTHROUGH */
@ -3017,7 +3017,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* fast retransmit can work).
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
SEGQ_EMPTY(tp) &&
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn)
@ -3042,7 +3042,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* m_adj() doesn't actually frees any mbufs
* when trimming from the head.
*/
thflags = tcp_reass(tp, th, &tlen, m);
thflags = tcp_reass(tp, th, &save_start, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))

View File

@ -217,7 +217,9 @@ enum tcp_log_events {
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
TCP_LOG_END /* End (keep at end) 49 */
BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
TCP_LOG_REASS, /* Reassembly buffer logging 50 */
TCP_LOG_END /* End (keep at end) 51 */
};
enum tcp_log_states {

File diff suppressed because it is too large Load Diff

View File

@ -4780,7 +4780,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
* segments are out of order (so fast retransmit can work).
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
SEGQ_EMPTY(tp) &&
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn) {
@ -4808,7 +4808,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
* m_adj() doesn't actually frees any mbufs when
* trimming from the head.
*/
thflags = tcp_reass(tp, th, &tlen, m);
thflags = tcp_reass(tp, th, &save_start, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
if (tlen > 0)
@ -5509,7 +5509,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
* not, do so now to pass queued data to user.
*/
if (tlen == 0 && (thflags & TH_FIN) == 0)
(void)tcp_reass(tp, (struct tcphdr *)0, 0,
(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
@ -5574,7 +5574,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if (__predict_true(((to->to_flags & TOF_SACK) == 0)) &&
__predict_true((thflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK) &&
__predict_true(LIST_EMPTY(&tp->t_segq)) &&
__predict_true(SEGQ_EMPTY(tp)) &&
__predict_true(th->th_seq == tp->rcv_nxt)) {
struct tcp_rack *rack;

View File

@ -1626,7 +1626,7 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_vnet = inp->inp_vnet;
#endif
tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
TAILQ_INIT(&tp->t_segq);
tp->t_maxseg =
#ifdef INET6
isipv6 ? V_tcp_v6mssdflt :

View File

@ -2525,7 +2525,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
db_print_indent(indent);
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",

View File

@ -46,12 +46,15 @@
#if defined(_KERNEL) || defined(_WANT_TCPCB)
/* TCP segment queue entry */
struct tseg_qent {
LIST_ENTRY(tseg_qent) tqe_q;
TAILQ_ENTRY(tseg_qent) tqe_q;
struct mbuf *tqe_m; /* mbuf contains packet */
struct mbuf *tqe_last; /* last mbuf in chain */
tcp_seq tqe_start; /* TCP Sequence number start */
int tqe_len; /* TCP segment data length */
struct tcphdr *tqe_th; /* a pointer to tcp header */
struct mbuf *tqe_m; /* mbuf contains packet */
uint32_t tqe_flags; /* The flags from the th->th_flags */
uint32_t tqe_mbuf_cnt; /* Count of mbuf overhead */
};
LIST_HEAD(tsegqe_head, tseg_qent);
TAILQ_HEAD(tsegqe_head, tseg_qent);
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
@ -79,6 +82,8 @@ struct sackhint {
uint64_t _pad[1]; /* TBD */
};
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
/*
@ -131,6 +136,7 @@ struct tcpcb {
/* Cache line 3 */
tcp_seq rcv_up; /* receive urgent pointer */
int t_segqlen; /* segment reassembly queue length */
uint32_t t_segqmbuflen; /* Count of bytes mbufs on all entries */
struct tsegqe_head t_segq; /* segment reassembly queue */
struct mbuf *t_in_pkt;
struct mbuf *t_tail_pkt;
@ -837,7 +843,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
void tcp_dooptions(struct tcpopt *, u_char *, int, int);