mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-15 15:06:42 +00:00
This change represents a substantial restructure of the way we
reassemble inbound TCP segments. The old algorithm just blindly dropped in segments without coalescing. This meant that every segment could take up greater and greater room on the linked list of segments. This, of course, is now subject to a tighter limit (100) on the number of segments, which in a high-BDP situation will cause us to be a lot more inefficient, as we drop any segments we receive beyond 100 entries. What this restructure does is cause the reassembly buffer to coalesce segments, putting an emphasis on the two common cases (which avoid walking the list of segments), i.e. where we add to the back of the queue of segments and where we add to the front. The reassembly buffer also supports a couple of debug options (black box logging as well as counters for code coverage). These are compiled out by default but can be enabled by uncommenting the defines. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D16626
This commit is contained in:
parent
a800b45c18
commit
c28440db29
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=338102
@ -1734,7 +1734,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
tp->snd_nxt == tp->snd_max &&
|
||||
tiwin && tiwin == tp->snd_wnd &&
|
||||
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
|
||||
LIST_EMPTY(&tp->t_segq) &&
|
||||
SEGQ_EMPTY(tp) &&
|
||||
((to.to_flags & TOF_TS) == 0 ||
|
||||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
|
||||
|
||||
@ -2440,7 +2440,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* later; if not, do so now to pass queued data to user.
|
||||
*/
|
||||
if (tlen == 0 && (thflags & TH_FIN) == 0)
|
||||
(void) tcp_reass(tp, (struct tcphdr *)0, 0,
|
||||
(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
|
||||
(struct mbuf *)0);
|
||||
tp->snd_wl1 = th->th_seq - 1;
|
||||
/* FALLTHROUGH */
|
||||
@ -3017,7 +3017,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* fast retransmit can work).
|
||||
*/
|
||||
if (th->th_seq == tp->rcv_nxt &&
|
||||
LIST_EMPTY(&tp->t_segq) &&
|
||||
SEGQ_EMPTY(tp) &&
|
||||
(TCPS_HAVEESTABLISHED(tp->t_state) ||
|
||||
tfo_syn)) {
|
||||
if (DELAY_ACK(tp, tlen) || tfo_syn)
|
||||
@ -3042,7 +3042,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* m_adj() doesn't actually frees any mbufs
|
||||
* when trimming from the head.
|
||||
*/
|
||||
thflags = tcp_reass(tp, th, &tlen, m);
|
||||
thflags = tcp_reass(tp, th, &save_start, &tlen, m);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
}
|
||||
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
|
||||
|
@ -217,7 +217,9 @@ enum tcp_log_events {
|
||||
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
|
||||
TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
|
||||
BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
|
||||
TCP_LOG_END /* End (keep at end) 49 */
|
||||
BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
|
||||
TCP_LOG_REASS, /* Reassembly buffer logging 50 */
|
||||
TCP_LOG_END /* End (keep at end) 51 */
|
||||
};
|
||||
|
||||
enum tcp_log_states {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4780,7 +4780,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* segments are out of order (so fast retransmit can work).
|
||||
*/
|
||||
if (th->th_seq == tp->rcv_nxt &&
|
||||
LIST_EMPTY(&tp->t_segq) &&
|
||||
SEGQ_EMPTY(tp) &&
|
||||
(TCPS_HAVEESTABLISHED(tp->t_state) ||
|
||||
tfo_syn)) {
|
||||
if (DELAY_ACK(tp, tlen) || tfo_syn) {
|
||||
@ -4808,7 +4808,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* m_adj() doesn't actually frees any mbufs when
|
||||
* trimming from the head.
|
||||
*/
|
||||
thflags = tcp_reass(tp, th, &tlen, m);
|
||||
thflags = tcp_reass(tp, th, &save_start, &tlen, m);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
}
|
||||
if (tlen > 0)
|
||||
@ -5509,7 +5509,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* not, do so now to pass queued data to user.
|
||||
*/
|
||||
if (tlen == 0 && (thflags & TH_FIN) == 0)
|
||||
(void)tcp_reass(tp, (struct tcphdr *)0, 0,
|
||||
(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
|
||||
(struct mbuf *)0);
|
||||
tp->snd_wl1 = th->th_seq - 1;
|
||||
if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
|
||||
@ -5574,7 +5574,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
*/
|
||||
if (__predict_true(((to->to_flags & TOF_SACK) == 0)) &&
|
||||
__predict_true((thflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK) &&
|
||||
__predict_true(LIST_EMPTY(&tp->t_segq)) &&
|
||||
__predict_true(SEGQ_EMPTY(tp)) &&
|
||||
__predict_true(th->th_seq == tp->rcv_nxt)) {
|
||||
struct tcp_rack *rack;
|
||||
|
||||
|
@ -1626,7 +1626,7 @@ tcp_newtcpcb(struct inpcb *inp)
|
||||
tp->t_vnet = inp->inp_vnet;
|
||||
#endif
|
||||
tp->t_timers = &tm->tt;
|
||||
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
|
||||
TAILQ_INIT(&tp->t_segq);
|
||||
tp->t_maxseg =
|
||||
#ifdef INET6
|
||||
isipv6 ? V_tcp_v6mssdflt :
|
||||
|
@ -2525,7 +2525,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
|
||||
|
||||
db_print_indent(indent);
|
||||
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
|
||||
LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
|
||||
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
|
||||
|
||||
db_print_indent(indent);
|
||||
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
|
||||
|
@ -46,12 +46,15 @@
|
||||
#if defined(_KERNEL) || defined(_WANT_TCPCB)
|
||||
/* TCP segment queue entry */
|
||||
struct tseg_qent {
|
||||
LIST_ENTRY(tseg_qent) tqe_q;
|
||||
TAILQ_ENTRY(tseg_qent) tqe_q;
|
||||
struct mbuf *tqe_m; /* mbuf contains packet */
|
||||
struct mbuf *tqe_last; /* last mbuf in chain */
|
||||
tcp_seq tqe_start; /* TCP Sequence number start */
|
||||
int tqe_len; /* TCP segment data length */
|
||||
struct tcphdr *tqe_th; /* a pointer to tcp header */
|
||||
struct mbuf *tqe_m; /* mbuf contains packet */
|
||||
uint32_t tqe_flags; /* The flags from the th->th_flags */
|
||||
uint32_t tqe_mbuf_cnt; /* Count of mbuf overhead */
|
||||
};
|
||||
LIST_HEAD(tsegqe_head, tseg_qent);
|
||||
TAILQ_HEAD(tsegqe_head, tseg_qent);
|
||||
|
||||
struct sackblk {
|
||||
tcp_seq start; /* start seq no. of sack block */
|
||||
@ -79,6 +82,8 @@ struct sackhint {
|
||||
uint64_t _pad[1]; /* TBD */
|
||||
};
|
||||
|
||||
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
|
||||
|
||||
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
|
||||
|
||||
/*
|
||||
@ -131,6 +136,7 @@ struct tcpcb {
|
||||
/* Cache line 3 */
|
||||
tcp_seq rcv_up; /* receive urgent pointer */
|
||||
int t_segqlen; /* segment reassembly queue length */
|
||||
uint32_t t_segqmbuflen; /* Count of bytes mbufs on all entries */
|
||||
struct tsegqe_head t_segq; /* segment reassembly queue */
|
||||
struct mbuf *t_in_pkt;
|
||||
struct mbuf *t_tail_pkt;
|
||||
@ -837,7 +843,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
|
||||
const void *);
|
||||
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
|
||||
const void *);
|
||||
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
|
||||
int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
|
||||
void tcp_reass_global_init(void);
|
||||
void tcp_reass_flush(struct tcpcb *);
|
||||
void tcp_dooptions(struct tcpopt *, u_char *, int, int);
|
||||
|
Loading…
Reference in New Issue
Block a user