1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-05 12:56:08 +00:00

tcp: add support for TCP over UDP

Adding support for TCP over UDP allows communication with
TCP stacks which can be implemented in userspace without
requiring special privileges or specific support by the OS.
This is joint work with rrs.

Reviewed by:		rrs
Sponsored by:		Netflix, Inc.
MFC after:		1 week
Differential Revision:	https://reviews.freebsd.org/D29469
This commit is contained in:
Michael Tuexen 2021-04-18 16:08:08 +02:00
parent 136f6b6c0c
commit 9e644c2300
18 changed files with 821 additions and 158 deletions

View File

@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
.Dd April 17, 2021
.Dd April 18, 2021
.Dt TCP 4
.Os
.Sh NAME
@ -329,6 +329,9 @@ currently executing.
This is typically used after a process or thread inherits a listen
socket from its parent, and sets its CPU affinity to a particular core.
.El
.It Dv TCP_REMOTE_UDP_ENCAPS_PORT
Set and get the remote UDP encapsulation port.
It can only be set on a closed TCP socket.
.El
.Pp
The option level for the
@ -755,6 +758,16 @@ A CSV list of template_spec=percent key-value pairs which controls the per
template sampling rates when
.Xr stats 3
sampling is enabled.
.It Va udp_tunneling_port
The local UDP encapsulation port.
A value of 0 indicates that UDP encapsulation is disabled.
The default is 0.
.It Va udp_tunneling_overhead
The overhead taken into account when using UDP encapsulation.
Since MSS clamping by middleboxes will most likely not work, values larger than
8 (the size of the UDP header) are also supported.
Supported values are between 8 and 1024.
The default is 8.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:

View File

@ -183,6 +183,7 @@ struct tcphdr {
#define TCP_RXTLS_MODE 42 /* Receive TLS mode */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */
#define TCP_DELACK 72 /* socket option for delayed ack */
#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */
#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */

View File

@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#include <netinet/udp.h>
#include <netipsec/ipsec_support.h>
@ -567,7 +568,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
*/
#ifdef INET6
int
tcp6_input(struct mbuf **mp, int *offp, int proto)
tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
{
struct mbuf *m;
struct in6_ifaddr *ia6;
@ -597,12 +598,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
}
*mp = m;
return (tcp_input(mp, offp, proto));
return (tcp_input_with_port(mp, offp, proto, port));
}
/*
 * Regular IPv6 entry point for TCP input.
 *
 * Forwards to tcp6_input_with_port() with a port of 0, the value the
 * port-aware input path treats as "no UDP encapsulation port".
 * Returns whatever tcp6_input_with_port() returns.
 */
int
tcp6_input(struct mbuf **mp, int *offp, int proto)
{
	int rv;

	rv = tcp6_input_with_port(mp, offp, proto, 0);
	return (rv);
}
#endif /* INET6 */
int
tcp_input(struct mbuf **mp, int *offp, int proto)
tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
{
struct mbuf *m = *mp;
struct tcphdr *th = NULL;
@ -659,6 +667,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
if (port)
goto skip6_csum;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
@ -672,7 +682,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
skip6_csum:
/*
* Be proactive about unspecified IPv6 address in source.
* As we use all-zero to indicate unbounded/unconnected pcb,
@ -713,6 +723,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
tlen = ntohs(ip->ip_len) - off0;
iptos = ip->ip_tos;
if (port)
goto skip_csum;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
@ -742,8 +754,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
ip->ip_v = IPVERSION;
ip->ip_hl = off0 >> 2;
}
if (th->th_sum) {
skip_csum:
if (th->th_sum && (port == 0)) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
@ -1004,6 +1016,11 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
goto dropwithreset;
}
if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
tcp_offload_input(tp, m);
@ -1074,7 +1091,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* NB: syncache_expand() doesn't unlock
* inp and tcpinfo locks.
*/
rstreason = syncache_expand(&inc, &to, th, &so, m);
rstreason = syncache_expand(&inc, &to, th, &so, m, port);
if (rstreason < 0) {
/*
* A failing TCP MD5 signature comparison
@ -1156,7 +1173,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* causes.
*/
if (thflags & TH_RST) {
syncache_chkrst(&inc, th, m);
syncache_chkrst(&inc, th, m, port);
goto dropunlock;
}
/*
@ -1178,7 +1195,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
log(LOG_DEBUG, "%s; %s: Listen socket: "
"SYN|ACK invalid, segment rejected\n",
s, __func__);
syncache_badack(&inc); /* XXX: Not needed! */
syncache_badack(&inc, port); /* XXX: Not needed! */
TCPSTAT_INC(tcps_badsyn);
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
@ -1337,7 +1354,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
if ((so = syncache_add(&inc, &to, th, inp, so, m, NULL, NULL,
iptos)) != NULL)
iptos, port)) != NULL)
goto tfo_socket_result;
/*
@ -1468,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (newsize);
}
/*
 * Regular entry point for TCP input.
 *
 * Forwards to tcp_input_with_port() with a port of 0, the value the
 * port-aware input path treats as "no UDP encapsulation port".
 * Returns whatever tcp_input_with_port() returns.
 */
int
tcp_input(struct mbuf **mp, int *offp, int proto)
{
	int rv;

	rv = tcp_input_with_port(mp, offp, proto, 0);
	return (rv);
}
void
tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
{
@ -3672,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
sizeof (struct tcpiphdr);
#else
const size_t min_protoh = sizeof(struct tcpiphdr);
size_t min_protoh = sizeof(struct tcpiphdr);
#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_port)
min_protoh += V_tcp_udp_tunneling_overhead;
if (mtuoffer != -1) {
KASSERT(offer == -1, ("%s: conflict", __func__));
offer = mtuoffer - min_protoh;

View File

@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$");
#include <netipsec/ipsec_support.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp)
#endif
struct tcphdr *th;
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
unsigned ipoptlen, optlen, hdrlen, ulen;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
unsigned ipsec_optlen = 0;
#endif
@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp)
struct sackhole *p;
int tso, mtu;
struct tcpopt to;
struct udphdr *udp = NULL;
unsigned int wanted_cookie = 0;
unsigned int dont_sendalot = 0;
#if 0
@ -558,6 +561,7 @@ tcp_output(struct tcpcb *tp)
#endif
if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
(tp->t_port == 0) &&
((tp->t_flags & TF_SIGNATURE) == 0) &&
tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
ipoptlen == 0 && !(flags & TH_SYN))
@ -800,6 +804,8 @@ tcp_output(struct tcpcb *tp)
/* Maximum segment size. */
if (flags & TH_SYN) {
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
to.to_flags |= TOF_MSS;
/*
@ -887,7 +893,14 @@ tcp_output(struct tcpcb *tp)
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
/* The port was removed?? */
SOCKBUF_UNLOCK(&so->so_snd);
return (EHOSTUNREACH);
}
hdrlen += sizeof(struct udphdr);
}
/*
* Adjust data length if insertion of options will
* bump the packet length beyond the t_maxseg length.
@ -1140,8 +1153,17 @@ tcp_output(struct tcpcb *tp)
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)(ip6 + 1);
tcpip_fillheaders(tp->t_inpcb, ip6, th);
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tp->t_port;
ulen = hdrlen + len - sizeof(struct ip6_hdr);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else {
th = (struct tcphdr *)(ip6 + 1);
}
tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@ -1149,8 +1171,16 @@ tcp_output(struct tcpcb *tp)
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(tp->t_inpcb, ip, th);
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tp->t_port;
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th);
}
/*
@ -1309,7 +1339,6 @@ tcp_output(struct tcpcb *tp)
* checksum extended header and data.
*/
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if (to.to_flags & TOF_SIGNATURE) {
@ -1336,9 +1365,19 @@ tcp_output(struct tcpcb *tp)
* There is no need to fill in ip6_plen right now.
* It will be filled later by ip6_output.
*/
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
optlen + len, IPPROTO_TCP, 0);
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
0);
}
}
#endif
#if defined(INET6) && defined(INET)
@ -1346,9 +1385,20 @@ tcp_output(struct tcpcb *tp)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
IPPROTO_TCP + len + optlen));
}
/* IP version must be set here for ipv4/ipv6 checking later */
KASSERT(ip->ip_v == IPVERSION,
@ -1473,8 +1523,10 @@ tcp_output(struct tcpcb *tp)
* NB: Don't set DF on small MTU/MSS to have a safe fallback.
*/
if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
ip->ip_off |= htons(IP_DF);
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
if (tp->t_port == 0 || len < V_tcp_minmss) {
ip->ip_off |= htons(IP_DF);
}
} else {
tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
}

View File

@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
#endif
struct tcp_bbr *bbr;
struct tcphdr *th;
#ifdef NETFLIX_TCPOUDP
struct udphdr *udp = NULL;
#endif
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
#ifdef NETFLIX_TCPOUDP
unsigned ulen;
#endif
uint32_t bbr_seq;
uint32_t delay_calc=0;
uint8_t doing_tlp = 0;
@ -12991,10 +12987,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
/* Maximum segment size. */
if (flags & TH_SYN) {
to.to_mss = tcp_mssopt(&inp->inp_inc);
#ifdef NETFLIX_TCPOUDP
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
#endif
to.to_flags |= TOF_MSS;
/*
* On SYN or SYN|ACK transmits on TFO connections,
@ -13063,7 +13057,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
/* The port was removed?? */
@ -13072,7 +13065,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
}
hdrlen += sizeof(struct udphdr);
}
#endif
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@ -13408,7 +13400,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@ -13417,17 +13408,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else {
#endif
th = (struct tcphdr *)(ip6 + 1);
#ifdef NETFLIX_TCPOUDP
}
#endif
tcpip_fillheaders(inp,
#ifdef NETFLIX_TCPOUDP
tp->t_port,
#endif
ip6, th);
tcpip_fillheaders(inp, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@ -13435,7 +13418,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@ -13443,14 +13425,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else
#endif
} else {
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp,
#ifdef NETFLIX_TCPOUDP
tp->t_port,
#endif
ip, th);
}
tcpip_fillheaders(inp, tp->t_port, ip, th);
}
/*
* If we are doing retransmissions, then snd_nxt will not reflect
@ -13600,7 +13578,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* ip6_plen is not need to be filled now, and will be filled
* in ip6_output.
*/
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@ -13608,14 +13585,11 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
#endif
csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
optlen + len, IPPROTO_TCP, 0);
#ifdef NETFLIX_TCPOUDP
}
#endif
}
#endif
#if defined(INET6) && defined(INET)
@ -13623,7 +13597,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
#endif
#ifdef INET
{
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@ -13632,15 +13605,12 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
#endif
csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
IPPROTO_TCP + len + optlen));
#ifdef NETFLIX_TCPOUDP
}
#endif
/* IP version must be set here for ipv4/ipv6 checking later */
KASSERT(ip->ip_v == IPVERSION,
("%s: IP version incorrect: %d", __func__, ip->ip_v));

View File

@ -13008,10 +13008,8 @@ rack_output(struct tcpcb *tp)
if (flags & TH_SYN) {
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&inp->inp_inc);
#ifdef NETFLIX_TCPOUDP
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
#endif
to.to_flags |= TOF_MSS;
/*
@ -13088,7 +13086,6 @@ rack_output(struct tcpcb *tp)
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
/* The port was removed?? */
@ -13097,7 +13094,6 @@ rack_output(struct tcpcb *tp)
}
hdrlen += sizeof(struct udphdr);
}
#endif
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@ -13372,7 +13368,6 @@ rack_output(struct tcpcb *tp)
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@ -13380,14 +13375,10 @@ rack_output(struct tcpcb *tp)
ulen = hdrlen + len - sizeof(struct ip6_hdr);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else
#endif
} else {
th = (struct tcphdr *)(ip6 + 1);
tcpip_fillheaders(inp,
#ifdef NETFLIX_TCPOUDP
tp->t_port,
#endif
ip6, th);
}
tcpip_fillheaders(inp, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@ -13395,7 +13386,6 @@ rack_output(struct tcpcb *tp)
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@ -13403,14 +13393,10 @@ rack_output(struct tcpcb *tp)
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else
#endif
} else {
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp,
#ifdef NETFLIX_TCPOUDP
tp->t_port,
#endif
ip, th);
}
tcpip_fillheaders(inp, tp->t_port, ip, th);
}
/*
* Fill in fields, remembering maximum advertised window for use in

View File

@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netipsec/ipsec_support.h>
@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp)
}
}
/*
 * Input hook installed on the kernel UDP tunneling sockets.
 *
 * Removes the UDP header from a received TCP-over-UDP datagram and passes
 * the resulting TCP segment to tcp_input_with_port() (IPv4) or
 * tcp6_input_with_port() (IPv6), together with the UDP source port of the
 * datagram.
 *
 * m	received packet; it must carry a packet header (M_PKTHDR).
 * off	byte offset of the UDP header from the start of the mbuf data.
 * inp, sa, ctx
 *	not used by this function.
 *
 * The mbuf is always consumed: it is either handed to the TCP input
 * routine or freed here.
 */
static void
tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
    const struct sockaddr *sa, void *ctx)
{
	struct ip *iph;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct udphdr *uh;
	struct tcphdr *th;
	int thlen;
	uint16_t port;

	TCPSTAT_INC(tcps_tunneled_pkts);
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* Can't handle one that is not a pkt hdr */
		TCPSTAT_INC(tcps_tunneled_errs);
		goto out;
	}
	/* Make the UDP header and a minimal TCP header contiguous. */
	thlen = sizeof(struct tcphdr);
	if (m->m_len < off + sizeof(struct udphdr) + thlen &&
	    (m = m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) {
		TCPSTAT_INC(tcps_tunneled_errs);
		goto out;
	}
	iph = mtod(m, struct ip *);
	uh = (struct udphdr *)((caddr_t)iph + off);
	th = (struct tcphdr *)(uh + 1);
	/* th_off is readable now; make the full TCP header contiguous too. */
	thlen = th->th_off << 2;
	if (m->m_len < off + sizeof(struct udphdr) + thlen) {
		m = m_pullup(m, off + sizeof(struct udphdr) + thlen);
		if (m == NULL) {
			TCPSTAT_INC(tcps_tunneled_errs);
			goto out;
		}
		/* The head mbuf may have changed; recompute the pointers. */
		iph = mtod(m, struct ip *);
		uh = (struct udphdr *)((caddr_t)iph + off);
		th = (struct tcphdr *)(uh + 1);
	}
	/* Remember the peer's UDP port exactly as it appears in the header. */
	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
	/*
	 * Slide everything that follows the UDP header in this mbuf down on
	 * top of the UDP header.  Only m_len - off - sizeof(struct udphdr)
	 * bytes exist after the UDP header here; copying m_len - off bytes
	 * would read sizeof(struct udphdr) bytes past the valid data.
	 */
	bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));
	m->m_len -= sizeof(struct udphdr);
	m->m_pkthdr.len -= sizeof(struct udphdr);
	/*
	 * UDP and TCP use the same checksum algorithm, and the TCP input
	 * code skips the TCP checksum when a tunneling port is supplied,
	 * so m->m_pkthdr.csum_flags is left untouched.
	 */
	switch (iph->ip_v) {
#ifdef INET
	case IPVERSION:
		/* The IP payload shrank by the size of the UDP header. */
		iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
		tcp_input_with_port(&m, &off, IPPROTO_TCP, port);
		break;
#endif
#ifdef INET6
	case IPV6_VERSION >> 4:
		ip6 = mtod(m, struct ip6_hdr *);
		/* The IPv6 payload shrank by the size of the UDP header. */
		ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
		tcp6_input_with_port(&m, &off, IPPROTO_TCP, port);
		break;
#endif
	default:
		/* Unknown IP version: drop the packet. */
		goto out;
	}
	return;
out:
	m_freem(m);
}
static int
sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
{
@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
NULL, 0, sysctl_net_inet_list_available, "A",
"list available TCP Function sets");
/*
 * Local UDP port used for TCP-over-UDP encapsulation.  It is exported
 * through the udp_tunneling_port sysctl; the sysctl handler only starts
 * the tunneling sockets for a non-zero value.
 */
VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT;
#ifdef INET
/*
 * IPv4 UDP socket receiving encapsulated TCP; created by
 * tcp_over_udp_start(), closed and reset to NULL by tcp_over_udp_stop().
 */
VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL;
#define V_udp4_tun_socket VNET(udp4_tun_socket)
#endif
#ifdef INET6
/*
 * IPv6 UDP socket receiving encapsulated TCP; created by
 * tcp_over_udp_start(), closed and reset to NULL by tcp_over_udp_stop().
 */
VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL;
#define V_udp6_tun_socket VNET(udp6_tun_socket)
#endif
/*
 * Shut down TCP-over-UDP reception: close whichever tunneling sockets
 * exist and clear the corresponding pointers.  Calling it when no socket
 * exists is a no-op.
 *
 * Locking note carried over from the original author: this function
 * assumes that the sysctl caller holds the inp info lock for writing.
 */
static void
tcp_over_udp_stop(void)
{
	struct socket *so;

#ifdef INET
	so = V_udp4_tun_socket;
	if (so != NULL) {
		soclose(so);
		V_udp4_tun_socket = NULL;
	}
#endif
#ifdef INET6
	so = V_udp6_tun_socket;
	if (so != NULL) {
		soclose(so);
		V_udp6_tun_socket = NULL;
	}
#endif
}
/*
 * Start TCP-over-UDP reception on the configured tunneling port.
 *
 * For each compiled-in address family a kernel UDP socket is created,
 * hooked up to tcp_recv_udp_tunneled_packet() and the matching ICMP
 * handler through udp_set_kernel_tunneling(), and bound to the port.
 *
 * Returns 0 on success, EINVAL when no port is configured, EALREADY when
 * a tunneling socket already exists, or the error reported by socreate(),
 * udp_set_kernel_tunneling() or sobind().  After a failure in one of
 * those three calls every socket created so far is closed again via
 * tcp_over_udp_stop().
 *
 * Locking note carried over from the original author: this function
 * assumes that the sysctl caller holds the inp info lock for writing.
 */
static int
tcp_over_udp_start(void)
{
	uint16_t port;
	int ret;
#ifdef INET
	struct sockaddr_in sin;
#endif
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif

	/* The configured port is converted with htons() when binding below. */
	port = V_tcp_udp_tunneling_port;
	if (port == 0) {
		/* Must have a port set */
		return (EINVAL);
	}
	/* Already running -- must stop first */
#ifdef INET
	if (V_udp4_tun_socket != NULL)
		return (EALREADY);
#endif
#ifdef INET6
	if (V_udp6_tun_socket != NULL)
		return (EALREADY);
#endif

#ifdef INET
	ret = socreate(PF_INET, &V_udp4_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
	    curthread->td_ucred, curthread);
	if (ret != 0)
		goto fail;
	/* Call the special UDP hook. */
	ret = udp_set_kernel_tunneling(V_udp4_tun_socket,
	    tcp_recv_udp_tunneled_packet, tcp_ctlinput_viaudp, NULL);
	if (ret != 0)
		goto fail;
	/* Ok, we have a socket, bind it to the port. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);
	ret = sobind(V_udp4_tun_socket, (struct sockaddr *)&sin, curthread);
	if (ret != 0)
		goto fail;
#endif
#ifdef INET6
	ret = socreate(PF_INET6, &V_udp6_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
	    curthread->td_ucred, curthread);
	if (ret != 0)
		goto fail;
	/* Call the special UDP hook. */
	ret = udp_set_kernel_tunneling(V_udp6_tun_socket,
	    tcp_recv_udp_tunneled_packet, tcp6_ctlinput_viaudp, NULL);
	if (ret != 0)
		goto fail;
	/* Ok, we have a socket, bind it to the port. */
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(port);
	ret = sobind(V_udp6_tun_socket, (struct sockaddr *)&sin6, curthread);
	if (ret != 0)
		goto fail;
#endif
	return (0);

fail:
	/* Undo whatever was set up before the failure. */
	tcp_over_udp_stop();
	return (ret);
}
/*
 * Handler for the udp_tunneling_port sysctl.
 *
 * Reads report the current tunneling port.  A write is accepted only if
 * the new value lies within [TCP_TUNNELING_PORT_MIN,
 * TCP_TUNNELING_PORT_MAX]; otherwise EINVAL is returned and nothing
 * changes.  An accepted write stores the new port, stops the tunneling
 * sockets if the old port was non-zero, and starts them again if the new
 * port is non-zero.
 *
 * Returns 0 on success, or the error from sysctl_handle_int() or
 * tcp_over_udp_start().
 */
static int
sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint32_t old, new;

	old = V_tcp_udp_tunneling_port;
	new = old;
	error = sysctl_handle_int(oidp, &new, 0, req);
	if ((error == 0) &&
	    (req->newptr != NULL)) {
		if ((new < TCP_TUNNELING_PORT_MIN) ||
		    (new > TCP_TUNNELING_PORT_MAX)) {
			error = EINVAL;
		} else {
			V_tcp_udp_tunneling_port = new;
			if (old != 0) {
				tcp_over_udp_stop();
			}
			if (new != 0) {
				error = tcp_over_udp_start();
				if (error != 0) {
					/*
					 * tcp_over_udp_start() closes its
					 * sockets on failure, so nothing is
					 * listening.  Fall back to 0 rather
					 * than keep reporting a port that is
					 * not in service.
					 */
					V_tcp_udp_tunneling_port = 0;
				}
			}
		}
	}
	return (error);
}
/*
 * udp_tunneling_port sysctl under _net_inet_tcp: read/write and per-VNET.
 * Accesses go through sysctl_net_inet_tcp_udp_tunneling_port_check(),
 * which validates the value and restarts the tunneling sockets.
 */
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(tcp_udp_tunneling_port),
0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU",
"Tunneling port for tcp over udp");
/*
 * Per-VNET encapsulation overhead.  The TCP output paths subtract it from
 * the MSS advertised in a SYN when the connection has a tunneling port set.
 */
VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT;
static int
sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
{
int error, new;
new = V_tcp_udp_tunneling_overhead;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
if ((new < TCP_TUNNELING_OVERHEAD_MIN) ||
(new > TCP_TUNNELING_OVERHEAD_MAX))
error = EINVAL;
else
V_tcp_udp_tunneling_overhead = new;
}
return (error);
}
/*
 * udp_tunneling_overhead sysctl under _net_inet_tcp: read/write and
 * per-VNET.  Accesses go through
 * sysctl_net_inet_tcp_udp_tunneling_overhead_check(), which range-checks
 * new values.
 */
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(tcp_udp_tunneling_overhead),
0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU",
"MSS reduction when using tcp over udp");
/*
* Exports one (struct tcp_function_info) for each alias/name.
*/
@ -1314,7 +1567,7 @@ tcp_fini(void *xtp)
* of the tcpcb each time to conserve mbufs.
*/
void
tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
{
struct tcphdr *th = (struct tcphdr *)tcp_ptr;
@ -1329,7 +1582,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
(inp->inp_flow & IPV6_FLOWINFO_MASK);
ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
(IPV6_VERSION & IPV6_VERSION_MASK);
ip6->ip6_nxt = IPPROTO_TCP;
if (port == 0)
ip6->ip6_nxt = IPPROTO_TCP;
else
ip6->ip6_nxt = IPPROTO_UDP;
ip6->ip6_plen = htons(sizeof(struct tcphdr));
ip6->ip6_src = inp->in6p_laddr;
ip6->ip6_dst = inp->in6p_faddr;
@ -1351,7 +1607,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
ip->ip_off = 0;
ip->ip_ttl = inp->inp_ip_ttl;
ip->ip_sum = 0;
ip->ip_p = IPPROTO_TCP;
if (port == 0)
ip->ip_p = IPPROTO_TCP;
else
ip->ip_p = IPPROTO_UDP;
ip->ip_src = inp->inp_laddr;
ip->ip_dst = inp->inp_faddr;
}
@ -1381,7 +1640,7 @@ tcpip_maketemplate(struct inpcb *inp)
t = malloc(sizeof(*t), M_TEMP, M_NOWAIT);
if (t == NULL)
return (NULL);
tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t);
tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t);
return (t);
}
@ -1407,14 +1666,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
struct inpcb *inp;
struct ip *ip;
struct mbuf *optm;
struct udphdr *uh = NULL;
struct tcphdr *nth;
u_char *optp;
#ifdef INET6
struct ip6_hdr *ip6;
int isipv6;
#endif /* INET6 */
int optlen, tlen, win;
int optlen, tlen, win, ulen;
bool incl_opts;
uint16_t port;
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
NET_EPOCH_ASSERT();
@ -1432,6 +1693,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
} else
inp = NULL;
if (m != NULL) {
#ifdef INET6
if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP))
port = m->m_pkthdr.tcp_tun_port;
else
#endif
if (ip && (ip->ip_p == IPPROTO_UDP))
port = m->m_pkthdr.tcp_tun_port;
else
port = 0;
} else
port = tp->t_port;
incl_opts = false;
win = 0;
if (tp != NULL) {
@ -1454,16 +1728,30 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
sizeof(struct ip6_hdr));
ip6 = mtod(m, struct ip6_hdr *);
nth = (struct tcphdr *)(ip6 + 1);
if (port) {
/* Insert a UDP header */
uh = (struct udphdr *)nth;
uh->uh_sport = htons(V_tcp_udp_tunneling_port);
uh->uh_dport = port;
nth = (struct tcphdr *)(uh + 1);
}
} else
#endif /* INET6 */
{
bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
ip = mtod(m, struct ip *);
nth = (struct tcphdr *)(ip + 1);
if (port) {
/* Insert a UDP header */
uh = (struct udphdr *)nth;
uh->uh_sport = htons(V_tcp_udp_tunneling_port);
uh->uh_dport = port;
nth = (struct tcphdr *)(uh + 1);
}
}
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
flags = TH_ACK;
} else if (!M_WRITABLE(m)) {
} else if ((!M_WRITABLE(m)) || (port != 0)) {
struct mbuf *n;
/* Can't reuse 'm', allocate a new mbuf. */
@ -1489,6 +1777,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
ip6 = mtod(n, struct ip6_hdr *);
xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
nth = (struct tcphdr *)(ip6 + 1);
if (port) {
/* Insert a UDP header */
uh = (struct udphdr *)nth;
uh->uh_sport = htons(V_tcp_udp_tunneling_port);
uh->uh_dport = port;
nth = (struct tcphdr *)(uh + 1);
}
} else
#endif /* INET6 */
{
@ -1496,6 +1791,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
ip = mtod(n, struct ip *);
xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
nth = (struct tcphdr *)(ip + 1);
if (port) {
/* Insert a UDP header */
uh = (struct udphdr *)nth;
uh->uh_sport = htons(V_tcp_udp_tunneling_port);
uh->uh_dport = port;
nth = (struct tcphdr *)(uh + 1);
}
}
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
xchg(nth->th_dport, nth->th_sport, uint16_t);
@ -1544,6 +1846,8 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#ifdef INET
tlen = sizeof (struct tcpiphdr);
#endif
if (port)
tlen += sizeof (struct udphdr);
#ifdef INVARIANTS
m->m_len = 0;
KASSERT(M_TRAILINGSPACE(m) >= tlen,
@ -1587,9 +1891,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
optlen = 0;
#ifdef INET6
if (isipv6) {
if (uh) {
ulen = tlen - sizeof(struct ip6_hdr);
uh->uh_ulen = htons(ulen);
}
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
if (port)
ip6->ip6_nxt = IPPROTO_UDP;
else
ip6->ip6_nxt = IPPROTO_TCP;
ip6->ip6_plen = htons(tlen - sizeof(*ip6));
}
#endif
@ -1598,8 +1909,17 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
if (uh) {
ulen = tlen - sizeof(struct ip);
uh->uh_ulen = htons(ulen);
}
ip->ip_len = htons(tlen);
ip->ip_ttl = V_ip_defttl;
if (port) {
ip->ip_p = IPPROTO_UDP;
} else {
ip->ip_p = IPPROTO_TCP;
}
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);
}
@ -1643,12 +1963,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
}
#endif
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
nth->th_sum = in6_cksum_pseudo(ip6,
tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
if (port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
uh->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
nth->th_sum = 0;
} else {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
nth->th_sum = in6_cksum_pseudo(ip6,
tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
}
ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
NULL, NULL);
}
@ -1658,9 +1985,18 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
if (port) {
uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(ulen + IPPROTO_UDP));
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
nth->th_sum = 0;
} else {
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
}
}
#endif /* INET */
#ifdef TCPDEBUG
@ -2460,8 +2796,8 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
#endif /* INET6 */
#ifdef INET
void
tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
static void
tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
{
struct ip *ip = vip;
struct tcphdr *th;
@ -2515,6 +2851,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
if (tp->t_port != port) {
goto out;
}
if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
@ -2561,17 +2900,61 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
inc.inc_lport = th->th_sport;
inc.inc_faddr = faddr;
inc.inc_laddr = ip->ip_src;
syncache_unreach(&inc, icmp_tcp_seq);
syncache_unreach(&inc, icmp_tcp_seq, port);
}
out:
if (inp != NULL)
INP_WUNLOCK(inp);
}
void
tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
tcp_ctlinput_with_port(cmd, sa, vip, htons(0));
}
/*
 * Control-input handler for ICMP messages that quote a TCP-over-UDP packet
 * (IPv4).
 *
 * vip points at the IP header quoted inside the ICMP message.  The quoted
 * UDP header is removed in place by copying the TCP header over it, the
 * outer IP length is reduced accordingly, and the result is passed to
 * tcp_ctlinput_with_port() together with the quoted UDP destination port.
 * Messages that are too short, or whose quoted UDP source port is not the
 * local tunneling port, are ignored.  The "unused" argument is not used.
 */
void
tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
{
/* It's an ICMP message for a tunneled TCP over UDP packet. */
struct ip *outer_ip, *inner_ip;
struct icmp *icmp;
struct udphdr *udp;
struct tcphdr *th, ttemp;
int i_hlen, o_len;
uint16_t port;
inner_ip = (struct ip *)vip;
/* Step back from the quoted IP header to the start of the ICMP header. */
icmp = (struct icmp *)((caddr_t)inner_ip -
(sizeof(struct icmp) - sizeof(struct ip)));
/*
 * NOTE(review): the outer IP header is located by stepping back exactly
 * sizeof(struct ip), which presumably assumes it carries no IP options --
 * confirm against the caller.
 */
outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
i_hlen = inner_ip->ip_hl << 2;
o_len = ntohs(outer_ip->ip_len);
/*
 * Require the outer header, 8 further bytes (presumably the fixed ICMP
 * header -- TODO confirm), the quoted IP and UDP headers, and the TCP
 * header up to th_ack.
 */
if (o_len <
(sizeof(struct ip) + 8 + i_hlen + sizeof(struct udphdr) + offsetof(struct tcphdr, th_ack))) {
/* Not enough data present */
return;
}
/* Strip the quoted UDP header by copying the TCP header on top of it. */
udp = (struct udphdr *)(((caddr_t)inner_ip) + i_hlen);
/* Only handle packets that were sent from the local tunneling port. */
if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) {
return;
}
/* The quoted destination port is passed on without byte swapping. */
port = udp->uh_dport;
th = (struct tcphdr *)(udp + 1);
/*
 * Source and destination overlap, hence the bounce through ttemp.
 * NOTE(review): the length check above only guarantees the TCP header up
 * to th_ack, while sizeof(struct tcphdr) bytes are copied here -- confirm
 * that enough quoted data is always present.
 */
memcpy(&ttemp, th, sizeof(struct tcphdr));
memcpy(udp, &ttemp, sizeof(struct tcphdr));
/* Now adjust down the length recorded in the outer IP header. */
o_len -= sizeof(struct udphdr);
outer_ip->ip_len = htons(o_len);
/* Now call in to the normal handling code */
tcp_ctlinput_with_port(cmd, sa, vip, port);
}
#endif /* INET */
#ifdef INET6
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
static void
tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
{
struct in6_addr *dst;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
@ -2661,6 +3044,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
if (tp->t_port != port) {
goto out;
}
if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
@ -2710,12 +3096,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
inc.inc_lport = t_ports.th_sport;
inc.inc6_faddr = *dst;
inc.inc6_laddr = ip6->ip6_src;
syncache_unreach(&inc, icmp_tcp_seq);
syncache_unreach(&inc, icmp_tcp_seq, port);
}
out:
if (inp != NULL)
INP_WUNLOCK(inp);
}
/*
 * ICMPv6 control input for TCP over IPv6 that is not UDP encapsulated.
 * Hands the notification to tcp6_ctlinput_with_port() with a remote
 * encapsulation port of zero (network byte order).
 */
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
	const uint16_t no_encaps_port = htons(0);

	tcp6_ctlinput_with_port(cmd, sa, d, no_encaps_port);
}
/*
 * ICMPv6 control input for TCP segments that were sent encapsulated in UDP.
 *
 * d is treated as a struct ip6ctlparam.  The UDP header found at offset
 * ip6c_off of the mbuf chain ip6c_m is checked against the local tunneling
 * port, trimmed away, and the message is then passed on to
 * tcp6_ctlinput_with_port() together with the UDP destination port
 * (left in network byte order).  The last argument is unused.
 */
void
tcp6_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *d, void *unused)
{
struct ip6ctlparam *ip6cp;
struct mbuf *m;
struct udphdr *udp;
uint16_t port;
ip6cp = (struct ip6ctlparam *)d;
/* Ask for sizeof(struct udphdr) bytes at ip6c_off; give up on failure. */
m = m_pulldown(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(struct udphdr), NULL);
if (m == NULL) {
return;
}
udp = mtod(m, struct udphdr *);
/* Only handle messages whose UDP source port is our tunneling port. */
if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) {
return;
}
port = udp->uh_dport;
/* Trim the UDP header from the mbuf returned by m_pulldown(). */
m_adj(m, sizeof(struct udphdr));
/*
 * NOTE(review): when m carries no packet header the chain head's
 * m_pkthdr.len is adjusted by hand, presumably because m_adj() cannot
 * do so for a non-head mbuf - confirm.
 */
if ((m->m_flags & M_PKTHDR) == 0) {
ip6cp->ip6c_m->m_pkthdr.len -= sizeof(struct udphdr);
}
/* Now call in to the normal handling code */
tcp6_ctlinput_with_port(cmd, sa, d, port);
}
#endif /* INET6 */
static uint32_t
@ -3448,11 +3867,13 @@ void
tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
{
struct tcpcb *tp = intotcpcb(inp);
struct tcptw *tw = intotw(inp);
sbintime_t now;
bzero(xt, sizeof(*xt));
if (inp->inp_flags & INP_TIMEWAIT) {
xt->t_state = TCPS_TIME_WAIT;
xt->xt_encaps_port = tw->t_port;
} else {
xt->t_state = tp->t_state;
xt->t_logstate = tp->t_logstate;
@ -3484,6 +3905,7 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
#undef COPYTIMER
xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
xt->xt_encaps_port = tp->t_port;
bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack,
TCP_FUNCTION_NAME_LEN_MAX);
bcopy(CC_ALGO(tp)->name, xt->xt_cc,

View File

@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/toecore.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netipsec/ipsec_support.h>
@ -143,14 +145,14 @@ static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *);
static struct syncache
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
struct syncache *, struct tcphdr *, struct tcpopt *,
struct socket *);
struct socket *, uint16_t);
static void syncache_pause(struct in_conninfo *);
static void syncache_unpause(void *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
struct socket *lso);
struct socket *lso, uint16_t port);
#endif
/*
@ -610,7 +612,8 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
* If required send a challenge ACK.
*/
void
syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m,
uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@ -650,6 +653,16 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
goto done;
}
/* The remote UDP encaps port does not match. */
if (sc->sc_port != port) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Spurious RST with matching "
"syncache entry but non-matching UDP encaps port, "
"segment ignored\n", s, __func__);
TCPSTAT_INC(tcps_badrst);
goto done;
}
/*
* If the RST bit is set, check the sequence number to see
* if this is a valid reset segment.
@ -716,7 +729,7 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
}
void
syncache_badack(struct in_conninfo *inc)
syncache_badack(struct in_conninfo *inc, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@ -725,7 +738,7 @@ syncache_badack(struct in_conninfo *inc)
return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
if ((sc != NULL) && (sc->sc_port == port)) {
syncache_drop(sc, sch);
TCPSTAT_INC(tcps_sc_badack);
}
@ -733,7 +746,7 @@ syncache_badack(struct in_conninfo *inc)
}
void
syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@ -745,6 +758,10 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
if (sc == NULL)
goto done;
/* If the port != sc_port, then it's a bogus ICMP msg */
if (port != sc->sc_port)
goto done;
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th_seq) != sc->sc_iss)
goto done;
@ -951,6 +968,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
tp->t_port = sc->sc_port;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
blk = sototcpcb(lso)->t_fb;
@ -1071,7 +1089,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct socket **lsop, struct mbuf *m)
struct socket **lsop, struct mbuf *m, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@ -1099,7 +1117,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* values with the reconstructed values from the cookie.
*/
if (sc != NULL)
syncookie_cmp(inc, sch, sc, th, to, *lsop);
syncookie_cmp(inc, sch, sc, th, to, *lsop, port);
#endif
if (sc == NULL) {
@ -1133,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
bzero(&scs, sizeof(scs));
sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port);
if (locked)
SCH_UNLOCK(sch);
if (sc == NULL) {
@ -1160,6 +1178,10 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
#endif /* TCP_SIGNATURE */
} else {
if (sc->sc_port != port) {
SCH_UNLOCK(sch);
return (0);
}
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/*
* If listening socket requested TCP digests, check that
@ -1380,7 +1402,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
struct socket *
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
void *todctx, uint8_t iptos)
void *todctx, uint8_t iptos, uint16_t port)
{
struct tcpcb *tp;
struct socket *rv = NULL;
@ -1640,6 +1662,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_label = maclabel;
#endif
sc->sc_cred = cred;
sc->sc_port = port;
cred = NULL;
sc->sc_ipopts = ipopts;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
@ -1797,8 +1820,9 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
struct ip *ip = NULL;
struct mbuf *m;
struct tcphdr *th = NULL;
struct udphdr *udp = NULL;
int optlen, error = 0; /* Make compiler happy */
u_int16_t hlen, tlen, mssopt;
u_int16_t hlen, tlen, mssopt, ulen;
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
@ -1812,9 +1836,14 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
sizeof(struct ip);
tlen = hlen + sizeof(struct tcphdr);
if (sc->sc_port) {
tlen += sizeof(struct udphdr);
}
/* Determine MSS we advertize to other end of connection. */
mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss);
mssopt = tcp_mssopt(&sc->sc_inc);
if (sc->sc_port)
mssopt -= V_tcp_udp_tunneling_overhead;
mssopt = max(mssopt, V_tcp_minmss);
/* XXX: Assume that the entire packet will fit in a header mbuf. */
KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
@ -1836,7 +1865,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
ip6->ip6_src = sc->sc_inc.inc6_laddr;
ip6->ip6_dst = sc->sc_inc.inc6_faddr;
ip6->ip6_plen = htons(tlen - hlen);
@ -1844,9 +1872,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
/* Zero out traffic class and flow label. */
ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
ip6->ip6_flow |= sc->sc_flowlabel;
if (sc->sc_port != 0) {
ip6->ip6_nxt = IPPROTO_UDP;
udp = (struct udphdr *)(ip6 + 1);
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = sc->sc_port;
ulen = (tlen - sizeof(struct ip6_hdr));
th = (struct tcphdr *)(udp + 1);
} else {
ip6->ip6_nxt = IPPROTO_TCP;
th = (struct tcphdr *)(ip6 + 1);
}
ip6->ip6_flow |= htonl(sc->sc_ip_tos << 20);
th = (struct tcphdr *)(ip6 + 1);
}
#endif
#if defined(INET6) && defined(INET)
@ -1861,7 +1898,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
ip->ip_id = 0;
ip->ip_off = 0;
ip->ip_sum = 0;
ip->ip_p = IPPROTO_TCP;
ip->ip_src = sc->sc_inc.inc_laddr;
ip->ip_dst = sc->sc_inc.inc_faddr;
ip->ip_ttl = sc->sc_ip_ttl;
@ -1876,8 +1912,17 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
*/
if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
ip->ip_off |= htons(IP_DF);
th = (struct tcphdr *)(ip + 1);
if (sc->sc_port == 0) {
ip->ip_p = IPPROTO_TCP;
th = (struct tcphdr *)(ip + 1);
} else {
ip->ip_p = IPPROTO_UDP;
udp = (struct udphdr *)(ip + 1);
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = sc->sc_port;
ulen = (tlen - sizeof(struct ip));
th = (struct tcphdr *)(udp + 1);
}
}
#endif /* INET */
th->th_sport = sc->sc_inc.inc_lport;
@ -1957,8 +2002,11 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
} else
optlen = 0;
if (udp) {
ulen += optlen;
udp->uh_ulen = htons(ulen);
}
M_SETFIB(m, sc->sc_inc.inc_fibnum);
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
/*
* If we have peer's SYN and it has a flowid, then let's assign it to
* our SYN|ACK. ip6_output() and ip_output() will not assign flowid
@ -1970,9 +2018,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
}
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
IPPROTO_TCP, 0);
if (sc->sc_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in6_cksum_pseudo(ip6, ulen,
IPPROTO_UDP, 0);
th->th_sum = htons(0);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
IPPROTO_TCP, 0);
}
ip6->ip6_hlim = sc->sc_ip_ttl;
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
@ -1992,9 +2049,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
if (sc->sc_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
th->th_sum = htons(0);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
}
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
@ -2224,7 +2290,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc)
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
struct socket *lso)
struct socket *lso, uint16_t port)
{
uint32_t hash;
uint8_t *secbits;
@ -2310,6 +2376,8 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
sc->sc_rxmits = 0;
sc->sc_port = port;
TCPSTAT_INC(tcps_sc_recvcookie);
return (sc);
}
@ -2318,13 +2386,13 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
static int
syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
struct socket *lso)
struct socket *lso, uint16_t port)
{
struct syncache scs, *scx;
char *s;
bzero(&scs, sizeof(scs));
scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port);
if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
return (0);
@ -2510,6 +2578,7 @@ syncache_pcblist(struct sysctl_req *req)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
xt.xt_encaps_port = sc->sc_port;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
error = SYSCTL_OUT(req, &xt, sizeof xt);

View File

@ -40,14 +40,15 @@ void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
#endif
void syncache_unreach(struct in_conninfo *, tcp_seq);
void syncache_unreach(struct in_conninfo *, tcp_seq, uint16_t);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
struct tcphdr *, struct socket **, struct mbuf *, uint16_t);
struct socket * syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket *, struct mbuf *,
void *, void *, uint8_t);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
void *, void *, uint8_t, uint16_t);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *,
uint16_t);
void syncache_badack(struct in_conninfo *, uint16_t);
int syncache_pcblist(struct sysctl_req *);
struct syncache {
@ -55,6 +56,7 @@ struct syncache {
struct in_conninfo sc_inc; /* addresses */
int sc_rxttime; /* retransmit time */
u_int16_t sc_rxmits; /* retransmit counter */
u_int16_t sc_port; /* remote UDP encaps port */
u_int32_t sc_tsreflect; /* timestamp to reflect */
u_int32_t sc_tsoff; /* ts offset w/ syncookies */
u_int32_t sc_flowlabel; /* IPv6 flowlabel */

View File

@ -93,6 +93,8 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
@ -318,6 +320,7 @@ tcp_twstart(struct tcpcb *tp)
}
tw->snd_nxt = tp->snd_nxt;
tw->t_port = tp->t_port;
tw->rcv_nxt = tp->rcv_nxt;
tw->iss = tp->iss;
tw->irs = tp->irs;
@ -436,12 +439,32 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
* while in TIME_WAIT, drop the old connection
* and start over if the sequence numbers
* are above the previous ones.
* Allow UDP port number changes in this case.
*/
if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
tcp_twclose(tw, 0);
return (1);
}
/*
* Send RST if UDP port numbers don't match
*/
if (tw->t_port != m->m_pkthdr.tcp_tun_port) {
if (th->th_flags & TH_ACK) {
tcp_respond(NULL, mtod(m, void *), th, m,
(tcp_seq)0, th->th_ack, TH_RST);
} else {
if (th->th_flags & TH_SYN)
tlen++;
if (th->th_flags & TH_FIN)
tlen++;
tcp_respond(NULL, mtod(m, void *), th, m,
th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK);
}
INP_WUNLOCK(inp);
return (0);
}
/*
* Drop the segment if it does not contain an ACK.
*/
@ -555,13 +578,14 @@ tcp_twrespond(struct tcptw *tw, int flags)
#ifdef INET
struct ip *ip = NULL;
#endif
u_int hdrlen, optlen;
u_int hdrlen, optlen, ulen;
int error = 0; /* Keep compiler happy */
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
struct udphdr *udp = NULL;
hdrlen = 0; /* Keep compiler happy */
INP_WLOCK_ASSERT(inp);
@ -579,8 +603,16 @@ tcp_twrespond(struct tcptw *tw, int flags)
if (isipv6) {
hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)(ip6 + 1);
tcpip_fillheaders(inp, ip6, th);
if (tw->t_port) {
udp = (struct udphdr *)(ip6 + 1);
hdrlen += sizeof(struct udphdr);
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tw->t_port;
ulen = (hdrlen - sizeof(struct ip6_hdr));
th = (struct tcphdr *)(udp + 1);
} else
th = (struct tcphdr *)(ip6 + 1);
tcpip_fillheaders(inp, tw->t_port, ip6, th);
}
#endif
#if defined(INET6) && defined(INET)
@ -590,8 +622,16 @@ tcp_twrespond(struct tcptw *tw, int flags)
{
hdrlen = sizeof(struct tcpiphdr);
ip = mtod(m, struct ip *);
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp, ip, th);
if (tw->t_port) {
udp = (struct udphdr *)(ip + 1);
hdrlen += sizeof(struct udphdr);
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tw->t_port;
ulen = (hdrlen - sizeof(struct ip));
th = (struct tcphdr *)(udp + 1);
} else
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp, tw->t_port, ip, th);
}
#endif
to.to_flags = 0;
@ -607,6 +647,10 @@ tcp_twrespond(struct tcptw *tw, int flags)
}
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
if (udp) {
ulen += optlen;
udp->uh_ulen = htons(ulen);
}
m->m_len = hdrlen + optlen;
m->m_pkthdr.len = m->m_len;
@ -618,12 +662,19 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_flags = flags;
th->th_win = htons(tw->last_win);
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
if (tw->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
th->th_sum = htons(0);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
}
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, inp->in6p_outputopts, NULL,
@ -635,9 +686,18 @@ tcp_twrespond(struct tcptw *tw, int flags)
#endif
#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
if (tw->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
th->th_sum = htons(0);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
}
ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);

View File

@ -2049,6 +2049,31 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
}
goto unlock_and_done;
case TCP_REMOTE_UDP_ENCAPS_PORT:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
return (error);
if ((optval < TCP_TUNNELING_PORT_MIN) ||
(optval > TCP_TUNNELING_PORT_MAX)) {
/* Its got to be in range */
return (EINVAL);
}
if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
/* You have to have enabled a UDP tunneling port first */
return (EINVAL);
}
INP_WLOCK_RECHECK(inp);
if (tp->t_state != TCPS_CLOSED) {
/* You can't change after you are connected */
error = EINVAL;
} else {
/* Ok we are all good set the port */
tp->t_port = htons(optval);
}
goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
@ -2388,6 +2413,11 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_REMOTE_UDP_ENCAPS_PORT:
optval = ntohs(tp->t_port);
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_NOOPT:
optval = tp->t_flags & TF_NOOPT;
INP_WUNLOCK(inp);

View File

@ -282,6 +282,16 @@ struct tcptemp {
struct tcphdr tt_t;
};
/* Enable TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN 0
#define TCP_TUNNELING_PORT_MAX 65535
#define TCP_TUNNELING_PORT_DEFAULT 0
/* TCP/UDP tunneling overhead limits */
#define TCP_TUNNELING_OVERHEAD_MIN sizeof(struct udphdr)
#define TCP_TUNNELING_OVERHEAD_MAX 1024
#define TCP_TUNNELING_OVERHEAD_DEFAULT TCP_TUNNELING_OVERHEAD_MIN
/* Minimum map entries limit value, if set */
#define TCP_MIN_MAP_ENTRIES_LIMIT 128
@ -502,6 +512,8 @@ struct in_conninfo;
struct tcptw {
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
uint32_t t_port:16, /* UDP port number if TCPoUDP */
t_unused:16;
tcp_seq snd_nxt;
tcp_seq rcv_nxt;
tcp_seq iss;
@ -678,7 +690,10 @@ struct tcpstat {
uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */
uint64_t _pad[12]; /* 6 UTO, 6 TBD */
uint64_t tcps_tunneled_pkts; /* Packets encap's in UDP received */
uint64_t tcps_tunneled_errs; /* Packets that had errors that were UDP encaped */
uint64_t _pad[10]; /* 6 UTO, 6 TBD */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
@ -776,7 +791,9 @@ struct xtcpcb {
uint32_t t_rcv_wnd; /* (s) */
uint32_t t_snd_wnd; /* (s) */
uint32_t xt_ecn; /* (s) */
int32_t spare32[26];
uint16_t xt_encaps_port; /* (s) */
int16_t spare16;
int32_t spare32[25];
} __aligned(8);
#ifdef _KERNEL
@ -867,6 +884,8 @@ VNET_DECLARE(int, tcp_sack_globalmaxholes);
VNET_DECLARE(int, tcp_sack_maxholes);
VNET_DECLARE(int, tcp_sc_rst_sock_fail);
VNET_DECLARE(int, tcp_sendspace);
VNET_DECLARE(int, tcp_udp_tunneling_overhead);
VNET_DECLARE(int, tcp_udp_tunneling_port);
VNET_DECLARE(struct inpcbhead, tcb);
VNET_DECLARE(struct inpcbinfo, tcbinfo);
@ -929,6 +948,7 @@ void tcp_twstart(struct tcpcb *);
void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
struct tcpcb *
tcp_drop(struct tcpcb *, int);
void tcp_drain(void);
@ -963,6 +983,7 @@ void hhook_run_tcp_est_in(struct tcpcb *tp,
int tcp_input(struct mbuf **, int *, int);
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int);
int tcp_input_with_port(struct mbuf **, int *, int, uint16_t);
void tcp_handle_wakeup(struct tcpcb *, struct socket *);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t);
@ -1033,7 +1054,7 @@ void tcp_setpersist(struct tcpcb *);
void tcp_slowtimo(void);
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
int tcp_timer_suspend(struct tcpcb *, uint32_t);
void tcp_timers_unsuspend(struct tcpcb *, uint32_t);

View File

@ -352,7 +352,7 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
INP_RLOCK_ASSERT(inp);
(void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod,
todctx, iptos);
todctx, iptos, htons(0));
}
int
@ -362,7 +362,7 @@ toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
NET_EPOCH_ASSERT();
return (syncache_expand(inc, to, th, lsop, NULL));
return (syncache_expand(inc, to, th, lsop, NULL, htons(0)));
}
/*

View File

@ -74,8 +74,10 @@ VNET_DECLARE(int, tcp_v6mssdflt); /* XXX */
struct ip6_hdr;
void tcp6_ctlinput(int, struct sockaddr *, void *);
void tcp6_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
void tcp6_init(void);
int tcp6_input(struct mbuf **, int *, int);
int tcp6_input_with_port(struct mbuf **, int *, int, uint16_t);
extern struct pr_usrreqs tcp6_usrreqs;

View File

@ -198,6 +198,7 @@ struct pkthdr {
} PH_loc;
};
#define ether_vtag PH_per.sixteen[0]
#define tcp_tun_port PH_per.sixteen[0] /* outbound */
#define PH_vt PH_per
#define vt_nrecs sixteen[0] /* mld and v6-ND */
#define tso_segsz PH_per.sixteen[1] /* inbound after LRO */

View File

@ -664,6 +664,10 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
"{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n");
p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} "
"{N:/duplicate ack%s}\n");
p(tcps_tunneled_pkts, "\t\t{:received-udp-tunneled-pkts/%ju} "
"{N:/UDP tunneled pkt%s}\n");
p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} "
"{N:/UDP tunneled pkt cnt with error%s}\n");
p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} "
"{N:/ack%s for unsent data}\n");
p2(tcps_rcvpack, tcps_rcvbyte, "\t\t"

View File

@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 30, 2020
.Dd March 28, 2021
.Dt SOCKSTAT 1
.Os
.Sh NAME
@ -98,7 +98,7 @@ Display the protocol state, if applicable.
This is currently only implemented for SCTP and TCP.
.It Fl U
Display the remote UDP encapsulation port number, if applicable.
This is currently only implemented for SCTP.
This is currently only implemented for SCTP and TCP.
.It Fl u
Show
.Dv AF_LOCAL
@ -163,7 +163,7 @@ The address the foreign end of the socket is bound to (see
.It Li ENCAPS
The remote UDP encapsulation port number if
.Fl U
is specified (only for SCTP).
is specified (only for SCTP or TCP).
.It Li PATH STATE
The path state if
.Fl s

View File

@ -710,6 +710,8 @@ gather_inet(int proto)
sockaddr(&faddr->address, sock->family,
&xip->in6p_faddr, xip->inp_fport);
}
if (proto == IPPROTO_TCP)
faddr->encaps_port = xtp->xt_encaps_port;
laddr->next = NULL;
faddr->next = NULL;
sock->laddr = laddr;
@ -1087,10 +1089,13 @@ displaysock(struct sock *s, int pos)
}
if (opt_U) {
if (faddr != NULL &&
s->proto == IPPROTO_SCTP &&
s->state != SCTP_CLOSED &&
s->state != SCTP_BOUND &&
s->state != SCTP_LISTEN) {
((s->proto == IPPROTO_SCTP &&
s->state != SCTP_CLOSED &&
s->state != SCTP_BOUND &&
s->state != SCTP_LISTEN) ||
(s->proto == IPPROTO_TCP &&
s->state != TCPS_CLOSED &&
s->state != TCPS_LISTEN))) {
while (pos < offset)
pos += xprintf(" ");
pos += xprintf("%u",