1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-10-18 02:19:39 +00:00

pf: Add support for endpoint independent NAT bindings for UDP

With endpoint-independent NAT bindings for UDP, flows from a NATed source
address are always mapped to the same IP:port pair on the NAT router.
This allows a client to connect to multiple external servers while
appearing as the same host and enables NAT traversal without requiring
the client to use a middlebox traversal protocol such as STUN or TURN.

Introduce the 'endpoint-independent' option to NAT rules to allow
configuration of endpoint-independent mappings without affecting existing
deployments.

This change satisfies REQ 1 and 3 of RFC 4787 also known as 'full cone'
NAT.

Using endpoint-independent NAT changes NAT exhaustion behaviour, but it
does not introduce any additional security considerations compared to
other forms of NAT.

PR:             219803
Co-authored-by: Damjan Jovanovic <damjan.jov@gmail.com>
Co-authored-by: Naman Sood <mail@nsood.in>
Reviewed-by:	kp
Sponsored-by:   Tailscale
Sponsored-by:   The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D11137
This commit is contained in:
Tom Jones 2024-09-06 12:59:09 +01:00
parent 674cbf38f6
commit 390dc369ef
11 changed files with 489 additions and 28 deletions

View File

@ -326,6 +326,7 @@ static struct pool_opts {
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
#define POM_ENDPI 0x04
u_int8_t opts;
int type;
int staticport;
@ -512,7 +513,7 @@ int parseport(char *, struct range *r, int);
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token DNPIPE DNQUEUE RIDENTIFIER
%token LOAD RULESET_OPTIMIZATION PRIO
%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token STICKYADDRESS ENDPI MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO
@ -4593,6 +4594,14 @@ pool_opt : BITMASK {
pool_opts.marker |= POM_STICKYADDRESS;
pool_opts.opts |= PF_POOL_STICKYADDR;
}
| ENDPI {
if (pool_opts.marker & POM_ENDPI) {
yyerror("endpoint-independent cannot be redefined");
YYERROR;
}
pool_opts.marker |= POM_ENDPI;
pool_opts.opts |= PF_POOL_ENDPI;
}
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
@ -6299,6 +6308,7 @@ lookup(char *s)
{ "dnqueue", DNQUEUE},
{ "drop", DROP},
{ "dup-to", DUPTO},
{ "endpoint-independent", ENDPI},
{ "ether", ETHER},
{ "fail-policy", FAILPOLICY},
{ "fairq", FAIRQ},

View File

@ -488,6 +488,8 @@ print_pool(struct pfctl_pool *pool, u_int16_t p1, u_int16_t p2,
}
if (pool->opts & PF_POOL_STICKYADDR)
printf(" sticky-address");
if (pool->opts & PF_POOL_ENDPI)
printf(" endpoint-independent");
if (id == PF_NAT && p1 == 0 && p2 == 0)
printf(" static-port");
if (pool->mape.offset > 0)

View File

@ -0,0 +1 @@
nat on vtnet1 inet from ! (vtnet1) to any -> (vtnet1) endpoint-independent

View File

@ -0,0 +1 @@
nat on vtnet1 inet from ! (vtnet1) to any -> (vtnet1) round-robin endpoint-independent

View File

@ -26,7 +26,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd September 2, 2024
.Dd September 6, 2024
.Dt PF 4
.Os
.Sh NAME
@ -89,6 +89,10 @@ Should be power of 2.
Default value is 32768.
.It Va net.pf.rule_tag_hashsize
Size of the hash table that stores tags.
.It Va net.pf.udpendpoint_hashsize
Size of the hash table that stores UDP endpoint mappings.
Should be power of 2.
Default value is 32768.
.It Va net.pf.default_to_drop
This value overrides
.Cd "options PF_DEFAULT_TO_DROP"

View File

@ -27,7 +27,7 @@
.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd June 24, 2024
.Dd September 4, 2024
.Dt PF.CONF 5
.Os
.Sh NAME
@ -2278,6 +2278,16 @@ from modifying the source port on TCP and UDP packets.
With
.Ar nat
rules, the
.It Ar endpoint-independent
With
.Ar nat
rules, the
.Ar endpoint-independent
option causes
.Xr pf 4
to always map connections from a UDP source address and port to the same
NAT address and port.
This feature implements "full-cone" NAT behavior.
.Ar map-e-portset
option enables the source port translation of MAP-E (RFC 7597) Customer Edge.
In order to make the host act as a MAP-E Customer Edge, setting up a tunneling

View File

@ -940,6 +940,29 @@ struct pf_state_peer {
u_int8_t pad[1];
};
/* Keep synced with struct pf_udp_endpoint. */
/*
 * Comparison/hash key for a UDP endpoint.  Lookups compare endpoints with
 * bcmp() over sizeof(struct pf_udp_endpoint_cmp) and hash the same prefix
 * word-by-word, so this layout must mirror the head of
 * struct pf_udp_endpoint exactly.
 */
struct pf_udp_endpoint_cmp {
struct pf_addr addr;	/* endpoint IP address */
uint16_t port;	/* endpoint port (network byte order) */
sa_family_t af;	/* address family of addr */
uint8_t pad[1];	/* explicit padding; must be zero so bcmp()/hash see no garbage */
};
/*
 * One side of an endpoint-independent UDP NAT binding, linked into the
 * pf_udpendpointhash table.  The first four members form the lookup key
 * and must stay in sync with struct pf_udp_endpoint_cmp.
 */
struct pf_udp_endpoint {
struct pf_addr addr;
uint16_t port;
sa_family_t af;
uint8_t pad[1];
struct pf_udp_mapping *mapping;	/* back-pointer to the owning mapping */
LIST_ENTRY(pf_udp_endpoint) entry;	/* hash-row chain linkage */
};
/*
 * Endpoint-independent UDP NAT binding: endpoints[0] is the original
 * (pre-NAT) source address/port, endpoints[1] the translated NAT
 * address/port.  Reference-counted; released via pf_udp_mapping_release()
 * when the last state using it is unlinked.
 */
struct pf_udp_mapping {
struct pf_udp_endpoint endpoints[2];
u_int refs;	/* reference count, see refcount(9) */
};
/* Keep synced with struct pf_state_key. */
struct pf_state_key_cmp {
struct pf_addr addr[2];
@ -1069,6 +1092,7 @@ struct pf_kstate {
union pf_krule_ptr nat_rule;
struct pf_addr rt_addr;
struct pf_state_key *key[2]; /* addresses stack and wire */
struct pf_udp_mapping *udp_mapping;
struct pfi_kkif *kif;
struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */
struct pfi_kkif *rt_kif;
@ -2124,17 +2148,28 @@ struct pf_idhash {
struct mtx lock;
};
/* One row of the UDP endpoint hash table: a chain of endpoints plus its lock. */
struct pf_udpendpointhash {
LIST_HEAD(, pf_udp_endpoint) endpoints;
/* refcount is synchronized on the source endpoint's row lock */
struct mtx lock;
};
extern u_long pf_ioctl_maxcount;
VNET_DECLARE(u_long, pf_hashmask);
#define V_pf_hashmask VNET(pf_hashmask)
VNET_DECLARE(u_long, pf_srchashmask);
#define V_pf_srchashmask VNET(pf_srchashmask)
VNET_DECLARE(u_long, pf_udpendpointhashmask);
#define V_pf_udpendpointhashmask VNET(pf_udpendpointhashmask)
#define PF_HASHSIZ (131072)
#define PF_SRCHASHSIZ (PF_HASHSIZ/4)
#define PF_UDPENDHASHSIZ (PF_HASHSIZ/4)
VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
VNET_DECLARE(struct pf_idhash *, pf_idhash);
VNET_DECLARE(struct pf_udpendpointhash *, pf_udpendpointhash);
#define V_pf_keyhash VNET(pf_keyhash)
#define V_pf_idhash VNET(pf_idhash)
#define V_pf_udpendpointhash VNET(pf_udpendpointhash)
VNET_DECLARE(struct pf_srchash *, pf_srchash);
#define V_pf_srchash VNET(pf_srchash)
@ -2209,6 +2244,8 @@ VNET_DECLARE(uma_zone_t, pf_state_z);
#define V_pf_state_z VNET(pf_state_z)
VNET_DECLARE(uma_zone_t, pf_state_key_z);
#define V_pf_state_key_z VNET(pf_state_key_z)
VNET_DECLARE(uma_zone_t, pf_udp_mapping_z);
#define V_pf_udp_mapping_z VNET(pf_udp_mapping_z)
VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
@ -2281,6 +2318,15 @@ extern struct pf_kstate *pf_find_state_all(
extern bool pf_find_state_all_exists(
const struct pf_state_key_cmp *,
u_int);
extern struct pf_udp_mapping *pf_udp_mapping_find(struct pf_udp_endpoint_cmp
*endpoint);
extern struct pf_udp_mapping *pf_udp_mapping_create(sa_family_t af,
struct pf_addr *src_addr, uint16_t src_port,
struct pf_addr *nat_addr, uint16_t nat_port);
extern int pf_udp_mapping_insert(struct pf_udp_mapping
*mapping);
extern void pf_udp_mapping_release(struct pf_udp_mapping
*mapping);
extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *,
struct pf_krule *, sa_family_t,
struct pf_srchash **, bool);
@ -2574,7 +2620,8 @@ u_short pf_get_translation(struct pf_pdesc *, struct mbuf *,
struct pf_state_key **, struct pf_state_key **,
struct pf_addr *, struct pf_addr *,
uint16_t, uint16_t, struct pf_kanchor_stackframe *,
struct pf_krule **);
struct pf_krule **,
struct pf_udp_mapping **udp_mapping);
struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct mbuf *, int,
struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t);

View File

@ -283,6 +283,7 @@ VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
uma_zone_t pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
VNET_DEFINE(uma_zone_t, pf_state_key_z);
VNET_DEFINE(uma_zone_t, pf_udp_mapping_z);
VNET_DEFINE(struct unrhdr64, pf_stateid);
@ -330,7 +331,7 @@ static int pf_create_state(struct pf_krule *, struct pf_krule *,
struct pf_state_key *, struct mbuf *, int,
u_int16_t, u_int16_t, int *, struct pfi_kkif *,
struct pf_kstate **, int, u_int16_t, u_int16_t,
int, struct pf_krule_slist *);
int, struct pf_krule_slist *, struct pf_udp_mapping *);
static int pf_state_key_addr_setup(struct pf_pdesc *, struct mbuf *,
int, struct pf_state_key_cmp *, int, struct pf_addr *,
int, struct pf_addr *, int);
@ -493,22 +494,29 @@ MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);
SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"pf(4)");
VNET_DEFINE(u_long, pf_hashmask);
VNET_DEFINE(u_long, pf_srchashmask);
VNET_DEFINE(u_long, pf_udpendpointhashmask);
VNET_DEFINE_STATIC(u_long, pf_hashsize);
#define V_pf_hashsize VNET(pf_hashsize)
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
#define V_pf_srchashsize VNET(pf_srchashsize)
VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
#define V_pf_udpendpointhashsize VNET(pf_udpendpointhashsize)
u_long pf_ioctl_maxcount = 65535;
SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
&pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");
@ -699,6 +707,17 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af)
return (h & V_pf_srchashmask);
}
static inline uint32_t
pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
{
uint32_t h;
h = murmur3_32_hash32((uint32_t *)endpoint,
sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
V_pf_hashseed);
return (h & V_pf_udpendpointhashmask);
}
#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
@ -1086,12 +1105,15 @@ pf_initialize(void)
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
struct pf_udpendpointhash *uh;
u_int i;
if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
V_pf_hashsize = PF_HASHSIZ;
if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
V_pf_srchashsize = PF_SRCHASHSIZ;
if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
V_pf_hashseed = arc4random();
@ -1154,6 +1176,30 @@ pf_initialize(void)
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
/* UDP endpoint mappings. */
V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
if (V_pf_udpendpointhash == NULL) {
printf("pf: Unable to allocate memory for "
"udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);
V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
}
V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
for (i = 0, uh = V_pf_udpendpointhash;
i <= V_pf_udpendpointhashmask;
i++, uh++) {
mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
MTX_DEF | MTX_DUPOK);
}
/* ALTQ */
TAILQ_INIT(&V_pf_altqs[0]);
TAILQ_INIT(&V_pf_altqs[1]);
@ -1187,10 +1233,12 @@ pf_cleanup(void)
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
struct pf_udpendpointhash *uh;
struct pf_send_entry *pfse, *next;
u_int i;
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
i <= V_pf_hashmask;
i++, kh++, ih++) {
KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
__func__));
@ -1209,6 +1257,15 @@ pf_cleanup(void)
}
free(V_pf_srchash, M_PFHASH);
for (i = 0, uh = V_pf_udpendpointhash;
i <= V_pf_udpendpointhashmask;
i++, uh++) {
KASSERT(LIST_EMPTY(&uh->endpoints),
("%s: udp endpoint hash not empty", __func__));
mtx_destroy(&uh->lock);
}
free(V_pf_udpendpointhash, M_PFHASH);
STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
m_freem(pfse->pfse_m);
free(pfse, M_PFTEMP);
@ -1218,6 +1275,7 @@ pf_cleanup(void)
uma_zdestroy(V_pf_sources_z);
uma_zdestroy(V_pf_state_z);
uma_zdestroy(V_pf_state_key_z);
uma_zdestroy(V_pf_udp_mapping_z);
}
static int
@ -1807,6 +1865,123 @@ pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir)
return (false);
}
/*
 * Allocate and initialize an endpoint-independent UDP mapping tying
 * src_addr:src_port to nat_addr:nat_port.  Returns NULL on allocation
 * failure (M_NOWAIT).  The caller owns the single initial reference.
 */
struct pf_udp_mapping *
pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
struct pf_addr *nat_addr, uint16_t nat_port)
{
	struct pf_udp_mapping *m;
	struct pf_addr *addrs[2];
	uint16_t ports[2];
	int i;

	m = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
	if (m == NULL)
		return (NULL);

	addrs[0] = src_addr;
	addrs[1] = nat_addr;
	ports[0] = src_port;
	ports[1] = nat_port;

	/* Both endpoints share the af and point back at their mapping. */
	for (i = 0; i < 2; i++) {
		PF_ACPY(&m->endpoints[i].addr, addrs[i], af);
		m->endpoints[i].port = ports[i];
		m->endpoints[i].af = af;
		m->endpoints[i].mapping = m;
	}
	refcount_init(&m->refs, 1);

	return (m);
}
/*
 * Link both endpoints of a mapping into the UDP endpoint hash.
 *
 * Returns 0 on success, or EEXIST if either the source or the NAT side
 * endpoint is already present in its hash row; on EEXIST nothing is
 * inserted and the caller retains ownership of the mapping.
 */
int
pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
{
struct pf_udpendpointhash *h0, *h1;
struct pf_udp_endpoint *endpoint;
int ret = EEXIST;

h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
/* Lock both rows; always in pointer order, to avoid deadlock. */
if (h0 == h1) {
PF_HASHROW_LOCK(h0);
} else if (h0 < h1) {
PF_HASHROW_LOCK(h0);
PF_HASHROW_LOCK(h1);
} else {
PF_HASHROW_LOCK(h1);
PF_HASHROW_LOCK(h0);
}

/* Duplicate check compares only the pf_udp_endpoint_cmp prefix. */
LIST_FOREACH(endpoint, &h0->endpoints, entry) {
if (bcmp(endpoint, &mapping->endpoints[0],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint != NULL)
goto cleanup;
LIST_FOREACH(endpoint, &h1->endpoints, entry) {
if (bcmp(endpoint, &mapping->endpoints[1],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint != NULL)
goto cleanup;
/* Neither endpoint exists yet: publish both atomically under the locks. */
LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
ret = 0;

cleanup:
if (h0 != h1) {
PF_HASHROW_UNLOCK(h0);
PF_HASHROW_UNLOCK(h1);
} else {
PF_HASHROW_UNLOCK(h0);
}
return (ret);
}
/*
 * Drop one reference on a mapping (NULL is a no-op).  When the last
 * reference is released, both endpoints are unhooked from their hash
 * rows and the mapping is freed.
 */
void
pf_udp_mapping_release(struct pf_udp_mapping *mapping)
{
/* refcount is synchronized on the source endpoint's row lock */
struct pf_udpendpointhash *h0, *h1;

if (mapping == NULL)
return;

h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
PF_HASHROW_LOCK(h0);
if (refcount_release(&mapping->refs)) {
/*
 * Last reference.  New references are only taken through the
 * source endpoint (see pf_udp_mapping_find()), so removing
 * endpoints[0] under h0's lock makes the mapping unreachable;
 * the NAT-side endpoint can then be removed under its own lock.
 */
LIST_REMOVE(&mapping->endpoints[0], entry);
PF_HASHROW_UNLOCK(h0);
h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
PF_HASHROW_LOCK(h1);
LIST_REMOVE(&mapping->endpoints[1], entry);
PF_HASHROW_UNLOCK(h1);
uma_zfree(V_pf_udp_mapping_z, mapping);
} else {
PF_HASHROW_UNLOCK(h0);
}
}
/*
 * Look up a mapping by its source (pre-NAT) endpoint.  Returns the
 * mapping with an extra reference held, or NULL if none exists.
 */
struct pf_udp_mapping *
pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
{
struct pf_udpendpointhash *uh;
struct pf_udp_endpoint *endpoint;

uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
PF_HASHROW_LOCK(uh);
LIST_FOREACH(endpoint, &uh->endpoints, entry) {
/*
 * Match the key, and additionally require that the hit is the
 * mapping's source endpoint (endpoints[0]): NAT-side endpoints
 * live in the same table and must not satisfy this lookup.
 */
if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
bcmp(endpoint, &endpoint->mapping->endpoints[0],
sizeof(struct pf_udp_endpoint_cmp)) == 0)
break;
}
if (endpoint == NULL) {
PF_HASHROW_UNLOCK(uh);
return (NULL);
}
/* Reference taken under the row lock; see pf_udp_mapping_release(). */
refcount_acquire(&endpoint->mapping->refs);
PF_HASHROW_UNLOCK(uh);
return (endpoint->mapping);
}
/* END state table stuff */
static void
@ -2423,6 +2598,9 @@ pf_unlink_state(struct pf_kstate *s)
PF_HASHROW_UNLOCK(ih);
pf_detach_state(s);
pf_udp_mapping_release(s->udp_mapping);
/* pf_state_insert() initialises refs to 2 */
return (pf_release_staten(s, 2));
}
@ -4686,6 +4864,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
u_int16_t bproto_sum = 0, bip_sum = 0;
u_int8_t icmptype = 0, icmpcode = 0;
struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
struct pf_udp_mapping *udp_mapping = NULL;
PF_RULES_RASSERT();
@ -4760,7 +4939,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
/* check packet for BINAT/NAT/RDR */
transerror = pf_get_translation(pd, m, off, kif, &nsn, &sk,
&nk, saddr, daddr, sport, dport, anchor_stack, &nr);
&nk, saddr, daddr, sport, dport, anchor_stack, &nr, &udp_mapping);
switch (transerror) {
default:
/* A translation error occurred. */
@ -5058,8 +5237,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
int action;
action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
hdrlen, &match_rules);
hdrlen, &match_rules, udp_mapping);
if (action != PF_PASS) {
pf_udp_mapping_release(udp_mapping);
if (action == PF_DROP &&
(r->rule_flag & PFRULE_RETURN))
pf_return(r, nr, pd, sk, off, m, th, kif,
@ -5075,6 +5255,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
uma_zfree(V_pf_state_key_z, sk);
uma_zfree(V_pf_state_key_z, nk);
pf_udp_mapping_release(udp_mapping);
}
/* copy back packet headers if we performed NAT operations */
@ -5102,6 +5283,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
uma_zfree(V_pf_state_key_z, sk);
uma_zfree(V_pf_state_key_z, nk);
pf_udp_mapping_release(udp_mapping);
return (PF_DROP);
}
@ -5111,7 +5294,7 @@ pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
u_int16_t dport, int *rewrite, struct pfi_kkif *kif, struct pf_kstate **sm,
int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen,
struct pf_krule_slist *match_rules)
struct pf_krule_slist *match_rules, struct pf_udp_mapping *udp_mapping)
{
struct pf_kstate *s = NULL;
struct pf_ksrc_node *sn = NULL;
@ -5328,6 +5511,8 @@ pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
return (PF_SYNPROXY_DROP);
}
s->udp_mapping = udp_mapping;
return (PF_PASS);
csfailed:

View File

@ -129,6 +129,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
PF_ADDR_RANGE };
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
#define PF_POOL_ENDPI 0x40
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f

View File

@ -62,7 +62,8 @@ static struct pf_krule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
uint16_t, int, struct pf_kanchor_stackframe *);
static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **);
uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
struct pf_udp_mapping **);
#define mix(a,b,c) \
do { \
@ -216,14 +217,47 @@ static int
pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
uint16_t high, struct pf_ksrc_node **sn)
uint16_t high, struct pf_ksrc_node **sn,
struct pf_udp_mapping **udp_mapping)
{
struct pf_state_key_cmp key;
struct pf_addr init_addr;
struct pf_srchash *sh = NULL;
bzero(&init_addr, sizeof(init_addr));
MPASS(*udp_mapping == NULL);
/*
* If we are UDP and have an existing mapping we can get source port
* from the mapping. In this case we have to look up the src_node as
* pf_map_addr would.
*/
if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
struct pf_udp_endpoint_cmp udp_source;
bzero(&udp_source, sizeof(udp_source));
udp_source.af = af;
PF_ACPY(&udp_source.addr, saddr, af);
udp_source.port = sport;
*udp_mapping = pf_udp_mapping_find(&udp_source);
if (*udp_mapping) {
PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
*nport = (*udp_mapping)->endpoints[1].port;
/* Try to find a src_node as per pf_map_addr(). */
if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
(r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
*sn = pf_find_src_node(saddr, r, af, &sh, 0);
return (0);
} else {
*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
if (*udp_mapping == NULL)
return (1);
}
}
if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
return (1);
goto failed;
if (proto == IPPROTO_ICMP) {
if (*nport == htons(ICMP_ECHO)) {
@ -250,6 +284,8 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
do {
PF_ACPY(&key.addr[1], naddr, key.af);
if (*udp_mapping)
PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
/*
* port search; start random, step;
@ -277,8 +313,16 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
} else if (low == high) {
key.port[1] = htons(low);
if (!pf_find_state_all_exists(&key, PF_IN)) {
*nport = htons(low);
return (0);
if (*udp_mapping != NULL) {
(*udp_mapping)->endpoints[1].port = htons(low);
if (pf_udp_mapping_insert(*udp_mapping) == 0) {
*nport = htons(low);
return (0);
}
} else {
*nport = htons(low);
return (0);
}
}
} else {
uint32_t tmp;
@ -293,18 +337,35 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
cut = arc4random() % (1 + high - low) + low;
/* low <= cut <= high */
for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
key.port[1] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_IN)) {
*nport = htons(tmp);
return (0);
if (*udp_mapping != NULL) {
(*udp_mapping)->endpoints[1].port = htons(tmp);
if (pf_udp_mapping_insert(*udp_mapping) == 0) {
*nport = htons(tmp);
return (0);
}
} else {
key.port[1] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_IN)) {
*nport = htons(tmp);
return (0);
}
}
}
tmp = cut;
for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
key.port[1] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_IN)) {
*nport = htons(tmp);
return (0);
if (proto == IPPROTO_UDP &&
(r->rpool.opts & PF_POOL_ENDPI)) {
(*udp_mapping)->endpoints[1].port = htons(tmp);
if (pf_udp_mapping_insert(*udp_mapping) == 0) {
*nport = htons(tmp);
return (0);
}
} else {
key.port[1] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_IN)) {
*nport = htons(tmp);
return (0);
}
}
}
}
@ -326,6 +387,10 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
return (1);
}
} while (! PF_AEQ(&init_addr, naddr, af) );
failed:
uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
*udp_mapping = NULL;
return (1); /* none available */
}
@ -333,7 +398,7 @@ static int
pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
struct pf_ksrc_node **sn)
struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
{
uint16_t psmask, low, highmask;
uint16_t i, ahigh, cut;
@ -353,13 +418,13 @@ pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
for (i = cut; i <= ahigh; i++) {
low = (i << ashift) | psmask;
if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
naddr, nport, low, low | highmask, sn))
naddr, nport, low, low | highmask, sn, udp_mapping))
return (0);
}
for (i = cut - 1; i > 0; i--) {
low = (i << ashift) | psmask;
if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
naddr, nport, low, low | highmask, sn))
naddr, nport, low, low | highmask, sn, udp_mapping))
return (0);
}
return (1);
@ -597,7 +662,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
struct pf_state_key **skp, struct pf_state_key **nkp,
struct pf_addr *saddr, struct pf_addr *daddr,
uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
struct pf_krule **rp)
struct pf_krule **rp,
struct pf_udp_mapping **udp_mapping)
{
struct pf_krule *r = NULL;
struct pf_addr *naddr;
@ -661,7 +727,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
}
if (r->rpool.mape.offset > 0) {
if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
sport, daddr, dport, naddr, nportp, sn)) {
sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: MAP-E port allocation (%u/%u/%u)"
" failed\n",
@ -672,7 +738,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
goto notrans;
}
} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
daddr, dport, naddr, nportp, low, high, sn)) {
daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: NAT proxy port allocation (%u-%u) failed\n",
r->rpool.proxy_port[0], r->rpool.proxy_port[1]));

View File

@ -112,6 +112,139 @@ nested_anchor_body()
}
atf_test_case "endpoint_independent" "cleanup"
# atf(7) metadata: the test creates jails/epairs and configures pf,
# hence the root requirement.
endpoint_independent_head()
{
atf_set descr 'Test that a client behind NAT gets the same external IP:port for different servers'
atf_set require.user root
}
# Verify RFC 4787 endpoint-independent mapping: without the option two
# servers must see different NAT source tuples for the same client flow,
# with the option they must see the same tuple.
endpoint_independent_body()
{
	pft_init

	filter="udp and dst port 1234" # only capture udp pings
	epair_client=$(vnet_mkepair)
	epair_nat=$(vnet_mkepair)
	epair_server1=$(vnet_mkepair)
	epair_server2=$(vnet_mkepair)
	bridge=$(vnet_mkbridge)

	vnet_mkjail nat ${epair_client}b ${epair_nat}a
	vnet_mkjail client ${epair_client}a
	vnet_mkjail server1 ${epair_server1}a
	vnet_mkjail server2 ${epair_server2}a

	ifconfig ${epair_server1}b up
	ifconfig ${epair_server2}b up
	ifconfig ${epair_nat}b up
	ifconfig ${bridge} \
		addm ${epair_server1}b \
		addm ${epair_server2}b \
		addm ${epair_nat}b \
		up

	jexec nat ifconfig ${epair_client}b 192.0.2.1/24 up
	jexec nat ifconfig ${epair_nat}a 198.51.100.42/24 up
	jexec nat sysctl net.inet.ip.forwarding=1

	jexec client ifconfig ${epair_client}a 192.0.2.2/24 up
	jexec client route add default 192.0.2.1

	jexec server1 ifconfig ${epair_server1}a 198.51.100.32/24 up
	jexec server2 ifconfig ${epair_server2}a 198.51.100.22/24 up

	# Enable pf!
	jexec nat pfctl -e

	# validate non-endpoint independent nat rule behaviour
	pft_set_rules nat \
		"nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a)"

	# $filter intentionally unquoted: tcpdump needs word-split args.
	jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \
		--immediate-mode $filter &
	server1tcppid="$!"
	jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \
		--immediate-mode $filter &
	server2tcppid="$!"

	# send out multiple packets
	for i in $(seq 1 10); do
		echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0
		echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0
	done

	kill $server1tcppid
	kill $server2tcppid
	# Let the capture processes exit so the pcap files are flushed
	# before we read them back.
	wait

	tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}')
	tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}')

	# Quote the operands: an unquoted empty/multi-word value breaks test(1).
	if [ -z "$tuple_server1" ]
	then
		atf_fail "server1 did not receive connection from client (default)"
	fi

	if [ -z "$tuple_server2" ]
	then
		atf_fail "server2 did not receive connection from client (default)"
	fi

	if [ "$tuple_server1" = "$tuple_server2" ]
	then
		echo "server1 tcpdump: $tuple_server1"
		echo "server2 tcpdump: $tuple_server2"
		atf_fail "Received same IP:port on server1 and server2 (default)"
	fi

	# validate endpoint independent nat rule behaviour
	pft_set_rules nat \
		"nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a) endpoint-independent"

	jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \
		--immediate-mode $filter &
	server1tcppid="$!"
	jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \
		--immediate-mode $filter &
	server2tcppid="$!"

	# send out multiple packets, sometimes one fails to go through
	for i in $(seq 1 10); do
		echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0
		echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0
	done

	kill $server1tcppid
	kill $server2tcppid
	wait

	tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}')
	tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}')

	if [ -z "$tuple_server1" ]
	then
		atf_fail "server1 did not receive connection from client (endpoint-independent)"
	fi

	if [ -z "$tuple_server2" ]
	then
		atf_fail "server2 did not receive connection from client (endpoint-independent)"
	fi

	if [ ! "$tuple_server1" = "$tuple_server2" ]
	then
		echo "server1 tcpdump: $tuple_server1"
		echo "server2 tcpdump: $tuple_server2"
		atf_fail "Received different IP:port on server1 than server2 (endpoint-independent)"
	fi
}
endpoint_independent_cleanup()
{
	pft_cleanup
	# The test writes pcap captures (server[12].pcap), not .out files;
	# remove the files it actually created.
	rm -f server1.pcap
	rm -f server2.pcap
}
nested_anchor_cleanup()
{
pft_cleanup
@ -121,4 +254,5 @@ atf_init_test_cases()
{
atf_add_test_case "exhaust"
atf_add_test_case "nested_anchor"
atf_add_test_case "endpoint_independent"
}