diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 3d2ff660a99..fd6397e3b18 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -161,10 +161,8 @@ in_gif_output(ifp, family, m) /* version will be set in ip_output() */ iphdr.ip_ttl = ip_gif_ttl; iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip); - if (ifp->if_flags & IFF_LINK1) - ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos); - else - ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos); + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE, + &iphdr.ip_tos, &tos); /* prepend new IP header */ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); @@ -250,10 +248,12 @@ in_gif_input(m, off) return; } ip = mtod(m, struct ip *); - if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &ip->ip_tos); - else - ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos); + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos, &ip->ip_tos) == 0) { + m_freem(m); + return; + } break; } #endif @@ -261,7 +261,8 @@ in_gif_input(m, off) case IPPROTO_IPV6: { struct ip6_hdr *ip6; - u_int8_t itos; + u_int8_t itos, oitos; + af = AF_INET6; if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); @@ -269,13 +270,17 @@ in_gif_input(m, off) return; } ip6 = mtod(m, struct ip6_hdr *); - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &itos); - else - ip_ecn_egress(ECN_NOCARE, &otos, &itos); - ip6->ip6_flow &= ~htonl(0xff << 20); - ip6->ip6_flow |= htonl((u_int32_t)itos << 20); + itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? 
+ ECN_ALLOWED : ECN_NOCARE, + &otos, &itos) == 0) { + m_freem(m); + return; + } + if (itos != oitos) { + ip6->ip6_flow &= ~htonl(0xff << 20); + ip6->ip6_flow |= htonl((u_int32_t)itos << 20); + } break; } #endif /* INET6 */ diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h index 24f66ceb24d..025ad085d14 100644 --- a/sys/netinet/ip.h +++ b/sys/netinet/ip.h @@ -84,10 +84,11 @@ CTASSERT(sizeof (struct ip) == 20); #define IPTOS_THROUGHPUT 0x08 #define IPTOS_RELIABILITY 0x04 #define IPTOS_MINCOST 0x02 -/* ECN bits proposed by Sally Floyd */ -#define IPTOS_CE 0x01 /* congestion experienced */ -#define IPTOS_ECT 0x02 /* ECN-capable transport */ - +#if 1 +/* ECN RFC3168 obsoletes RFC2481, and these will be deprecated soon. */ +#define IPTOS_CE 0x01 +#define IPTOS_ECT 0x02 +#endif /* * Definitions for IP precedence (also in ip_tos) (hopefully unused) @@ -101,6 +102,16 @@ CTASSERT(sizeof (struct ip) == 20); #define IPTOS_PREC_PRIORITY 0x20 #define IPTOS_PREC_ROUTINE 0x00 +/* + * ECN (Explicit Congestion Notification) codepoints in RFC3168 + * mapped to the lower 2 bits of the TOS field. + */ +#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */ +#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */ +#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ +#define IPTOS_ECN_CE 0x03 /* congestion experienced */ +#define IPTOS_ECN_MASK 0x03 /* ECN field mask */ + /* * Definitions for options. */ diff --git a/sys/netinet/ip_ecn.c b/sys/netinet/ip_ecn.c index de3d38e9e25..9ea2f6bce58 100644 --- a/sys/netinet/ip_ecn.c +++ b/sys/netinet/ip_ecn.c @@ -1,5 +1,5 @@ /* $FreeBSD$ */ -/* $KAME: ip_ecn.c,v 1.11 2001/05/03 16:09:29 itojun Exp $ */ +/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */ /* * Copyright (C) 1999 WIDE Project. 
@@ -55,6 +55,37 @@ #include <netinet6/ip6_ecn.h> #endif +/* + * ECN and TOS (or TCLASS) processing rules at tunnel encapsulation and + * decapsulation from RFC3168: + * + * Outer Hdr at Inner Hdr at + * Encapsulator Decapsulator + * Header fields: -------------------- ------------ + * DS Field copied from inner hdr no change + * ECN Field constructed by (I) constructed by (E) + * + * ECN_ALLOWED (full functionality): + * (I) if the ECN field in the inner header is set to CE, then set the + * ECN field in the outer header to ECT(0). + * otherwise, copy the ECN field to the outer header. + * + * (E) if the ECN field in the outer header is set to CE and the ECN + * field of the inner header is not-ECT, drop the packet. + * if the ECN field in the inner header is set to ECT(0) or ECT(1) + * and the ECN field in the outer header is set to CE, then copy CE to + * the inner header. otherwise, make no change to the inner header. + * + * ECN_FORBIDDEN (limited functionality): + * (I) set the ECN field to not-ECT in the outer header. + * + * (E) if the ECN field in the outer header is set to CE, drop the packet. + * otherwise, make no change to the ECN field in the inner header. + * + * the drop rule is for backward compatibility and protection against + * erasure of CE. + */ + /* * modify outer ECN (TOS) field on ingress operation (tunnel encapsulation). */ @@ -70,10 +101,18 @@ ip_ecn_ingress(mode, outer, inner) *outer = *inner; switch (mode) { case ECN_ALLOWED: /* ECN allowed */ - *outer &= ~IPTOS_CE; + /* + * full-functionality: if the inner is CE, set ECT(0) + * to the outer. otherwise, copy the ECN field. 
+ */ + if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + *outer &= ~IPTOS_ECN_ECT1; break; case ECN_FORBIDDEN: /* ECN forbidden */ - *outer &= ~(IPTOS_ECT | IPTOS_CE); + /* + * limited-functionality: set not-ECT to the outer + */ + *outer &= ~IPTOS_ECN_MASK; break; case ECN_NOCARE: /* no consideration to ECN */ break; @@ -82,8 +121,9 @@ ip_ecn_ingress(mode, outer, inner) /* * modify inner ECN (TOS) field on egress operation (tunnel decapsulation). + * the caller should drop the packet if the return value is 0. */ -void +int ip_ecn_egress(mode, outer, inner) int mode; const u_int8_t *outer; @@ -94,13 +134,28 @@ ip_ecn_egress(mode, outer, inner) switch (mode) { case ECN_ALLOWED: - if (*outer & IPTOS_CE) - *inner |= IPTOS_CE; + /* + * full-functionality: if the outer is CE and the inner is + * not-ECT, should drop it. otherwise, copy CE. + */ + if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) { + if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) + return (0); + *inner |= IPTOS_ECN_CE; + } break; case ECN_FORBIDDEN: /* ECN forbidden */ + /* + * limited-functionality: if the outer is CE, should drop it. + * otherwise, leave the inner. 
+ */ + if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + return (0); + break; case ECN_NOCARE: /* no consideration to ECN */ break; } + return (1); } #ifdef INET6 @@ -115,28 +170,31 @@ ip6_ecn_ingress(mode, outer, inner) if (!outer || !inner) panic("NULL pointer passed to ip6_ecn_ingress"); - outer8 = (ntohl(*outer) >> 20) & 0xff; inner8 = (ntohl(*inner) >> 20) & 0xff; ip_ecn_ingress(mode, &outer8, &inner8); *outer &= ~htonl(0xff << 20); *outer |= htonl((u_int32_t)outer8 << 20); } -void +int ip6_ecn_egress(mode, outer, inner) int mode; const u_int32_t *outer; u_int32_t *inner; { - u_int8_t outer8, inner8; + u_int8_t outer8, inner8, oinner8; if (!outer || !inner) panic("NULL pointer passed to ip6_ecn_egress"); outer8 = (ntohl(*outer) >> 20) & 0xff; - inner8 = (ntohl(*inner) >> 20) & 0xff; - ip_ecn_egress(mode, &outer8, &inner8); - *inner &= ~htonl(0xff << 20); - *inner |= htonl((u_int32_t)inner8 << 20); + inner8 = oinner8 = (ntohl(*inner) >> 20) & 0xff; + if (ip_ecn_egress(mode, &outer8, &inner8) == 0) + return (0); + if (inner8 != oinner8) { + *inner &= ~htonl(0xff << 20); + *inner |= htonl((u_int32_t)inner8 << 20); + } + return (1); } #endif diff --git a/sys/netinet/ip_ecn.h b/sys/netinet/ip_ecn.h index 1a38a48aa7c..01163edccf9 100644 --- a/sys/netinet/ip_ecn.h +++ b/sys/netinet/ip_ecn.h @@ -1,5 +1,5 @@ /* $FreeBSD$ */ -/* $KAME: ip_ecn.h,v 1.6 2001/05/03 14:51:48 itojun Exp $ */ +/* $KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $ */ /* * Copyright (C) 1999 WIDE Project. 
@@ -35,6 +35,9 @@ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt */ +#ifndef _NETINET_IP_ECN_H_ +#define _NETINET_IP_ECN_H_ + #if defined(_KERNEL) && !defined(_LKM) #include "opt_inet.h" #endif @@ -45,5 +48,6 @@ #ifdef _KERNEL extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); -extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +#endif #endif diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 02bd1323276..30a5b750184 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1037,6 +1037,7 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, struct mbuf *t; int hlen = ip->ip_hl << 2; int i, next; + u_int8_t ecn, ecn0; IPQ_LOCK_ASSERT(); @@ -1085,6 +1086,22 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) + /* + * Handle ECN by comparing this segment with the first one; + * if CE is set, do not lose CE. + * drop if CE and not-ECT are mixed for the same packet. + */ + ecn = ip->ip_tos & IPTOS_ECN_MASK; + ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; + if (ecn == IPTOS_ECN_CE) { + if (ecn0 == IPTOS_ECN_NOTECT) + goto dropfrag; + if (ecn0 != IPTOS_ECN_CE) + GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; + } + if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) + goto dropfrag; + /* * Find a segment which begins after this one does. */ diff --git a/sys/netinet6/ah_input.c b/sys/netinet6/ah_input.c index 6fb807195e1..78076b64e4f 100644 --- a/sys/netinet6/ah_input.c +++ b/sys/netinet6/ah_input.c @@ -405,7 +405,10 @@ ah4_input(m, off) } ip = mtod(m, struct ip *); /* ECN consideration. 
*/ - ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); + if (!ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos)) { + ipsecstat.in_inval++; + goto fail; + } if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch " @@ -812,7 +815,10 @@ ah6_input(mp, offp, proto) } ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. */ - ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); + if (!ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow)) { + ipsec6stat.in_inval++; + goto fail; + } if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch " diff --git a/sys/netinet6/esp_input.c b/sys/netinet6/esp_input.c index f2d802c0972..f25d0f3160f 100644 --- a/sys/netinet6/esp_input.c +++ b/sys/netinet6/esp_input.c @@ -371,7 +371,10 @@ noreplaycheck: } ip = mtod(m, struct ip *); /* ECN consideration. */ - ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); + if (!ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos)) { + ipsecstat.in_inval++; + goto bad; + } if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " @@ -723,7 +726,10 @@ noreplaycheck: } ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. 
*/ - ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); + if (!ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow)) { + ipsec6stat.in_inval++; + goto bad; + } if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 3bb425fcf8d..f8a86a1fac0 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -52,6 +52,8 @@ #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #include <netinet/icmp6.h> +#include <netinet/in_systm.h> /* for ECN definitions */ +#include <netinet/ip.h> /* for ECN definitions */ #include <net/net_osdep.h> @@ -151,6 +153,7 @@ frag6_input(mp, offp, proto) int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; + u_int8_t ecn, ecn0; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST @@ -349,6 +352,26 @@ frag6_input(mp, offp, proto) goto insert; } + /* + * Handle ECN by comparing this segment with the first one; + * if CE is set, do not lose CE. + * drop if CE and not-ECT are mixed for the same packet. + */ + ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; + ecn0 = (ntohl(q6->ip6q_down->ip6af_head) >> 20) & IPTOS_ECN_MASK; + if (ecn == IPTOS_ECN_CE) { + if (ecn0 == IPTOS_ECN_NOTECT) { + free(ip6af, M_FTABLE); + goto dropfrag; + } + if (ecn0 != IPTOS_ECN_CE) + q6->ip6q_down->ip6af_head |= htonl(IPTOS_ECN_CE << 20); + } + if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { + free(ip6af, M_FTABLE); + goto dropfrag; + } + /* * Find a segment which begins after this one does. */ diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index fae8a986489..e3cd124d01f 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -168,11 +168,9 @@ in6_gif_output(ifp, family, m) m_freem(m); return ENETUNREACH; } - if (ifp->if_flags & IFF_LINK1) - ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); - else - ip_ecn_ingress(ECN_NOCARE, &otos, &itos); - ip6->ip6_flow &= ~ntohl(0xff00000); + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? 
ECN_ALLOWED : ECN_NOCARE, + &otos, &itos); + ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)otos << 20); if (dst->sin6_family != sin6_dst->sin6_family || @@ -259,10 +257,12 @@ in6_gif_input(mp, offp, proto) return IPPROTO_DONE; } ip = mtod(m, struct ip *); - if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos); - else - ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos); + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos8, &ip->ip_tos) == 0) { + m_freem(m); + return IPPROTO_DONE; + } break; } #endif /* INET */ @@ -277,10 +277,12 @@ in6_gif_input(mp, offp, proto) return IPPROTO_DONE; } ip6 = mtod(m, struct ip6_hdr *); - if (gifp->if_flags & IFF_LINK1) - ip6_ecn_egress(ECN_ALLOWED, &otos, &ip6->ip6_flow); - else - ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow); + if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos, &ip6->ip6_flow) == 0) { + m_freem(m); + return IPPROTO_DONE; + } break; } #endif diff --git a/sys/netinet6/ip6_ecn.h b/sys/netinet6/ip6_ecn.h index 4107cf09af2..6644b561e14 100644 --- a/sys/netinet6/ip6_ecn.h +++ b/sys/netinet6/ip6_ecn.h @@ -36,6 +36,6 @@ */ #ifdef _KERNEL -extern void ip6_ecn_ingress __P((int, u_int32_t *, const u_int32_t *)); -extern void ip6_ecn_egress __P((int, const u_int32_t *, u_int32_t *)); +extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); +extern int ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); #endif