mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-10 14:02:43 +00:00
Introduce ip_fastforward and remove ip_flow.
Short description of ip_fastforward: o adds full direct process-to-completion IPv4 forwarding code o handles ip fragmentation incl. hw support (ip_flow did not) o sends icmp needfrag to source if DF is set (ip_flow did not) o supports ipfw and ipfilter (ip_flow did not) o supports divert, ipfw fwd and ipfilter nat (ip_flow did not) o returns anything it can't handle back to normal ip_input Enable with sysctl -w net.inet.ip.fastforwarding=1 Reviewed by: sam (mentor)
This commit is contained in:
parent
f9d801d6f7
commit
9188b4a169
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=122702
@ -1448,7 +1448,7 @@ netinet/ip_ecn.c optional inet
|
||||
netinet/ip_ecn.c optional inet6
|
||||
netinet/ip_encap.c optional inet
|
||||
netinet/ip_encap.c optional inet6
|
||||
netinet/ip_flow.c optional inet
|
||||
netinet/ip_fastfwd.c optional inet
|
||||
netinet/ip_fw2.c optional ipfirewall
|
||||
netinet/ip_icmp.c optional inet
|
||||
netinet/ip_input.c optional inet
|
||||
|
@ -543,14 +543,14 @@ arc_input(ifp, m)
|
||||
#ifdef INET
|
||||
case ARCTYPE_IP:
|
||||
m_adj(m, ARC_HDRNEWLEN);
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
||||
case ARCTYPE_IP_OLD:
|
||||
m_adj(m, ARC_HDRLEN);
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -253,7 +253,7 @@ ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type)
|
||||
#endif
|
||||
#ifdef INET
|
||||
case ETHERTYPE_IP:
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return (0);
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -714,7 +714,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
|
||||
switch (ether_type) {
|
||||
#ifdef INET
|
||||
case ETHERTYPE_IP:
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -471,7 +471,7 @@ fddi_input(ifp, m)
|
||||
switch (type) {
|
||||
#ifdef INET
|
||||
case ETHERTYPE_IP:
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -556,7 +556,7 @@ iso88025_input(ifp, m)
|
||||
#ifdef INET
|
||||
case ETHERTYPE_IP:
|
||||
th->iso88025_shost[0] &= ~(TR_RII);
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -1537,7 +1537,7 @@ ppp_inproc(sc, m)
|
||||
m->m_pkthdr.len -= PPP_HDRLEN;
|
||||
m->m_data += PPP_HDRLEN;
|
||||
m->m_len -= PPP_HDRLEN;
|
||||
if (ipflow_fastforward(m))
|
||||
if (ip_fastforward(m))
|
||||
return;
|
||||
isr = NETISR_IP;
|
||||
break;
|
||||
|
@ -230,9 +230,7 @@ void in_rtqdrain(void);
|
||||
void ip_input(struct mbuf *);
|
||||
int in_ifadown(struct ifaddr *ifa, int);
|
||||
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
|
||||
int ipflow_fastforward(struct mbuf *);
|
||||
void ipflow_create(const struct route *, struct mbuf *);
|
||||
void ipflow_slowtimo(void);
|
||||
int ip_fastforward(struct mbuf *);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
|
806
sys/netinet/ip_fastfwd.c
Normal file
806
sys/netinet/ip_fastfwd.c
Normal file
@ -0,0 +1,806 @@
|
||||
/*
|
||||
* Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* ip_fastforward gets its speed from processing the forwarded packet to
|
||||
* completion (if_output on the other side) without any queues or netisr's.
|
||||
* The receiving interface DMAs the packet into memory, the upper half of
|
||||
* driver calls ip_fastforward, we do our routing table lookup and directly
|
||||
* send it off to the outgoing interface which DMAs the packet to the
|
||||
* network card. The only part of the packet we touch with the CPU is the
|
||||
* IP header. We are essentially limited by bus bandwidth and how fast the
|
||||
* network card/driver can set up receives and transmits.
|
||||
*
|
||||
* We handle basic errors, ip header errors, checksum errors,
|
||||
* destination unreachable, fragmentation and fragmentation needed and
|
||||
* report them via icmp to the sender.
|
||||
*
|
||||
* Else if something is not pure IPv4 unicast forwarding we fall back to
|
||||
* the normal ip_input processing path. We should only be called from
|
||||
* interfaces connected to the outside world.
|
||||
*
|
||||
* Firewalling is fully supported including divert, ipfw fwd and ipfilter
|
||||
* ipnat and address rewrite.
|
||||
*
|
||||
* IPSEC is not supported if this host is a tunnel broker. IPSEC is
|
||||
* supported for connections to/from local host.
|
||||
*
|
||||
* We try to do the least expensive (in CPU ops) checks and operations
|
||||
* first to catch junk with as little overhead as possible.
|
||||
*
|
||||
* We take full advantage of hardware support for ip checksum and
|
||||
* fragmentation offloading.
|
||||
*
|
||||
* We don't do ICMP redirect in the fast forwarding path. I have had my own
|
||||
* cases where two core routers running the Zebra routing suite would send millions of
|
||||
* ICMP redirects to connected hosts if the router to dest was not the default
|
||||
* gateway. In one case it was filling the routing table of a host with close to
|
||||
* 300'000 cloned redirect entries until it ran out of kernel memory. However
|
||||
* the networking code proved very robust and it did not crash or misbehave
|
||||
* otherwise.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
|
||||
* is being followed here.
|
||||
*/
|
||||
|
||||
#include "opt_ipfw.h"
|
||||
#include "opt_ipdn.h"
|
||||
#include "opt_ipdivert.h"
|
||||
#include "opt_ipfilter.h"
|
||||
#include "opt_ipstealth.h"
|
||||
#include "opt_mac.h"
|
||||
#include "opt_pfil_hooks.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/mac.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/protosw.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <net/pfil.h>
|
||||
#include <net/if.h>
|
||||
#include <net/if_types.h>
|
||||
#include <net/if_var.h>
|
||||
#include <net/if_dl.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/in_var.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/ip_var.h>
|
||||
#include <netinet/ip_icmp.h>
|
||||
|
||||
#include <machine/in_cksum.h>
|
||||
|
||||
#include <netinet/ip_fw.h>
|
||||
#include <netinet/ip_dummynet.h>
|
||||
|
||||
/*
 * Run-time switch for the fast forwarding path, exported read/write as the
 * sysctl net.inet.ip.fastforwarding.  It defaults to 0 (off); ip_fastforward()
 * hands every packet back to its caller while this is zero.
 */
static int ipfastforward_active = 0;
|
||||
SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
|
||||
&ipfastforward_active, 0, "Enable fast IP forwarding");
|
||||
|
||||
/*
|
||||
* Try to forward a packet based on the destination address.
|
||||
* This is a fast path optimized for the plain forwarding case.
|
||||
* If the packet is handled (and consumed) here then we return 1;
|
||||
* otherwise 0 is returned and the packet should be delivered
|
||||
* to ip_input for full processing.
|
||||
*/
|
||||
int
|
||||
ip_fastforward(struct mbuf *m)
|
||||
{
|
||||
struct ip *ip;
|
||||
struct mbuf *m0 = NULL;
|
||||
#ifdef IPDIVERT
|
||||
struct ip *tip;
|
||||
struct mbuf *teem = NULL;
|
||||
#endif
|
||||
struct mbuf *tag = NULL;
|
||||
struct route ro;
|
||||
struct sockaddr_in *dst = NULL;
|
||||
struct in_ifaddr *ia = NULL;
|
||||
struct ifaddr *ifa = NULL;
|
||||
struct ifnet *ifp = NULL;
|
||||
struct ip_fw_args args;
|
||||
in_addr_t odest, dest;
|
||||
u_short sum;
|
||||
int error = 0;
|
||||
int hlen, ipfw, mtu;
|
||||
|
||||
/*
|
||||
* Are we active and forwarding packets?
|
||||
*/
|
||||
if (!ipfastforward_active || !ipforwarding)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If there is any MT_TAG we fall back to ip_input because we can't
|
||||
* handle TAGs here. Should never happen as we get directly called
|
||||
* from the if_output routines.
|
||||
*/
|
||||
if (m->m_type == MT_TAG) {
|
||||
KASSERT(0, ("%s: packet with MT_TAG not expected", __func__));
|
||||
return 0;
|
||||
}
|
||||
|
||||
M_ASSERTVALID(m);
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
/*
|
||||
* Step 1: check for packet drop conditions (and sanity checks)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Is entire packet big enough?
|
||||
*/
|
||||
if (m->m_pkthdr.len < sizeof(struct ip)) {
|
||||
ipstat.ips_tooshort++;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is first mbuf large enough for ip header and is header present?
|
||||
*/
|
||||
if (m->m_len < sizeof (struct ip) &&
|
||||
(m = m_pullup(m, sizeof (struct ip))) == 0) {
|
||||
ipstat.ips_toosmall++;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
ip = mtod(m, struct ip *);
|
||||
|
||||
/*
|
||||
* Is it IPv4?
|
||||
*/
|
||||
if (ip->ip_v != IPVERSION) {
|
||||
ipstat.ips_badvers++;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is IP header length correct and is it in first mbuf?
|
||||
*/
|
||||
hlen = ip->ip_hl << 2;
|
||||
if (hlen < sizeof(struct ip)) { /* minimum header length */
|
||||
ipstat.ips_badlen++;
|
||||
goto drop;
|
||||
}
|
||||
if (hlen > m->m_len) {
|
||||
if ((m = m_pullup(m, hlen)) == 0) {
|
||||
ipstat.ips_badhlen++;
|
||||
goto drop;
|
||||
}
|
||||
ip = mtod(m, struct ip *);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checksum correct?
|
||||
*/
|
||||
if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
|
||||
sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
|
||||
else {
|
||||
if (hlen == sizeof(struct ip))
|
||||
sum = in_cksum_hdr(ip);
|
||||
else
|
||||
sum = in_cksum(m, hlen);
|
||||
}
|
||||
if (sum) {
|
||||
ipstat.ips_badsum++;
|
||||
goto drop;
|
||||
}
|
||||
m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
|
||||
|
||||
/*
|
||||
* Convert to host representation
|
||||
*/
|
||||
ip->ip_len = ntohs(ip->ip_len);
|
||||
ip->ip_off = ntohs(ip->ip_off);
|
||||
|
||||
/*
|
||||
* Is IP length longer than packet we have got?
|
||||
*/
|
||||
if (m->m_pkthdr.len < ip->ip_len) {
|
||||
ipstat.ips_tooshort++;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is packet longer than IP header tells us? If yes, truncate packet.
|
||||
*/
|
||||
if (m->m_pkthdr.len > ip->ip_len) {
|
||||
if (m->m_len == m->m_pkthdr.len) {
|
||||
m->m_len = ip->ip_len;
|
||||
m->m_pkthdr.len = ip->ip_len;
|
||||
} else
|
||||
m_adj(m, ip->ip_len - m->m_pkthdr.len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Is packet from or to 127/8?
|
||||
*/
|
||||
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
|
||||
(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
|
||||
ipstat.ips_badaddr++;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 2: fallback conditions to normal ip_input path processing
|
||||
*/
|
||||
|
||||
/*
|
||||
* Only IP packets without options
|
||||
*/
|
||||
if (ip->ip_hl != (sizeof(struct ip) >> 2))
|
||||
goto fallback;
|
||||
|
||||
/*
|
||||
* Only unicast IP, not from loopback, no L2 or IP broadcast,
|
||||
* no multicast, no INADDR_ANY
|
||||
*
|
||||
* XXX: Probably some of these checks could be direct drop
|
||||
* conditions. However it is not clear whether there are some
|
||||
* hacks or obscure behaviours which make it neccessary to
|
||||
* let ip_input handle it. We play safe here and let ip_input
|
||||
* deal with it until it is proven that we can directly drop it.
|
||||
*/
|
||||
if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
|
||||
ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
|
||||
ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
|
||||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
|
||||
IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
|
||||
ip->ip_dst.s_addr == INADDR_ANY )
|
||||
goto fallback;
|
||||
|
||||
/*
|
||||
* Is it for a local address on this host?
|
||||
*/
|
||||
LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
|
||||
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
/*
|
||||
* Or is it for a local IP broadcast address on this host?
|
||||
*/
|
||||
if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
|
||||
TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
|
||||
if (ifa->ifa_addr->sa_family != AF_INET)
|
||||
continue;
|
||||
ia = ifatoia(ifa);
|
||||
if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
|
||||
goto fallback;
|
||||
if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
|
||||
ip->ip_dst.s_addr)
|
||||
goto fallback;
|
||||
continue;
|
||||
fallback:
|
||||
/* return packet back to netisr for slow processing */
|
||||
ip->ip_len = htons(ip->ip_len);
|
||||
ip->ip_off = htons(ip->ip_off);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
ipstat.ips_total++;
|
||||
|
||||
/*
|
||||
* Step 3: incoming packet firewall processing
|
||||
*/
|
||||
|
||||
odest = dest = ip->ip_dst.s_addr;
|
||||
#ifdef PFIL_HOOKS
|
||||
/*
|
||||
* Run through list of ipfilter hooks for input packets
|
||||
*/
|
||||
if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN) ||
|
||||
m == NULL)
|
||||
return 1;
|
||||
|
||||
M_ASSERTVALID(m);
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
ip = mtod(m, struct ip *); /* m may have changed by pfil hook */
|
||||
dest = ip->ip_dst.s_addr;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Run through ipfw for input packets
|
||||
*/
|
||||
if (fw_enable && IPFW_LOADED) {
|
||||
bzero(&args, sizeof(args));
|
||||
args.m = m;
|
||||
ipfw = 0;
|
||||
|
||||
ipfw = ip_fw_chk_ptr(&args);
|
||||
m = args.m;
|
||||
|
||||
M_ASSERTVALID(m);
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
/*
|
||||
* Packet denied, drop it
|
||||
*/
|
||||
if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL)
|
||||
goto drop;
|
||||
/*
|
||||
* Send packet to the appropriate pipe
|
||||
*/
|
||||
if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
|
||||
ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_IN, &args);
|
||||
return 1;
|
||||
}
|
||||
#ifdef IPDIVERT
|
||||
/*
|
||||
* Divert packet
|
||||
*/
|
||||
if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
|
||||
/*
|
||||
* See if this is a fragment
|
||||
*/
|
||||
if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
|
||||
MGETHDR(tag, M_DONTWAIT, MT_TAG);
|
||||
if (tag == NULL)
|
||||
goto drop;
|
||||
tag->m_flags = PACKET_TAG_DIVERT;
|
||||
tag->m_data = (caddr_t)(u_long)args.divert_rule;
|
||||
tag->m_next = m;
|
||||
/* XXX: really bloody hack, see ip_input */
|
||||
tag->m_nextpkt = (struct mbuf *)1;
|
||||
m = tag;
|
||||
tag = NULL;
|
||||
|
||||
goto droptoours;
|
||||
}
|
||||
/*
|
||||
* Tee packet
|
||||
*/
|
||||
if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
|
||||
teem = m_dup(m, M_DONTWAIT);
|
||||
else
|
||||
teem = m;
|
||||
if (teem == NULL)
|
||||
goto passin;
|
||||
|
||||
/*
|
||||
* Delayed checksums are not compatible
|
||||
*/
|
||||
if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
|
||||
in_delayed_cksum(teem);
|
||||
teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
|
||||
}
|
||||
/*
|
||||
* Restore packet header fields to original values
|
||||
*/
|
||||
tip = mtod(teem, struct ip *);
|
||||
tip->ip_len = htons(tip->ip_len);
|
||||
tip->ip_off = htons(tip->ip_off);
|
||||
/*
|
||||
* Deliver packet to divert input routine
|
||||
*/
|
||||
divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
|
||||
/*
|
||||
* If this was not tee, we are done
|
||||
*/
|
||||
if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0)
|
||||
return 1;
|
||||
/* Continue if it was tee */
|
||||
goto passin;
|
||||
}
|
||||
#endif
|
||||
if (ipfw == 0 && args.next_hop != NULL) {
|
||||
dest = args.next_hop->sin_addr.s_addr;
|
||||
goto passin;
|
||||
}
|
||||
/*
|
||||
* Let through or not?
|
||||
*/
|
||||
if (ipfw != 0)
|
||||
goto drop;
|
||||
}
|
||||
passin:
|
||||
ip = mtod(m, struct ip *); /* if m changed during fw processing */
|
||||
|
||||
/*
|
||||
* Destination address changed?
|
||||
*/
|
||||
if (odest != dest) {
|
||||
/*
|
||||
* Is it now for a local address on this host?
|
||||
*/
|
||||
LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
|
||||
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
|
||||
goto forwardlocal;
|
||||
}
|
||||
/*
|
||||
* Go on with new destination address
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 4: decrement TTL and look up route
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check TTL
|
||||
*/
|
||||
#ifdef IPSTEALTH
|
||||
if (!ipstealth) {
|
||||
#endif
|
||||
if (ip->ip_ttl <= IPTTLDEC) {
|
||||
icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, NULL, NULL);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the TTL and incrementally change the checksum.
|
||||
* Don't bother doing this with hw checksum offloading.
|
||||
*/
|
||||
ip->ip_ttl -= IPTTLDEC;
|
||||
if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
|
||||
ip->ip_sum -= ~htons(IPTTLDEC << 8);
|
||||
else
|
||||
ip->ip_sum += htons(IPTTLDEC << 8);
|
||||
#ifdef IPSTEALTH
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Find route to destination.
|
||||
*/
|
||||
bzero(&ro, sizeof(ro));
|
||||
dst = (struct sockaddr_in *)&ro.ro_dst;
|
||||
dst->sin_family = AF_INET;
|
||||
dst->sin_len = sizeof(*dst);
|
||||
dst->sin_addr.s_addr = dest;
|
||||
rtalloc_ign(&ro, (RTF_PRCLONING | RTF_CLONING));
|
||||
|
||||
/*
|
||||
* Route there and interface still up?
|
||||
*/
|
||||
if (ro.ro_rt &&
|
||||
(ro.ro_rt->rt_flags & RTF_UP) &&
|
||||
(ro.ro_rt->rt_ifp->if_flags & IFF_UP)) {
|
||||
ia = ifatoia(ro.ro_rt->rt_ifa);
|
||||
ifp = ro.ro_rt->rt_ifp;
|
||||
if (ro.ro_rt->rt_flags & RTF_GATEWAY)
|
||||
dst = (struct sockaddr_in *)ro.ro_rt->rt_gateway;
|
||||
} else {
|
||||
ipstat.ips_noroute++;
|
||||
ipstat.ips_cantforward++;
|
||||
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, NULL, NULL);
|
||||
if (ro.ro_rt)
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 5: outgoing firewall packet processing
|
||||
*/
|
||||
|
||||
#ifdef PFIL_HOOKS
|
||||
/*
|
||||
* Run through list of hooks for output packets.
|
||||
*/
|
||||
if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT) || m == NULL) {
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
}
|
||||
|
||||
M_ASSERTVALID(m);
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
ip = mtod(m, struct ip *);
|
||||
dest = ip->ip_dst.s_addr;
|
||||
#endif
|
||||
if (fw_enable && IPFW_LOADED && !args.next_hop) {
|
||||
bzero(&args, sizeof(args));
|
||||
args.m = m;
|
||||
args.oif = ifp;
|
||||
ipfw = 0;
|
||||
|
||||
ipfw = ip_fw_chk_ptr(&args);
|
||||
m = args.m;
|
||||
|
||||
M_ASSERTVALID(m);
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) {
|
||||
RTFREE(ro.ro_rt);
|
||||
goto drop;
|
||||
}
|
||||
if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
|
||||
/*
|
||||
* XXX note: if the ifp or rt entry are deleted
|
||||
* while a pkt is in dummynet, we are in trouble!
|
||||
*/
|
||||
args.ro = &ro; /* dummynet does not save it */
|
||||
args.dst = dst;
|
||||
|
||||
ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_OUT, &args);
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
}
|
||||
#ifdef IPDIVERT
|
||||
if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
|
||||
/*
|
||||
* See if this is a fragment
|
||||
*/
|
||||
if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
|
||||
MGETHDR(tag, M_DONTWAIT, MT_TAG);
|
||||
if (tag == NULL) {
|
||||
RTFREE(ro.ro_rt);
|
||||
goto drop;
|
||||
}
|
||||
tag->m_flags = PACKET_TAG_DIVERT;
|
||||
tag->m_data = (caddr_t)(u_int32_t)args.divert_rule;
|
||||
tag->m_next = m;
|
||||
/* XXX: really bloody hack, see ip_input */
|
||||
tag->m_nextpkt = (struct mbuf *)1;
|
||||
m = tag;
|
||||
tag = NULL;
|
||||
|
||||
goto droptoours;
|
||||
}
|
||||
/*
|
||||
* Tee packet
|
||||
*/
|
||||
if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
|
||||
teem = m_dup(m, M_DONTWAIT);
|
||||
else
|
||||
teem = m;
|
||||
if (teem == NULL)
|
||||
goto passout;
|
||||
|
||||
/*
|
||||
* Delayed checksums are not compatible with divert
|
||||
*/
|
||||
if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
|
||||
in_delayed_cksum(teem);
|
||||
teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
|
||||
}
|
||||
/*
|
||||
* Restore packet header fields to original values
|
||||
*/
|
||||
tip = mtod(teem, struct ip *);
|
||||
tip->ip_len = htons(tip->ip_len);
|
||||
tip->ip_off = htons(tip->ip_off);
|
||||
/*
|
||||
* Deliver packet to divert input routine
|
||||
*/
|
||||
divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
|
||||
/*
|
||||
* If this was not tee, we are done
|
||||
*/
|
||||
if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) {
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
}
|
||||
/* Continue if it was tee */
|
||||
goto passout;
|
||||
}
|
||||
#endif
|
||||
if (ipfw == 0 && args.next_hop != NULL) {
|
||||
dest = args.next_hop->sin_addr.s_addr;
|
||||
goto passout;
|
||||
}
|
||||
/*
|
||||
* Let through or not?
|
||||
*/
|
||||
if (ipfw != 0)
|
||||
goto drop;
|
||||
}
|
||||
passout:
|
||||
ip = mtod(m, struct ip *);
|
||||
|
||||
/*
|
||||
* Destination address changed?
|
||||
*/
|
||||
if (odest != dest) {
|
||||
/*
|
||||
* Is it now for a local address on this host?
|
||||
*/
|
||||
LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
|
||||
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) {
|
||||
forwardlocal:
|
||||
if (args.next_hop) {
|
||||
/* XXX leak */
|
||||
MGETHDR(tag, M_DONTWAIT, MT_TAG);
|
||||
if (tag == NULL) {
|
||||
if (ro.ro_rt)
|
||||
RTFREE(ro.ro_rt);
|
||||
goto drop;
|
||||
}
|
||||
tag->m_flags = PACKET_TAG_IPFORWARD;
|
||||
tag->m_data = (caddr_t)args.next_hop;
|
||||
tag->m_next = m;
|
||||
/* XXX: really bloody hack,
|
||||
* see ip_input */
|
||||
tag->m_nextpkt = (struct mbuf *)1;
|
||||
m = tag;
|
||||
tag = NULL;
|
||||
}
|
||||
#ifdef IPDIVERT
|
||||
droptoours: /* Used for DIVERT */
|
||||
#endif
|
||||
MGETHDR(tag, M_DONTWAIT, MT_TAG);
|
||||
if (tag == NULL) {
|
||||
if (ro.ro_rt)
|
||||
RTFREE(ro.ro_rt);
|
||||
goto drop;
|
||||
}
|
||||
tag->m_flags = PACKET_TAG_IPFASTFWD_OURS;
|
||||
tag->m_data = NULL;
|
||||
tag->m_next = m;
|
||||
/* XXX: really bloody hack, see ip_input */
|
||||
tag->m_nextpkt = (struct mbuf *)1;
|
||||
m = tag;
|
||||
tag = NULL;
|
||||
|
||||
/* ip still points to the real packet */
|
||||
ip->ip_len = htons(ip->ip_len);
|
||||
ip->ip_off = htons(ip->ip_off);
|
||||
|
||||
/*
|
||||
* Return packet for processing by ip_input
|
||||
*/
|
||||
if (ro.ro_rt)
|
||||
RTFREE(ro.ro_rt);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Redo route lookup with new destination address
|
||||
*/
|
||||
RTFREE(ro.ro_rt);
|
||||
bzero(&ro, sizeof(ro));
|
||||
dst = (struct sockaddr_in *)&ro.ro_dst;
|
||||
dst->sin_family = AF_INET;
|
||||
dst->sin_len = sizeof(*dst);
|
||||
dst->sin_addr.s_addr = dest;
|
||||
rtalloc_ign(&ro, (RTF_PRCLONING | RTF_CLONING));
|
||||
|
||||
/*
|
||||
* Route there and interface still up?
|
||||
*/
|
||||
if (ro.ro_rt &&
|
||||
(ro.ro_rt->rt_flags & RTF_UP) &&
|
||||
(ro.ro_rt->rt_ifp->if_flags & IFF_UP)) {
|
||||
ia = ifatoia(ro.ro_rt->rt_ifa);
|
||||
ifp = ro.ro_rt->rt_ifp;
|
||||
if (ro.ro_rt->rt_flags & RTF_GATEWAY)
|
||||
dst = (struct sockaddr_in *)ro.ro_rt->rt_gateway;
|
||||
} else {
|
||||
ipstat.ips_noroute++;
|
||||
ipstat.ips_cantforward++;
|
||||
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST,
|
||||
NULL, NULL);
|
||||
if (ro.ro_rt)
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 6: send off the packet
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check if packet fits MTU or if hardware will fragement for us
|
||||
*/
|
||||
if (ro.ro_rt->rt_rmx.rmx_mtu)
|
||||
mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
|
||||
else
|
||||
mtu = ifp->if_mtu;
|
||||
|
||||
if (ip->ip_len <= mtu ||
|
||||
(ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
|
||||
/*
|
||||
* Restore packet header fields to original values
|
||||
*/
|
||||
ip->ip_len = htons(ip->ip_len);
|
||||
ip->ip_off = htons(ip->ip_off);
|
||||
/*
|
||||
* Send off the packet via outgoing interface
|
||||
*/
|
||||
error = (*ifp->if_output)(ifp, m,
|
||||
(struct sockaddr *)dst, ro.ro_rt);
|
||||
if (ia) {
|
||||
ia->ia_ifa.if_opackets++;
|
||||
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Handle EMSGSIZE with icmp reply
|
||||
* needfrag for TCP MTU discovery
|
||||
*/
|
||||
if (ip->ip_off & IP_DF) {
|
||||
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
|
||||
NULL, ifp);
|
||||
ipstat.ips_cantfrag++;
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
} else {
|
||||
/*
|
||||
* We have to fragement the packet
|
||||
*/
|
||||
m->m_pkthdr.csum_flags |= CSUM_IP;
|
||||
if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
|
||||
(~ifp->if_hwassist & CSUM_DELAY_IP))) {
|
||||
RTFREE(ro.ro_rt);
|
||||
goto drop;
|
||||
}
|
||||
KASSERT(m != NULL, ("null mbuf and no error"));
|
||||
/*
|
||||
* Send off the fragments via outgoing interface
|
||||
*/
|
||||
error = 0;
|
||||
do {
|
||||
m0 = m->m_nextpkt;
|
||||
m->m_nextpkt = NULL;
|
||||
|
||||
error = (*ifp->if_output)(ifp, m,
|
||||
(struct sockaddr *)dst, ro.ro_rt);
|
||||
if (error)
|
||||
break;
|
||||
} while ((m = m0) != NULL);
|
||||
if (error) {
|
||||
/* Reclaim remaining fragments */
|
||||
for (; m; m = m0) {
|
||||
m0 = m->m_nextpkt;
|
||||
m->m_nextpkt = NULL;
|
||||
m_freem(m);
|
||||
}
|
||||
} else
|
||||
ipstat.ips_fragmented++;
|
||||
}
|
||||
}
|
||||
|
||||
if (error != 0)
|
||||
ipstat.ips_odropped++;
|
||||
else {
|
||||
ro.ro_rt->rt_rmx.rmx_pksent++;
|
||||
ipstat.ips_forward++;
|
||||
ipstat.ips_fastforward++;
|
||||
}
|
||||
RTFREE(ro.ro_rt);
|
||||
return 1;
|
||||
drop:
|
||||
if (m)
|
||||
m_freem(m);
|
||||
return 1;
|
||||
}
|
@ -1,377 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1998 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/protosw.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/in_var.h>
|
||||
#include <netinet/ip_var.h>
|
||||
#include <netinet/ip_flow.h>
|
||||
|
||||
/* Timer value in PR_SLOWHZ ticks; its consumer is not in this part of the file. */
#define IPFLOW_TIMER (5 * PR_SLOWHZ)
|
||||
#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
|
||||
/* Number of hash chains; a power of two so the hash can be masked. */
#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
|
||||
/* Build-time guard: presumably the ipf_hash field only holds 8 bits -- verify in ip_flow.h. */
#if IPFLOW_HASHSIZE > 255
|
||||
#error "make ipf_hash larger"
|
||||
#endif
|
||||
/* The flow table: one list head per hash value. */
static struct ipflow_head ipflows[IPFLOW_HASHSIZE];
|
||||
/* Count of flow entries currently present. */
static int ipflow_inuse;
|
||||
/* Upper bound on the number of flow entries. */
#define IPFLOW_MAX 256
|
||||
|
||||
/*
|
||||
* Each flow list has a lock that guards updates to the list and to
|
||||
* all entries on the list. Flow entries hold the hash index for
|
||||
* finding the head of the list so the lock can be found quickly.
|
||||
*
|
||||
* ipflow_inuse holds a count of the number of flow entries present.
|
||||
* This is used to bound the size of the table. When IPFLOW_MAX entries
|
||||
* are present and an additional entry is needed one is chosen for
|
||||
* replacement. We could use atomic ops for this counter but having it
|
||||
* inconsistent doesn't appear to be a problem.
|
||||
*/
|
||||
/* Lock/unlock one hash chain through a pointer to its list head. */
#define IPFLOW_HEAD_LOCK(_ipfh) mtx_lock(&(_ipfh)->ipfh_mtx)
|
||||
#define IPFLOW_HEAD_UNLOCK(_ipfh) mtx_unlock(&(_ipfh)->ipfh_mtx)
|
||||
/* Lock/unlock the chain a flow entry is on, located via its stored hash index. */
#define IPFLOW_LOCK(_ipf) \
|
||||
IPFLOW_HEAD_LOCK(&ipflows[(_ipf)->ipf_hash])
|
||||
#define IPFLOW_UNLOCK(_ipf) \
|
||||
IPFLOW_HEAD_UNLOCK(&ipflows[(_ipf)->ipf_hash])
|
||||
|
||||
/*
 * Run-time switch for flow-based forwarding, exported read/write as the
 * sysctl net.inet.ip.fastforwarding; defaults to 0 (off).
 */
static int ipflow_active = 0;
|
||||
SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
|
||||
&ipflow_active, 0, "Enable flow-based IP forwarding");
|
||||
|
||||
/* malloc(9) type declared for flow allocations. */
static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
|
||||
|
||||
static unsigned
|
||||
ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos)
|
||||
{
|
||||
unsigned hash = tos;
|
||||
int idx;
|
||||
for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
|
||||
hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
|
||||
return hash & (IPFLOW_HASHSIZE-1);
|
||||
}
|
||||
|
||||
static struct ipflow *
|
||||
ipflow_lookup(const struct ip *ip)
|
||||
{
|
||||
unsigned hash;
|
||||
struct ipflow_head *head;
|
||||
struct ipflow *ipf;
|
||||
|
||||
hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
|
||||
head = &ipflows[hash];
|
||||
|
||||
IPFLOW_HEAD_LOCK(head);
|
||||
LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) {
|
||||
if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
|
||||
&& ip->ip_src.s_addr == ipf->ipf_src.s_addr
|
||||
&& ip->ip_tos == ipf->ipf_tos) {
|
||||
/* NB: return head locked */
|
||||
return ipf;
|
||||
}
|
||||
}
|
||||
IPFLOW_HEAD_UNLOCK(head);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int
|
||||
ipflow_fastforward(struct mbuf *m)
|
||||
{
|
||||
struct ip *ip;
|
||||
struct ipflow *ipf;
|
||||
struct rtentry *rt;
|
||||
struct sockaddr *dst;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Are we forwarding packets? Big enough for an IP packet?
|
||||
*/
|
||||
if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
|
||||
return 0;
|
||||
/*
|
||||
* IP header with no option and valid version and length
|
||||
*/
|
||||
ip = mtod(m, struct ip *);
|
||||
if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
|
||||
|| ntohs(ip->ip_len) > m->m_pkthdr.len)
|
||||
return 0;
|
||||
/*
|
||||
* Find a flow.
|
||||
*/
|
||||
if ((ipf = ipflow_lookup(ip)) == NULL)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Route and interface still up?
|
||||
*/
|
||||
rt = ipf->ipf_ro.ro_rt;
|
||||
if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) {
|
||||
IPFLOW_UNLOCK(ipf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Packet size OK? TTL?
|
||||
*/
|
||||
if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) {
|
||||
IPFLOW_UNLOCK(ipf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Everything checks out and so we can forward this packet.
|
||||
* Modify the TTL and incrementally change the checksum.
|
||||
*/
|
||||
ip->ip_ttl -= IPTTLDEC;
|
||||
if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
|
||||
ip->ip_sum += htons(IPTTLDEC << 8) + 1;
|
||||
} else {
|
||||
ip->ip_sum += htons(IPTTLDEC << 8);
|
||||
}
|
||||
|
||||
/*
|
||||
* Send the packet on its way. All we can get back is ENOBUFS
|
||||
*/
|
||||
ipf->ipf_uses++;
|
||||
ipf->ipf_timer = IPFLOW_TIMER;
|
||||
|
||||
if (rt->rt_flags & RTF_GATEWAY)
|
||||
dst = rt->rt_gateway;
|
||||
else
|
||||
dst = &ipf->ipf_ro.ro_dst;
|
||||
if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
|
||||
if (error == ENOBUFS)
|
||||
ipf->ipf_dropped++;
|
||||
else
|
||||
ipf->ipf_errors++;
|
||||
}
|
||||
IPFLOW_UNLOCK(ipf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
ipflow_addstats(struct ipflow *ipf)
|
||||
{
|
||||
ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
|
||||
ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
|
||||
ipstat.ips_forward += ipf->ipf_uses;
|
||||
ipstat.ips_fastforward += ipf->ipf_uses;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX the locking here makes reaping an entry very expensive...
|
||||
*/
|
||||
static struct ipflow *
|
||||
ipflow_reap(void)
|
||||
{
|
||||
struct ipflow *victim = NULL;
|
||||
struct ipflow *ipf;
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
|
||||
struct ipflow_head *head = &ipflows[idx];
|
||||
|
||||
IPFLOW_HEAD_LOCK(head);
|
||||
LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) {
|
||||
/*
|
||||
* If this no longer points to a valid route
|
||||
* reclaim it.
|
||||
*/
|
||||
if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
|
||||
goto done;
|
||||
/*
|
||||
* choose the one that's been least recently used
|
||||
* or has had the least uses in the last 1.5
|
||||
* intervals.
|
||||
*/
|
||||
if (victim == NULL)
|
||||
victim = ipf;
|
||||
else if (ipf->ipf_timer < victim->ipf_timer
|
||||
|| (ipf->ipf_timer == victim->ipf_timer
|
||||
&& ipf->ipf_last_uses + ipf->ipf_uses <
|
||||
victim->ipf_last_uses + victim->ipf_uses)) {
|
||||
if (victim->ipf_hash != ipf->ipf_hash)
|
||||
IPFLOW_UNLOCK(victim);
|
||||
victim = ipf;
|
||||
}
|
||||
}
|
||||
if (victim && victim->ipf_hash != idx)
|
||||
IPFLOW_HEAD_UNLOCK(head);
|
||||
}
|
||||
ipf = victim;
|
||||
done:
|
||||
/*
|
||||
* Remove the entry from the flow table.
|
||||
*/
|
||||
LIST_REMOVE(ipf, ipf_next);
|
||||
IPFLOW_UNLOCK(ipf);
|
||||
|
||||
ipflow_addstats(ipf);
|
||||
RTFREE(ipf->ipf_ro.ro_rt);
|
||||
return ipf;
|
||||
}
|
||||
|
||||
static void
|
||||
ipflow_free(struct ipflow *ipf)
|
||||
{
|
||||
/*
|
||||
* Remove the flow from the hash table.
|
||||
*/
|
||||
LIST_REMOVE(ipf, ipf_next);
|
||||
|
||||
ipflow_addstats(ipf);
|
||||
RTFREE(ipf->ipf_ro.ro_rt);
|
||||
ipflow_inuse--;
|
||||
free(ipf, M_IPFLOW);
|
||||
}
|
||||
|
||||
void
|
||||
ipflow_slowtimo(void)
|
||||
{
|
||||
struct ipflow *ipf;
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
|
||||
struct ipflow_head *head = &ipflows[idx];
|
||||
|
||||
IPFLOW_HEAD_LOCK(head);
|
||||
ipf = LIST_FIRST(&head->ipfh_head);
|
||||
while (ipf != NULL) {
|
||||
struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
|
||||
if (--ipf->ipf_timer == 0) {
|
||||
ipflow_free(ipf);
|
||||
} else {
|
||||
ipf->ipf_last_uses = ipf->ipf_uses;
|
||||
ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
|
||||
ipstat.ips_forward += ipf->ipf_uses;
|
||||
ipstat.ips_fastforward += ipf->ipf_uses;
|
||||
ipf->ipf_uses = 0;
|
||||
}
|
||||
ipf = next_ipf;
|
||||
}
|
||||
IPFLOW_HEAD_UNLOCK(head);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ipflow_create(const struct route *ro, struct mbuf *m)
|
||||
{
|
||||
const struct ip *const ip = mtod(m, struct ip *);
|
||||
struct ipflow *ipf;
|
||||
|
||||
/*
|
||||
* Don't create cache entries for ICMP messages.
|
||||
*/
|
||||
if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
|
||||
return;
|
||||
/*
|
||||
* See if an existing flow struct exists. If so remove it from it's
|
||||
* list and free the old route. If not, try to malloc a new one
|
||||
* (if we aren't at our limit).
|
||||
*/
|
||||
ipf = ipflow_lookup(ip);
|
||||
if (ipf == NULL) {
|
||||
if (ipflow_inuse == IPFLOW_MAX) {
|
||||
ipf = ipflow_reap();
|
||||
} else {
|
||||
ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
|
||||
M_NOWAIT);
|
||||
if (ipf == NULL)
|
||||
return;
|
||||
ipflow_inuse++;
|
||||
}
|
||||
bzero((caddr_t) ipf, sizeof(*ipf));
|
||||
|
||||
ipf->ipf_hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
|
||||
ipf->ipf_dst = ip->ip_dst;
|
||||
ipf->ipf_src = ip->ip_src;
|
||||
ipf->ipf_tos = ip->ip_tos;
|
||||
|
||||
IPFLOW_LOCK(ipf);
|
||||
} else {
|
||||
LIST_REMOVE(ipf, ipf_next);
|
||||
|
||||
ipflow_addstats(ipf); /* add stats to old route */
|
||||
RTFREE(ipf->ipf_ro.ro_rt); /* clear reference */
|
||||
ipf->ipf_uses = ipf->ipf_last_uses = 0;
|
||||
ipf->ipf_errors = ipf->ipf_dropped = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in the updated information.
|
||||
*/
|
||||
ipf->ipf_ro = *ro;
|
||||
RT_LOCK(ro->ro_rt);
|
||||
RT_ADDREF(ro->ro_rt);
|
||||
RT_UNLOCK(ro->ro_rt);
|
||||
ipf->ipf_timer = IPFLOW_TIMER;
|
||||
/*
|
||||
* Insert into the approriate bucket of the flow table.
|
||||
*/
|
||||
LIST_INSERT_HEAD(&ipflows[ipf->ipf_hash].ipfh_head, ipf, ipf_next);
|
||||
IPFLOW_UNLOCK(ipf);
|
||||
}
|
||||
|
||||
static void
|
||||
ipflow_init(void)
|
||||
{
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
|
||||
struct ipflow_head *head = &ipflows[idx];
|
||||
LIST_INIT(&head->ipfh_head);
|
||||
mtx_init(&head->ipfh_mtx, "ipflow list head", NULL, MTX_DEF);
|
||||
}
|
||||
}
|
||||
SYSINIT(ipflow, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipflow_init, 0);
|
@ -1,64 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1998 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _NETINET_IP_FLOW_H
|
||||
#define _NETINET_IP_FLOW_H
|
||||
|
||||
struct ipflow {
|
||||
LIST_ENTRY(ipflow) ipf_next; /* next ipflow in bucket */
|
||||
struct in_addr ipf_dst; /* destination address */
|
||||
struct in_addr ipf_src; /* source address */
|
||||
|
||||
/* NB: this assumes the size of the list head hash table is <=256 */
|
||||
u_int8_t ipf_hash; /* index in list head table */
|
||||
u_int8_t ipf_tos; /* type-of-service */
|
||||
struct route ipf_ro; /* associated route entry */
|
||||
u_long ipf_uses; /* number of uses in this period */
|
||||
|
||||
int ipf_timer; /* remaining lifetime of this entry */
|
||||
u_long ipf_dropped; /* ENOBUFS returned by if_output */
|
||||
u_long ipf_errors; /* other errors returned by if_output */
|
||||
u_long ipf_last_uses; /* number of uses in last period */
|
||||
};
|
||||
|
||||
struct ipflow_head {
|
||||
LIST_HEAD(ipflowhead, ipflow) ipfh_head;
|
||||
struct mtx ipfh_mtx;
|
||||
};
|
||||
|
||||
#endif
|
@ -360,11 +360,12 @@ ip_forward_cacheinval(void)
|
||||
void
|
||||
ip_input(struct mbuf *m)
|
||||
{
|
||||
struct ip *ip;
|
||||
struct ip *ip = NULL;
|
||||
struct ipq *fp;
|
||||
struct in_ifaddr *ia = NULL;
|
||||
struct ifaddr *ifa;
|
||||
int i, hlen, checkif;
|
||||
int i, checkif, hlen = 0;
|
||||
int ours = 0;
|
||||
u_short sum;
|
||||
struct in_addr pkt_dst;
|
||||
u_int32_t divert_info = 0; /* packet divert/tee info */
|
||||
@ -387,8 +388,18 @@ ip_input(struct mbuf *m)
|
||||
args.divert_rule = 0; /* divert cookie */
|
||||
args.next_hop = NULL;
|
||||
|
||||
/* Grab info from MT_TAG mbufs prepended to the chain. */
|
||||
for (; m && m->m_type == MT_TAG; m = m->m_next) {
|
||||
/*
|
||||
* Grab info from MT_TAG mbufs prepended to the chain.
|
||||
*
|
||||
* XXX: This is ugly. These pseudo mbuf prepend tags should really
|
||||
* be real m_tags. Before these have always been allocated on the
|
||||
* callers stack, so we didn't have to free them. Now with
|
||||
* ip_fastforward they are true mbufs and we have to free them
|
||||
* otherwise we have a leak. Must rewrite ipfw to use m_tags.
|
||||
*/
|
||||
for (; m && m->m_type == MT_TAG;) {
|
||||
struct mbuf *m0;
|
||||
|
||||
switch(m->_m_tag_id) {
|
||||
default:
|
||||
printf("ip_input: unrecognised MT_TAG tag %d\n",
|
||||
@ -406,11 +417,24 @@ ip_input(struct mbuf *m)
|
||||
case PACKET_TAG_IPFORWARD:
|
||||
args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
|
||||
break;
|
||||
|
||||
case PACKET_TAG_IPFASTFWD_OURS:
|
||||
ours = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
m0 = m;
|
||||
m = m->m_next;
|
||||
/* XXX: This is set by ip_fastforward */
|
||||
if (m0->m_nextpkt == (struct mbuf *)1)
|
||||
m_free(m0);
|
||||
}
|
||||
|
||||
M_ASSERTPKTHDR(m);
|
||||
|
||||
if (ours) /* ip_fastforward firewall changed dest to local */
|
||||
goto ours;
|
||||
|
||||
if (args.rule) { /* dummynet already filtered us */
|
||||
ip = mtod(m, struct ip *);
|
||||
hlen = ip->ip_hl << 2;
|
||||
@ -1350,7 +1374,6 @@ ip_slowtimo()
|
||||
}
|
||||
}
|
||||
IPQ_UNLOCK();
|
||||
ipflow_slowtimo();
|
||||
splx(s);
|
||||
}
|
||||
|
||||
@ -1980,10 +2003,8 @@ ip_forward(struct mbuf *m, struct route *ro,
|
||||
if (type)
|
||||
ipstat.ips_redirectsent++;
|
||||
else {
|
||||
if (mcopy) {
|
||||
ipflow_create(ro, mcopy);
|
||||
if (mcopy)
|
||||
m_freem(mcopy);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -920,6 +920,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
|
||||
tag.mh_flags = PACKET_TAG_IPFORWARD;
|
||||
tag.mh_data = (caddr_t)args.next_hop;
|
||||
tag.mh_next = m;
|
||||
tag.mh_nextpkt = NULL;
|
||||
|
||||
if (m->m_pkthdr.rcvif == NULL)
|
||||
m->m_pkthdr.rcvif = ifunit("lo0");
|
||||
|
@ -558,6 +558,7 @@ struct mbuf *m_split(struct mbuf *, int, int);
|
||||
#define PACKET_TAG_DIVERT 17 /* divert info */
|
||||
#define PACKET_TAG_IPFORWARD 18 /* ipforward info */
|
||||
#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */
|
||||
#define PACKET_TAG_IPFASTFWD_OURS 20 /* IP fastforward dropback */
|
||||
|
||||
/* Packet tag routines */
|
||||
struct m_tag *m_tag_alloc(u_int32_t, int, int, int);
|
||||
|
Loading…
Reference in New Issue
Block a user