mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-23 16:01:42 +00:00
This patch provides the back end support for equal-cost multi-path
(ECMP) for both IPv4 and IPv6. Previously, multipath route insertion
was disallowed. For example,

    route add -net 192.103.54.0/24 10.9.44.1
    route add -net 192.103.54.0/24 10.9.44.2

The second route insertion would trigger an error message of
"add net 192.103.54.0/24: gateway 10.2.5.2: route already in table"

Multiple default routes can also be inserted. Here is the netstat output:

    default 10.2.5.1 UGS 0 3074 bge0 =>
    default 10.2.5.2 UGS 0 0 bge0

When multipath routes exist, the "route delete" command requires a
specific gateway to be specified, or else an error message is displayed.
For example,

    route delete default

would fail and trigger the following error messages:
"route: writing to routing socket: No such process"
"delete net default: not in table"

On the other hand,

    route delete default 10.2.5.2

would be successful: "delete net default: gateway 10.2.5.2"

One does not have to specify a gateway if there is only a single route
for a particular destination.

I need to perform more testing on address aliases and multiple
interfaces that have the same IP prefixes. This patch as it stands today
is not yet ready for prime time. Therefore, the ECMP code fragments are
fully guarded by the RADIX_MPATH macro. Include "options RADIX_MPATH" in
the kernel configuration to enable this feature.

Reviewed by: robert, sam, gnn, julian, kmacy
This commit is contained in:
parent
22dd228d5d
commit
e440aed958
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=178167
@ -1670,6 +1670,7 @@ net/ppp_deflate.c optional ppp_deflate
|
||||
net/ppp_tty.c optional ppp
|
||||
net/pfil.c optional ether | inet
|
||||
net/radix.c standard
|
||||
net/radix_mpath.c standard
|
||||
net/raw_cb.c standard
|
||||
net/raw_usrreq.c standard
|
||||
net/route.c standard
|
||||
|
@ -393,6 +393,7 @@ NETATALK opt_atalk.h
|
||||
PPP_BSDCOMP opt_ppp.h
|
||||
PPP_DEFLATE opt_ppp.h
|
||||
PPP_FILTER opt_ppp.h
|
||||
RADIX_MPATH opt_mpath.h
|
||||
SLIP_IFF_OPTS opt_slip.h
|
||||
TCPDEBUG
|
||||
TCP_SIGNATURE opt_inet.h
|
||||
|
@ -48,6 +48,13 @@
|
||||
#include <net/radix.h>
|
||||
#endif
|
||||
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
|
||||
static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
|
||||
walktree_f_t *f, void *w);
|
||||
static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
|
||||
@ -630,6 +637,21 @@ rn_addroute(v_arg, n_arg, head, treenodes)
|
||||
saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
|
||||
if (keyduplicated) {
|
||||
for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
|
||||
#ifdef RADIX_MPATH
|
||||
/* permit multipath, if enabled for the family */
|
||||
if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
|
||||
/*
|
||||
* go down to the end of multipaths, so that
|
||||
* new entry goes into the end of rn_dupedkey
|
||||
* chain.
|
||||
*/
|
||||
do {
|
||||
t = tt;
|
||||
tt = tt->rn_dupedkey;
|
||||
} while (tt && t->rn_mask == tt->rn_mask);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (tt->rn_mask == netmask)
|
||||
return (0);
|
||||
if (netmask == 0 ||
|
||||
|
@ -130,6 +130,7 @@ struct radix_node_head {
|
||||
void (*rnh_close) /* do something when the last ref drops */
|
||||
(struct radix_node *rn, struct radix_node_head *head);
|
||||
struct radix_node rnh_nodes[3]; /* empty tree for common case */
|
||||
int rnh_multipath; /* multipath capable ? */
|
||||
#ifdef _KERNEL
|
||||
struct mtx rnh_mtx; /* locks entire radix tree */
|
||||
#endif
|
||||
|
122
sys/net/route.c
122
sys/net/route.c
@ -32,6 +32,7 @@
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_mrouting.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@ -44,6 +45,10 @@
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/ip_mroute.h>
|
||||
|
||||
@ -700,6 +705,67 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
|
||||
}
|
||||
switch (req) {
|
||||
case RTM_DELETE:
|
||||
#ifdef RADIX_MPATH
|
||||
/*
|
||||
* if we got multipath routes, we require users to specify
|
||||
* a matching RTAX_GATEWAY.
|
||||
*/
|
||||
if (rn_mpath_capable(rnh)) {
|
||||
struct rtentry *rto = NULL;
|
||||
|
||||
rn = rnh->rnh_matchaddr(dst, rnh);
|
||||
if (rn == NULL)
|
||||
senderr(ESRCH);
|
||||
rto = rt = RNTORT(rn);
|
||||
rt = rt_mpath_matchgate(rt, gateway);
|
||||
if (!rt)
|
||||
senderr(ESRCH);
|
||||
/*
|
||||
* this is the first entry in the chain
|
||||
*/
|
||||
if (rto == rt) {
|
||||
rn = rn_mpath_next((struct radix_node *)rt);
|
||||
/*
|
||||
* there is another entry, now it's active
|
||||
*/
|
||||
if (rn) {
|
||||
rto = RNTORT(rn);
|
||||
RT_LOCK(rto);
|
||||
rto->rt_flags |= RTF_UP;
|
||||
RT_UNLOCK(rto);
|
||||
} else if (rt->rt_flags & RTF_GATEWAY) {
|
||||
/*
|
||||
* For gateway routes, we need to
|
||||
* make sure that we are deleting
|
||||
* the correct gateway.
|
||||
* rt_mpath_matchgate() does not
|
||||
* check the case when there is only
|
||||
* one route in the chain.
|
||||
*/
|
||||
if (gateway &&
|
||||
(rt->rt_gateway->sa_len != gateway->sa_len ||
|
||||
memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
|
||||
senderr(ESRCH);
|
||||
}
|
||||
/*
|
||||
* use the normal delete code to remove
|
||||
* the first entry
|
||||
*/
|
||||
goto normal_rtdel;
|
||||
}
|
||||
/*
|
||||
* if the entry is 2nd and on up
|
||||
*/
|
||||
if (!rt_mpath_deldup(rto, rt))
|
||||
panic ("rtrequest1: rt_mpath_deldup");
|
||||
RT_LOCK(rt);
|
||||
RT_ADDREF(rt);
|
||||
rt->rt_flags &= ~RTF_UP;
|
||||
goto deldone; /* done with the RTM_DELETE command */
|
||||
}
|
||||
#endif
|
||||
|
||||
normal_rtdel:
|
||||
/*
|
||||
* Remove the item from the tree and return it.
|
||||
* Complain if it is not there and do no more processing.
|
||||
@ -740,6 +806,7 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
|
||||
if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
|
||||
ifa->ifa_rtrequest(RTM_DELETE, rt, info);
|
||||
|
||||
deldone:
|
||||
/*
|
||||
* One more rtentry floating around that is not
|
||||
* linked to the routing table. rttrash will be decremented
|
||||
@ -822,6 +889,22 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
|
||||
rt->rt_ifa = ifa;
|
||||
rt->rt_ifp = ifa->ifa_ifp;
|
||||
|
||||
#ifdef RADIX_MPATH
|
||||
/* do not permit exactly the same dst/mask/gw pair */
|
||||
if (rn_mpath_capable(rnh) &&
|
||||
rt_mpath_conflict(rnh, rt, netmask)) {
|
||||
if (rt->rt_gwroute)
|
||||
RTFREE(rt->rt_gwroute);
|
||||
if (rt->rt_ifa) {
|
||||
IFAFREE(rt->rt_ifa);
|
||||
}
|
||||
Free(rt_key(rt));
|
||||
RT_LOCK_DESTROY(rt);
|
||||
uma_zfree(rtzone, rt);
|
||||
senderr(EEXIST);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
|
||||
rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
|
||||
if (rn == NULL) {
|
||||
@ -1166,7 +1249,7 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
|
||||
struct mbuf *m = NULL;
|
||||
struct rtentry *rt = NULL;
|
||||
struct rt_addrinfo info;
|
||||
int error;
|
||||
int error=0;
|
||||
|
||||
if (flags & RTF_HOST) {
|
||||
dst = ifa->ifa_dstaddr;
|
||||
@ -1208,10 +1291,32 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
|
||||
if ((rnh = rt_tables[dst->sa_family]) == NULL)
|
||||
goto bad;
|
||||
RADIX_NODE_HEAD_LOCK(rnh);
|
||||
#ifdef RADIX_MPATH
|
||||
if (rn_mpath_capable(rnh)) {
|
||||
|
||||
rn = rnh->rnh_matchaddr(dst, rnh);
|
||||
if (rn == NULL)
|
||||
error = ESRCH;
|
||||
else {
|
||||
rt = RNTORT(rn);
|
||||
/*
|
||||
* for interface route the rt->rt_gateway is
|
||||
* sockaddr_intf for cloning ARP entries, so
|
||||
* rt_mpath_matchgate must use the interface
|
||||
* address
|
||||
*/
|
||||
rt = rt_mpath_matchgate(rt, ifa->ifa_addr);
|
||||
if (!rt)
|
||||
error = ESRCH;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
|
||||
(rn->rn_flags & RNF_ROOT) ||
|
||||
RNTORT(rn)->rt_ifa != ifa ||
|
||||
!sa_equal((struct sockaddr *)rn->rn_key, dst));
|
||||
|
||||
RADIX_NODE_HEAD_UNLOCK(rnh);
|
||||
if (error) {
|
||||
bad:
|
||||
@ -1235,6 +1340,21 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
|
||||
* notify any listening routing agents of the change
|
||||
*/
|
||||
RT_LOCK(rt);
|
||||
#ifdef RADIX_MPATH
|
||||
/*
|
||||
* in case address alias finds the first address
|
||||
* e.g. ifconfig bge0 192.103.54.246/24
|
||||
* e.g. ifconfig bge0 192.103.54.247/24
|
||||
* the address set in the route is 192.103.54.246
|
||||
* so we need to replace it with 192.103.54.247
|
||||
*/
|
||||
if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
|
||||
IFAFREE(rt->rt_ifa);
|
||||
IFAREF(ifa);
|
||||
rt->rt_ifp = ifa->ifa_ifp;
|
||||
rt->rt_ifa = ifa;
|
||||
}
|
||||
#endif
|
||||
rt_newaddrmsg(cmd, ifa, error, rt);
|
||||
if (cmd == RTM_DELETE) {
|
||||
/*
|
||||
|
@ -97,6 +97,9 @@ struct mbuf;
|
||||
*/
|
||||
#ifndef RNF_NORMAL
|
||||
#include <net/radix.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
#endif
|
||||
struct rtentry {
|
||||
struct radix_node rt_nodes[2]; /* tree glue, and other values */
|
||||
|
@ -30,6 +30,8 @@
|
||||
* $FreeBSD$
|
||||
*/
|
||||
#include "opt_sctp.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/domain.h>
|
||||
#include <sys/kernel.h>
|
||||
@ -420,6 +422,24 @@ route_output(struct mbuf *m, struct socket *so)
|
||||
RADIX_NODE_HEAD_UNLOCK(rnh);
|
||||
senderr(ESRCH);
|
||||
}
|
||||
#ifdef RADIX_MPATH
|
||||
/*
|
||||
* for RTM_CHANGE/LOCK, if we got multipath routes,
|
||||
* we require users to specify a matching RTAX_GATEWAY.
|
||||
*
|
||||
* for RTM_GET, gate is optional even with multipath.
|
||||
* if gate == NULL the first match is returned.
|
||||
* (no need to call rt_mpath_matchgate if gate == NULL)
|
||||
*/
|
||||
if (rn_mpath_capable(rnh) &&
|
||||
(rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
|
||||
rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
|
||||
if (!rt) {
|
||||
RADIX_NODE_HEAD_UNLOCK(rnh);
|
||||
senderr(ESRCH);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
RT_LOCK(rt);
|
||||
RT_ADDREF(rt);
|
||||
RADIX_NODE_HEAD_UNLOCK(rnh);
|
||||
|
@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_pf.h"
|
||||
#include "opt_carp.h"
|
||||
#include "opt_sctp.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@ -51,6 +52,9 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
@ -352,7 +356,11 @@ struct domain inetdomain = {
|
||||
.dom_name = "internet",
|
||||
.dom_protosw = inetsw,
|
||||
.dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])],
|
||||
#ifdef RADIX_MPATH
|
||||
.dom_rtattach = rn4_mpath_inithead,
|
||||
#else
|
||||
.dom_rtattach = in_inithead,
|
||||
#endif
|
||||
.dom_rtoffset = 32,
|
||||
.dom_maxrtkey = sizeof(struct sockaddr_in)
|
||||
};
|
||||
|
@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_ipsec.h"
|
||||
#include "opt_mac.h"
|
||||
#include "opt_mbuf_stress_test.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@ -54,6 +55,9 @@ __FBSDID("$FreeBSD$");
|
||||
#include <net/netisr.h>
|
||||
#include <net/pfil.h>
|
||||
#include <net/route.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
@ -225,7 +229,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
|
||||
* operation (as it is for ARP).
|
||||
*/
|
||||
if (ro->ro_rt == NULL)
|
||||
#ifdef RADIX_MPATH
|
||||
rtalloc_mpath(ro,
|
||||
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr));
|
||||
#else
|
||||
rtalloc_ign(ro, 0);
|
||||
#endif
|
||||
if (ro->ro_rt == NULL) {
|
||||
ipstat.ips_noroute++;
|
||||
error = EHOSTUNREACH;
|
||||
|
@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_ipstealth.h"
|
||||
#include "opt_carp.h"
|
||||
#include "opt_sctp.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/socket.h>
|
||||
@ -83,6 +84,9 @@ __FBSDID("$FreeBSD$");
|
||||
#include <net/if.h>
|
||||
#include <net/radix.h>
|
||||
#include <net/route.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
@ -347,7 +351,11 @@ struct domain inet6domain = {
|
||||
.dom_protosw = (struct protosw *)inet6sw,
|
||||
.dom_protoswNPROTOSW = (struct protosw *)
|
||||
&inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])],
|
||||
#ifdef RADIX_MPATH
|
||||
.dom_rtattach = rn6_mpath_inithead,
|
||||
#else
|
||||
.dom_rtattach = in6_inithead,
|
||||
#endif
|
||||
.dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3,
|
||||
.dom_maxrtkey = sizeof(struct sockaddr_in6),
|
||||
.dom_ifattach = in6_domifattach,
|
||||
|
@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_inet6.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@ -84,6 +85,9 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/route.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_var.h>
|
||||
@ -568,7 +572,12 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
|
||||
sa6->sin6_scope_id = 0;
|
||||
|
||||
if (clone) {
|
||||
#ifdef RADIX_MPATH
|
||||
rtalloc_mpath((struct route *)ro,
|
||||
ntohl(sa6->sin6_addr.s6_addr32[3]));
|
||||
#else
|
||||
rtalloc((struct route *)ro);
|
||||
#endif
|
||||
} else {
|
||||
ro->ro_rt = rtalloc1(&((struct route *)ro)
|
||||
->ro_dst, 0, 0UL);
|
||||
|
@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_inet6.h"
|
||||
#include "opt_ipsec.h"
|
||||
#include "opt_carp.h"
|
||||
#include "opt_mpath.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
@ -55,6 +56,9 @@ __FBSDID("$FreeBSD$");
|
||||
#include <net/if_dl.h>
|
||||
#include <net/if_var.h>
|
||||
#include <net/route.h>
|
||||
#ifdef RADIX_MPATH
|
||||
#include <net/radix_mpath.h>
|
||||
#endif
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_var.h>
|
||||
@ -208,13 +212,23 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
|
||||
struct rtentry *rt;
|
||||
struct sockaddr_in6 tsin6;
|
||||
int need_proxy;
|
||||
#ifdef RADIX_MPATH
|
||||
struct route_in6 ro;
|
||||
#endif
|
||||
|
||||
bzero(&tsin6, sizeof tsin6);
|
||||
tsin6.sin6_len = sizeof(struct sockaddr_in6);
|
||||
tsin6.sin6_family = AF_INET6;
|
||||
tsin6.sin6_addr = taddr6;
|
||||
|
||||
#ifdef RADIX_MPATH
|
||||
bzero(&ro, sizeof(ro));
|
||||
ro.ro_dst = tsin6;
|
||||
rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE);
|
||||
rt = ro.ro_rt;
|
||||
#else
|
||||
rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0);
|
||||
#endif
|
||||
need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 &&
|
||||
rt->rt_gateway->sa_family == AF_LINK);
|
||||
if (rt)
|
||||
|
Loading…
Reference in New Issue
Block a user