diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 9d5734d7428a..5ef383e8373b 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -750,7 +750,7 @@ ipf_fastroute(m0, mpp, fin, fdp) * currently "to " and "to :ip#" are not supported * for IPv6 */ - return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + return ip6_output(m, NULL, NULL, 0, NULL, NULL); } #endif diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 061bdcc85226..957e86615fec 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -1140,7 +1140,7 @@ get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, } /* TODO: Multipath */ - if (fib6_lookup_nh_ext(inc->inc_fibnum, inc->inc6_faddr, + if (fib6_lookup_nh_ext(inc->inc_fibnum, &inc->inc6_faddr, 0, 0, 0, &nhu.u.nh6) != 0) return (NULL); ((struct sockaddr_in6 *)dst)->sin6_addr = nhu.u.nh6.nh_addr; diff --git a/sys/net/rt_nhops.c b/sys/net/rt_nhops.c index 23157fe8cf70..4b9624339cee 100644 --- a/sys/net/rt_nhops.c +++ b/sys/net/rt_nhops.c @@ -66,6 +66,10 @@ #include #include #include +#include +#include + +#include #include #include @@ -117,11 +121,20 @@ static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, struct nhop4_basic *pnh4); #endif -#ifdef INET -static void fib6_rte_to_nh_extended(struct rtentry *rte, struct in6_addr dst, +#ifdef INET6 +static void fib6_rte_to_nh_extended(struct rtentry *rte, struct in6_addr *dst, struct nhop6_extended *pnh6); -static void fib6_rte_to_nh_basic(struct rtentry *rte, struct in6_addr dst, +static void fib6_rte_to_nh_basic(struct rtentry *rte, struct in6_addr *dst, struct nhop6_basic *pnh6); +static int fib6_storelladdr(struct ifnet *ifp, struct in6_addr *dst, + int mm_flags, u_char *desten); +static uint16_t fib6_get_ifa(struct rtentry *rte); 
+static int fib6_lla_to_nh_basic(struct in6_addr *dst, uint32_t scopeid, + struct nhop6_basic *pnh6); +static int fib6_lla_to_nh_extended(struct in6_addr *dst, uint32_t scopeid, + struct nhop6_extended *pnh6); +static int fib6_lla_to_nh(struct in6_addr *dst, uint32_t scopeid, + struct nhop_prepend *nh, struct ifnet **lifp); #endif MALLOC_DEFINE(M_RTFIB, "rtfib", "routing fwd"); @@ -292,8 +305,11 @@ fib4_lookup_prepend(uint32_t fibnum, struct in_addr dst, struct mbuf *m, * Currently all we have is rte ifp. * Simply use it. */ - lifp = rte->rt_ifp; + /* Save interface address ifp */ + lifp = rte->rt_ifa->ifa_ifp; + nh->aifp_idx = lifp->if_index; /* Save both logical and transmit interface indexes */ + lifp = rte->rt_ifp; nh->lifp_idx = lifp->if_index; nh->i.ifp_idx = nh->lifp_idx; @@ -407,6 +423,7 @@ fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; + /* XXX: Set RTF_BROADCAST if GW address is broadcast */ } static void @@ -428,6 +445,7 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; + /* XXX: Set RTF_BROADCAST if GW address is broadcast */ ia = ifatoia(rte->rt_ifa); pnh4->nh_src = IA_SIN(ia)->sin_addr; @@ -561,19 +579,335 @@ fib6_choose_prepend(uint32_t fibnum, struct nhop_prepend *nh_src, */ } +/* + * Temporary function: copy ethernet address from a valid lle (0 on success) + */ +static int +fib6_storelladdr(struct ifnet *ifp, struct in6_addr *dst, int mm_flags, + u_char *desten) +{ + struct llentry *ln; + struct sockaddr_in6 dst_sa; + + if (mm_flags & M_MCAST) { + ETHER_MAP_IPV6_MULTICAST(dst, desten); + return (0); + } + + memset(&dst_sa, 0, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = *dst; + dst_sa.sin6_scope_id = ifp->if_index; + + + /* + * the entry should have been created in 
nd6_store_lladdr + */ + IF_AFDATA_RLOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&dst_sa); + + /* + * Perform fast path for the following cases: + * 1) lle state is REACHABLE + * 2) lle state is DELAY (NS message sent) + * + * Every other case involves lle modification, so we handle + * them separately. + */ + if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE && + ln->ln_state != ND6_LLINFO_DELAY)) { + if (ln != NULL) + LLE_RUNLOCK(ln); + IF_AFDATA_RUNLOCK(ifp); + return (1); + } + bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + LLE_RUNLOCK(ln); + IF_AFDATA_RUNLOCK(ifp); + + return (0); +} + +int +fib6_lookup_prepend(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, + struct mbuf *m, struct nhop_prepend *nh, struct nhop6_extended *nh_ext) +{ + struct radix_node_head *rnh; + struct radix_node *rn; + struct sockaddr_in6 sin6, *gw_sa; + struct in6_addr gw6; + struct rtentry *rte; + struct ifnet *lifp; + struct ether_header *eh; + uint32_t flags; + int error; + + if (IN6_IS_SCOPE_LINKLOCAL(dst)) { + /* Do not lookup link-local addresses in rtable */ + error = fib6_lla_to_nh(dst, scopeid, nh, &lifp); + if (error != 0) + return (error); + /* Link-local destination is used as its own L2 lookup address */ + gw6 = *dst; + goto do_l2; + } + + + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_prepend: bad fibnum")); + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh == NULL) + return (ENOENT); + + /* Prepare lookup key */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = *dst; + sin6.sin6_scope_id = scopeid; + sa6_embedscope(&sin6, 0); + + + RADIX_NODE_HEAD_RLOCK(rnh); + rn = rnh->rnh_matchaddr((void *)&sin6, rnh); + rte = RNTORT(rn); + if (rn == NULL || ((rn->rn_flags & RNF_ROOT) != 0) || + RT_LINK_IS_UP(rte->rt_ifp) == 0) { + RADIX_NODE_HEAD_RUNLOCK(rnh); + return (EHOSTUNREACH); + } + + /* Explicitly zero nexthop */ + memset(nh, 0, sizeof(*nh)); + flags = 0; + nh->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); + if 
(rte->rt_flags & RTF_GATEWAY) { + gw_sa = (struct sockaddr_in6 *)rte->rt_gateway; + gw6 = gw_sa->sin6_addr; + in6_clearscope(&gw6); + } else + gw6 = *dst; + /* Set flags */ + flags = fib_rte_to_nh_flags(rte->rt_flags); + gw_sa = (struct sockaddr_in6 *)rt_key(rte); + if (IN6_IS_ADDR_UNSPECIFIED(&gw_sa->sin6_addr)) + flags |= NHF_DEFAULT; + + /* + * TODO: nh L2/L3 resolve. + * Currently all we have is rte ifp. + * Simply use it. + */ + /* Save interface address ifp */ + nh->aifp_idx = fib6_get_ifa(rte); + /* Save both logical and transmit interface indexes */ + lifp = rte->rt_ifp; + nh->lifp_idx = lifp->if_index; + nh->i.ifp_idx = nh->lifp_idx; + + RADIX_NODE_HEAD_RUNLOCK(rnh); + + nh->nh_flags = flags; +do_l2: + /* + * Try to lookup L2 info. + * Do this using separate LLE locks. + * TODO: move this under radix lock. + */ + if (lifp->if_type == IFT_ETHER) { + eh = (struct ether_header *)nh->d.data; + + /* + * Fill in ethernet header. + * It should be already presented if we're + * sending data via known gateway. + */ + error = fib6_storelladdr(lifp, &gw6, m ? m->m_flags : 0, + eh->ether_dhost); + if (error == 0) { + memcpy(&eh->ether_shost, IF_LLADDR(lifp), ETHER_ADDR_LEN); + eh->ether_type = htons(ETHERTYPE_IPV6); + nh->nh_count = ETHER_HDR_LEN; + return (0); + } + } + + /* Notify caller that no L2 info is linked */ + nh->nh_count = 0; + nh->nh_flags |= NHF_L2_INCOMPLETE; + /* ..And save gateway address */ + nh->d.gw6 = gw6; + return (0); +} + +int +fib6_sendmbuf(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, + struct nhop_prepend *nh) +{ + int error; + + if (nh != NULL && (nh->nh_flags & NHF_L2_INCOMPLETE) == 0) { + + /* + * Fast path case. Most packets should + * be sent from here. + * TODO: Make special ifnet + * 'if_output_frame' handler for that. 
+ */ + struct route_compat rc; + struct ether_header *eh; + rc.ro_flags = AF_INET6 << 8 | RT_NHOP; + rc.ro_nh = nh; + + M_PREPEND(m, nh->nh_count, M_NOWAIT); + if (m == NULL) + return (ENOBUFS); + eh = mtod(m, struct ether_header *); + memcpy(eh, nh->d.data, nh->nh_count); + error = (*ifp->if_output)(ifp, m, + NULL, (struct route *)&rc); + } else { + /* We need to perform ND lookup. XXX: nh == NULL also lands here */ + struct sockaddr_in6 gw_out; + + memset(&gw_out, 0, sizeof(gw_out)); + gw_out.sin6_family = AF_INET6; + gw_out.sin6_len = sizeof(gw_out); + gw_out.sin6_addr = nh->d.gw6; + gw_out.sin6_scope_id = ifp->if_index; + sa6_embedscope(&gw_out, 0); + + error = nd6_output(ifp, origifp, m, &gw_out, NULL); + } + + return (error); +} + +static uint16_t +fib6_get_ifa(struct rtentry *rte) +{ + struct ifnet *ifp; + struct sockaddr_dl *sdl; + + ifp = rte->rt_ifp; + if ((ifp->if_flags & IFF_LOOPBACK) && + rte->rt_gateway->sa_family == AF_LINK) { + sdl = (struct sockaddr_dl *)rte->rt_gateway; + return (sdl->sdl_index); + } + + return (ifp->if_index); +#if 0 + /* IPv6 case */ + /* Alternative way to get interface address ifp */ + /* + * Adjust the "outgoing" interface. If we're going to loop + * the packet back to ourselves, the ifp would be the loopback + * interface. However, we'd rather know the interface associated + * to the destination address (which should probably be one of + * our own addresses.) 
+ */ + if (rt) { + if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && + (rt->rt_gateway->sa_family == AF_LINK)) + *retifp = + ifnet_byindex(((struct sockaddr_dl *) + rt->rt_gateway)->sdl_index); + } + /* IPv4 case */ + //pnh6->nh_ifp = rte->rt_ifa->ifa_ifp; +#endif +} + +static int +fib6_lla_to_nh_basic(struct in6_addr *dst, uint32_t scopeid, + struct nhop6_basic *pnh6) +{ + struct ifnet *ifp; + + ifp = ifnet_byindex_locked(scopeid); + if (ifp == NULL) + return (ENOENT); + + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + pnh6->nh_ifp = ifp; + pnh6->nh_mtu = IN6_LINKMTU(ifp); + /* No flags set */ + pnh6->nh_addr = *dst; + + return (0); +} + +static int +fib6_lla_to_nh_extended(struct in6_addr *dst, uint32_t scopeid, + struct nhop6_extended *pnh6) +{ + struct ifnet *ifp; + + ifp = ifnet_byindex_locked(scopeid); + if (ifp == NULL) + return (ENOENT); + + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + pnh6->nh_ifp = ifp; + pnh6->nh_mtu = IN6_LINKMTU(ifp); + /* No flags set */ + pnh6->nh_addr = *dst; + + return (0); +} + +static int +fib6_lla_to_nh(struct in6_addr *dst, uint32_t scopeid, + struct nhop_prepend *nh, struct ifnet **lifp) +{ + struct ifnet *ifp; + + ifp = ifnet_byindex_locked(scopeid); + if (ifp == NULL) + return (ENOENT); + + /* Do explicit nexthop zero unless we're copying it */ + memset(nh, 0, sizeof(*nh)); + /* No flags set */ + nh->nh_mtu = IN6_LINKMTU(ifp); + + /* Save lifp */ + *lifp = ifp; + + nh->aifp_idx = scopeid; + nh->lifp_idx = scopeid; + /* Check if this is a for-us address */ + if (in6_ifawithifp_lla(ifp, dst)) { + if ((ifp = V_loif) != NULL) + nh->lifp_idx = ifp->if_index; + } + + return (0); +} + + static void -fib6_rte_to_nh_basic(struct rtentry *rte, struct in6_addr dst, +fib6_rte_to_nh_basic(struct rtentry *rte, struct in6_addr *dst, struct nhop6_basic *pnh6) { struct sockaddr_in6 *gw; - pnh6->nh_ifp = rte->rt_ifa->ifa_ifp; - pnh6->nh_mtu = 
min(rte->rt_mtu, rte->rt_ifp->if_mtu); + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + pnh6->nh_ifp = ifnet_byindex(fib6_get_ifa(rte)); + + pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; + in6_clearscope(&pnh6->nh_addr); } else - pnh6->nh_addr = dst; + pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in6 *)rt_key(rte); @@ -582,19 +916,23 @@ fib6_rte_to_nh_basic(struct rtentry *rte, struct in6_addr dst, } static void -fib6_rte_to_nh_extended(struct rtentry *rte, struct in6_addr dst, +fib6_rte_to_nh_extended(struct rtentry *rte, struct in6_addr *dst, struct nhop6_extended *pnh6) { struct sockaddr_in6 *gw; struct in6_ifaddr *ia; - pnh6->nh_ifp = rte->rt_ifa->ifa_ifp; - pnh6->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + pnh6->nh_ifp = ifnet_byindex(fib6_get_ifa(rte)); + pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; + in6_clearscope(&pnh6->nh_addr); } else - pnh6->nh_addr = dst; + pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in6 *)rt_key(rte); @@ -602,18 +940,22 @@ fib6_rte_to_nh_extended(struct rtentry *rte, struct in6_addr dst, pnh6->nh_flags |= NHF_DEFAULT; ia = ifatoia6(rte->rt_ifa); - pnh6->nh_src = IA6_SIN6(ia)->sin6_addr; } int -fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr dst, uint32_t flowid, - struct nhop6_basic *pnh6) +fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, + uint32_t flowid, struct nhop6_basic *pnh6) { struct radix_node_head *rnh; struct radix_node *rn; struct sockaddr_in6 sin6; struct rtentry 
*rte; + if (IN6_IS_SCOPE_LINKLOCAL(dst)) { + /* Do not lookup link-local addresses in rtable */ + return (fib6_lla_to_nh_basic(dst, scopeid, pnh6)); + } + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum")); rnh = rt_tables_get_rnh(fibnum, AF_INET6); if (rnh == NULL) @@ -621,7 +963,9 @@ fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr dst, uint32_t flowid, /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); - sin6.sin6_addr = dst; + sin6.sin6_addr = *dst; + sin6.sin6_scope_id = scopeid; + sa6_embedscope(&sin6, 0); RADIX_NODE_HEAD_RLOCK(rnh); rn = rnh->rnh_matchaddr((void *)&sin6, rnh); @@ -649,7 +993,7 @@ fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr dst, uint32_t flowid, * - mtu from logical transmit interface will be returned. */ int -fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr dst, uint32_t scopeid, +fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, uint32_t flowid, uint32_t flags, struct nhop6_extended *pnh6) { struct radix_node_head *rnh; @@ -657,6 +1001,12 @@ fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr dst, uint32_t scopeid, struct sockaddr_in6 sin6; struct rtentry *rte; + if (IN6_IS_SCOPE_LINKLOCAL(dst)) { + /* Do not lookup link-local addresses in rtable */ + /* XXX: Do lwref on egress ifp */ + return (fib6_lla_to_nh_extended(dst, scopeid, pnh6)); + } + KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum")); rnh = rt_tables_get_rnh(fibnum, AF_INET6); if (rnh == NULL) @@ -665,7 +1015,9 @@ fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr dst, uint32_t scopeid, /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = dst; + sin6.sin6_addr = *dst; + sin6.sin6_scope_id = scopeid; + sa6_embedscope(&sin6, 0); RADIX_NODE_HEAD_RLOCK(rnh); rn = rnh->rnh_matchaddr((void *)&sin6, rnh); diff --git a/sys/net/rt_nhops.h b/sys/net/rt_nhops.h index 782ff4a5cb76..f928f4d315ed 100644 --- a/sys/net/rt_nhops.h +++ 
b/sys/net/rt_nhops.h @@ -74,7 +74,8 @@ struct nhop_prepend { uint16_t ifp_idx; /* Transmit interface index */ uint16_t nhop_idx; /* L2 multipath nhop index */ } i; - uint16_t spare1[3]; + uint16_t aifp_idx; /* Interface address index */ + uint16_t spare1[2]; union { char data[MAX_PREPEND_LEN]; /* data to prepend */ #ifdef INET @@ -102,6 +103,7 @@ struct nhop_prepend { #define NH_LIFP(nh) ifnet_byindex_locked((nh)->lifp_idx) #define NH_TIFP(nh) ifnet_byindex_locked((nh)->i.ifp_idx) +#define NH_AIFP(nh) ifnet_byindex_locked((nh)->aifp_idx) /* L2/L3 recursive nexthop */ struct nhop_multi { @@ -173,7 +175,6 @@ struct nhop6_extended { uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in6_addr nh_addr; /* GW/DST IPv6 address */ - struct in6_addr nh_src; /* default source IPv6 address */ uint64_t spare2[2]; }; @@ -186,9 +187,10 @@ struct nhopu_extended { struct route_info { struct nhop_prepend *ri_nh; /* Desired nexthop to use */ - struct nhop64_basic *ri_nh_info; /* Get selected route info */ - uint16_t ri_mtu; - uint16_t spare[3]; + struct nhopu_basic *ri_nh_info; /* Get selected route info */ + uint16_t ri_mtu; /* Get selected route MTU */ + uint16_t spare; + uint32_t scopeid; /* Desired scope id to use */ }; struct route_compat { @@ -206,9 +208,9 @@ void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4); #define NHOP_LOOKUP_REF 0x01 -int fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr dst, uint32_t flowid, - struct nhop6_basic *pnh6); -int fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr dst, +int fib6_lookup_nh_basic(uint32_t fibnum, struct in6_addr *dst, + uint32_t scopeid, uint32_t flowid, struct nhop6_basic *pnh6); +int fib6_lookup_nh_ext(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, uint32_t flowid, uint32_t flags, struct nhop6_extended *pnh6); void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6); @@ -228,6 +230,11 @@ int fib4_sendmbuf(struct ifnet *ifp, struct mbuf *m, struct nhop_prepend *nh, void 
fib6_free_nh_prepend(uint32_t fibnum, struct nhop_prepend *nh); void fib6_choose_prepend(uint32_t fibnum, struct nhop_prepend *nh_src, uint32_t flowid, struct nhop_prepend *nh, struct nhop6_extended *nh_ext); +int fib6_lookup_prepend(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, + struct mbuf *m, struct nhop_prepend *nh, struct nhop6_extended *nh_ext); + +int fib6_sendmbuf(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, + struct nhop_prepend *nh); #define FWD_INET 0 #define FWD_INET6 1 diff --git a/sys/netgraph/ng_ipfw.c b/sys/netgraph/ng_ipfw.c index 092a0416b07e..3287967f88b1 100644 --- a/sys/netgraph/ng_ipfw.c +++ b/sys/netgraph/ng_ipfw.c @@ -271,8 +271,7 @@ ng_ipfw_rcvdata(hook_p hook, item_p item) #endif #ifdef INET6 case IPV6_VERSION >> 4: - return (ip6_output(m, NULL, NULL, 0, NULL, - NULL, NULL)); + return (ip6_output(m, NULL, NULL, 0, NULL, NULL)); #endif } } diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 17bc0e79f693..3856966ca26e 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -930,7 +930,7 @@ carp_send_ad_locked(struct carp_softc *sc) CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, - &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); + &sc->sc_carpdev->if_carp->cif_im6o, NULL)); } #endif /* INET6 */ diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 3e3bc942df9e..a4c9828222ba 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -456,7 +456,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, break; #ifdef INET6 case IPV6_VERSION >> 4: - error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + error = ip6_output(m, NULL, NULL, 0, NULL, NULL); break; #endif } diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 7039b04e7198..f8beeb87a4a5 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -110,9 +110,13 @@ extern struct protosw inetsw[]; * header (with len, off, ttl, proto, 
tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. - * If route ro is present and has ro_rt initialized, route lookup would be - * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, - * then result of route lookup is stored in ro->ro_rt. + * + * If @ri is present: + * - if ri->ri_nh is not null, route will be calculated using ri_nh. + * - if ri->ri_nh_info is set, nhop4_basic route info will be stored on + * successful transmit (error=0). + * - ri->ri_mtu will be set if packet fails to be transmitted due to MTU + * issues * * In the IP forwarding case, the packet will arrive with options already * inserted, so must have a NULL opt pointer. @@ -364,21 +368,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route_info *ri, int flags, nh = &local_nh; ifp = NH_LIFP(nh); mtu = nh->nh_mtu; - if (nh->nh_flags & (RTF_HOST|RTF_GATEWAY)) { - /* XXX: Set RTF_BROADCAST if GW address is broadcast */ + if (nh->nh_flags & (RTF_HOST|RTF_GATEWAY)) isbroadcast = (nh->nh_flags & RTF_BROADCAST); - } else + else isbroadcast = in_broadcast(dst, ifp); } - /* - * XXX: Move somewhere to sendit - */ - if (ri != NULL) { - ri->ri_mtu = mtu; - } - - /* Catch a possible divide by zero later. */ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", __func__, mtu, nh, (nh != NULL) ? nh->nh_flags : 0, ifp)); @@ -607,6 +602,20 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route_info *ri, int flags, goto again; } + if (ri != NULL) { + ri->ri_mtu = mtu; + if (ri->ri_nh_info != NULL) { + struct nhop4_basic *pnh4; + + pnh4 = &ri->ri_nh_info->u.nh4; + pnh4->nh_ifp = ifp; + pnh4->nh_flags = nh ? nh->nh_flags : 0; + pnh4->nh_mtu = mtu; + /* XXX: This is not always correct. */ + pnh4->nh_addr = dst; + } + } + passout: /* 127/8 must not appear on wire - RFC1122. 
*/ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index 02179a2dc2a5..860de2879498 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -452,9 +452,9 @@ typedef struct rtentry sctp_rtentry_t; if (local_stcb && local_stcb->sctp_ep) \ result = ip6_output(o_pak, \ ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \ - (ro), 0, 0, ifp, NULL); \ + NULL, 0, NULL, NULL); \ else \ - result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \ + result = ip6_output(o_pak, NULL, NULL, 0, NULL, NULL); \ } struct mbuf * diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c index 853d42437138..43c054c1fbea 100644 --- a/sys/netinet/tcp_offload.c +++ b/sys/netinet/tcp_offload.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #define TCPOUTFLAGS #include #include +#include int registered_toedevs; @@ -86,11 +87,13 @@ tcp_offload_connect(struct socket *so, struct sockaddr *nam) goto done; } else if (af == AF_INET6) { struct sockaddr_in6 *sin6; + struct in6_addr dst; + uint32_t scopeid; sin6 = (struct sockaddr_in6 *)nam; + in6_splitscope(&sin6->sin6_addr, &dst, &scopeid); - if (fib6_lookup_nh_ext(fibnum, - sin6->sin6_addr, sin6->sin6_scope_id, + if (fib6_lookup_nh_ext(fibnum, &dst, scopeid, 0, NHOP_LOOKUP_REF, &nhu_ext.u.nh6) != 0) return (EHOSTUNREACH); diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index b54f37e1f3f7..b05ba61fe128 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1257,9 +1257,9 @@ tcp_output(struct tcpcb *tp) */ #ifdef INET6 if (isipv6) { - struct route_in6 ro; + struct route_info ri; - bzero(&ro, sizeof(ro)); + bzero(&ri, sizeof(ri)); /* * we separately set hoplimit for every segment, since the * user might want to change the value via setsockopt. 
@@ -1281,13 +1281,12 @@ tcp_output(struct tcpcb *tp) TCP_PROBE5(send, NULL, tp, ip6, tp, th); /* TODO: IPv6 IP6TOS_ECT bit on */ - error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, + error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ri, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), - NULL, NULL, tp->t_inpcb); + NULL, tp->t_inpcb); - if (error == EMSGSIZE && ro.ro_rt != NULL) - mtu = ro.ro_rt->rt_mtu; - RO_RTFREE(&ro); + if (error == EMSGSIZE) + mtu = ri.ri_mtu; } #endif /* INET6 */ #if defined(INET) && defined(INET6) @@ -1324,7 +1323,7 @@ tcp_output(struct tcpcb *tp) TCP_PROBE5(send, NULL, tp, ip, tp, th); error = ip_output(m, tp->t_inpcb->inp_options, &ri, - ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, + ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, tp->t_inpcb); if (error == EMSGSIZE) diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 1765e7c28ee2..026f195f7486 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -725,7 +725,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth); #ifdef INET6 if (isipv6) - (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); + (void) ip6_output(m, NULL, NULL, ipflags, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 17f85ed38434..83c741b275e1 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1571,7 +1571,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked) return (error); } #endif - error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + error = ip6_output(m, NULL, NULL, 0, NULL, NULL); } #endif #if defined(INET6) && defined(INET) diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 33555d9fe980..00f6763f2cb1 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ 
-596,7 +596,7 @@ tcp_twrespond(struct tcptw *tw, int flags) sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(inp, NULL); error = ip6_output(m, inp->in6p_outputopts, NULL, - (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); + (tw->tw_so_options & SO_DONTROUTE), NULL, inp); } #endif #if defined(INET6) && defined(INET) diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 2827b9bb34d0..ff23257558cd 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -2142,7 +2142,12 @@ icmp6_reflect(struct mbuf *m, size_t off) int plen; int type, code; struct ifnet *outif = NULL; - struct in6_addr origdst, src, *srcp = NULL; + struct in6_addr origdst, src, dst; + struct route_info ri; + struct nhop6_basic nh6; + uint32_t scopeid; + int e; + /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { @@ -2206,11 +2211,13 @@ icmp6_reflect(struct mbuf *m, size_t off) * procedure of an outgoing packet of our own, in which case we need * to search in the ifaddr list. */ + memset(&src, 0, sizeof(src)); if (!IN6_IS_ADDR_MULTICAST(&origdst)) { if ((ia = ip6_getdstifaddr(m))) { if (!(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) - srcp = &ia->ia_addr.sin6_addr; + src = ia->ia_addr.sin6_addr; + ifa_free(&ia->ia_ifa); } else { struct sockaddr_in6 d; @@ -2223,42 +2230,16 @@ icmp6_reflect(struct mbuf *m, size_t off) if (ia && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { - srcp = &ia->ia_addr.sin6_addr; + src = ia->ia_addr.sin6_addr; } + /* Drop the lookup reference even if the address is unusable */ + if (ia != NULL) + ifa_free(&ia->ia_ifa); } } - if (srcp == NULL) { - int e; - struct sockaddr_in6 sin6; - struct route_in6 ro; - /* - * This case matches to multicasts, our anycast, or unicasts - * that we do not own. Select a source address based on the - * source address of the erroneous packet. 
- */ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ - - bzero(&ro, sizeof(ro)); - e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src); - if (ro.ro_rt) - RTFREE(ro.ro_rt); /* XXX: we could use this */ - if (e) { - char ip6buf[INET6_ADDRSTRLEN]; - nd6log((LOG_DEBUG, - "icmp6_reflect: source can't be determined: " - "dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); - goto bad; - } - srcp = &src; - } - - ip6->ip6_src = *srcp; + ip6->ip6_src = src; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; @@ -2271,6 +2250,33 @@ icmp6_reflect(struct mbuf *m, size_t off) } else ip6->ip6_hlim = V_ip6_defhlim; + + /* + * Deembed scope + */ + in6_splitscope(&ip6->ip6_dst, &dst, &scopeid); + + if (IN6_IS_ADDR_UNSPECIFIED(&src)) { + + /* + * This case matches to multicasts, our anycast, or unicasts + * that we do not own. Select a source address based on the + * source address of the erroneous packet. 
+ */ + + e = in6_selectsrc_addr(M_GETFIB(m), &dst, scopeid, &src); + if (e) { + char ip6buf[INET6_ADDRSTRLEN]; + nd6log((LOG_DEBUG, + "icmp6_reflect: source can't be determined: " + "dst=%s, error=%d\n", + ip6_sprintf(ip6buf, &dst), e)); + goto bad; + } + ip6->ip6_src = src; + } + + /* finalize header */ icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); @@ -2281,17 +2287,20 @@ icmp6_reflect(struct mbuf *m, size_t off) m->m_flags &= ~(M_BCAST|M_MCAST); - ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); - if (outif) - icmp6_ifoutstat_inc(outif, type, code); + memset(&ri, 0, sizeof(ri)); + ri.ri_nh_info = (struct nhopu_basic *)&nh6; + ri.scopeid = scopeid; + + e = ip6_output(m, NULL, &ri, 0, NULL, NULL); + if (e == 0) { + /* XXX: Possible use after free */ + outif = nh6.nh_ifp; + //icmp6_ifoutstat_inc(outif, type, code); + } - if (ia != NULL) - ifa_free(&ia->ia_ifa); return; bad: - if (ia != NULL) - ifa_free(&ia->ia_ifa); m_freem(m); return; } @@ -2387,11 +2396,11 @@ icmp6_redirect_input(struct mbuf *m, int off) } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ - struct nhop6_extended nh_ext; + struct nhop6_basic nh6; - if (fib6_lookup_nh_ext(RT_DEFAULT_FIB, reddst6, 0, 0, 0, &nh_ext) == 0){ + if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &reddst6, 0, 0, &nh6)==0) { /* XXX: Think about AF_LINK GW */ - if ((nh_ext.nh_flags & NHF_GATEWAY) == 0) { + if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", @@ -2399,12 +2408,12 @@ icmp6_redirect_input(struct mbuf *m, int off) goto bad; } - if (bcmp(&src6, &nh_ext.nh_addr, sizeof(struct in6_addr)) != 0){ + if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", - ip6_sprintf(ip6buf, &nh_ext.nh_addr), + ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, 
&redtgt6))); goto bad; } @@ -2526,6 +2535,9 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; + struct route_info ri; + struct nhop6_basic nh6; + int e; icmp6_errcount(ND_REDIRECT, 0); @@ -2784,9 +2796,14 @@ noredhdropt:; m_tag_prepend(m, mtag); } + memset(&ri, 0, sizeof(ri)); + memset(&nh6, 0, sizeof(nh6)); + ri.ri_nh_info = (struct nhopu_basic *)&nh6; + /* send the packet to outside... */ - ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); - if (outif) { + e = ip6_output(m, NULL, &ri, 0, NULL, NULL); + if (e == 0) { + outif = nh6.nh_ifp; icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 56f9aa199e90..c816e44abfa2 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -2000,6 +2000,33 @@ in6_prefixlen2mask(struct in6_addr *maskp, int len) maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; } +int +in6_ifawithifp_lla(struct ifnet *ifp, struct in6_addr *dst) +{ + struct ifaddr *ifa; + struct in6_ifaddr *ifa6; + struct in6_addr a6; + + KASSERT(IN6_IS_SCOPE_LINKLOCAL(dst), ("Non-linklocal address")); + + a6 = *dst; + in6_setllascope(&a6, ifp); + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifa6 = (struct in6_ifaddr *)ifa; + if (IN6_ARE_ADDR_EQUAL(&a6, &ifa6->ia_addr.sin6_addr)) { + IF_ADDR_RUNLOCK(ifp); + return (1); + } + } + IF_ADDR_RUNLOCK(ifp); + + return (0); +} + /* * return the best address out of the same scope. if no address was * found, return the first valid address from designated IF. 
diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 5fbcf5f23d9f..c241d45167c8 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -653,6 +653,7 @@ int in6_localaddr(struct in6_addr *); int in6_localip(struct in6_addr *); int in6_addrscope(const struct in6_addr *); struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); +int in6_ifawithifp_lla(struct ifnet *, struct in6_addr *); extern void in6_if_up(struct ifnet *); struct sockaddr; extern u_char ip6_protox[]; diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index b792ed57b5cb..6dcc08129c91 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -273,9 +273,9 @@ in6_gif_output(struct ifnet *ifp, * it is too painful to ask for resend of inner packet, to achieve * path MTU discovery for encapsulated packets. */ - error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL); + error = ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); #else - error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL); + error = ip6_output(m, NULL, NULL, 0, NULL, NULL); #endif if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) && diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 2be2e8366af2..243bae13fb68 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -328,7 +328,6 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; - struct ifnet *ifp = NULL; int scope_ambiguous = 0; struct in6_addr in6a; @@ -358,16 +357,11 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); - error = in6_selectsrc(sin6, inp->in6p_outputopts, - inp, NULL, inp->inp_cred, &ifp, &in6a); + error = in6_selectsrc_scope(sin6, inp->in6p_outputopts, + inp, inp->inp_cred, scope_ambiguous, &in6a); if (error) return (error); - if (ifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, 
ifp, NULL)) != 0) { - return(error); - } - /* * Do not update this earlier, in case we return with an error. * diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index ed9e6fdfce0a..61754e61a49c 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -130,12 +130,11 @@ static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; -static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int, u_int); -static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, - struct ifnet *, u_int); +static int in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, + struct ifnet **ifpp, struct in6_addr *srcp); +static int in6_selectif(uint32_t fibnum, struct sockaddr_in6 *, + struct ip6_pktopts *, struct ip6_moptions *, struct ifnet **); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -147,6 +146,74 @@ static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); +/* + * Selects source address. + * Alters destination scope address if @update_scope is not 0. + * Stores selected address to @srcp. + * Returns 0 on success. + * + * Used by socket-based consumers. + */ +int +in6_selectsrc_scope(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct inpcb *inp, struct ucred *cred, int update_scope, + struct in6_addr *srcp) +{ + struct ifnet *retifp; + uint32_t fibnum; + int error; + + fibnum = (inp != NULL) ? 
inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + retifp = NULL; + + error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); + if (error != 0) + return (error); + + if (retifp == NULL || update_scope == 0) + return (0); + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as they should, so we need a + * workaround. Even if an appropriate ID is not determined + * (when it's required), if we can determine the outgoing + * interface, determine the zone ID based on the interface. + */ + error = in6_setscope(&dstsock->sin6_addr, retifp, NULL); + + return (error); +} + +/* + * Select source address based on @fibnum, @dst and @scopeid. + * Stores selected address to @srcp. + * Returns 0 on success. + * + * Used by non-socket based consumers (ND code mostly). + */ +int +in6_selectsrc_addr(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, + struct in6_addr *srcp) +{ + struct ifnet *retifp; + struct sockaddr_in6 dst_sa; + int error; + + retifp = NULL; + memset(&dst_sa, 0, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = *dst; + dst_sa.sin6_scope_id = scopeid; + sa6_embedscope(&dst_sa, 0); + + error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp); + + return (error); +} + /* * Return an IPv6 address, which is the most appropriate for a given * destination and user specified options. 
@@ -175,9 +242,9 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); goto out; /* XXX: we can't use 'break' here */ \ } while(0) -int -in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, +static int +in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { struct in6_addr dst, tmp; @@ -210,9 +277,8 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (inp != NULL) { INP_LOCK_ASSERT(inp); mopts = inp->in6p_moptions; - } else { + } else mopts = NULL; - } /* * If the source address is explicitly specified by the caller, @@ -226,9 +292,8 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct in6_ifaddr *ia6; /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) - != 0) + error = in6_selectif(fibnum, dstsock, opts, mopts, &ifp); + if (error != 0) return (error); /* @@ -292,8 +357,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) + if ((error = in6_selectif(fibnum, dstsock, opts, mopts, &ifp)) != 0) return (error); #ifdef DIAGNOSTIC @@ -544,45 +608,30 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } /* - * clone - meaningful only for bsdi and freebsd + * Selects route based on fib/dst and numerous forwarding altering options. 
+ * */ -static int -selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) +int +fib6_selectroute(uint32_t fibnum, struct in6_addr *dst, uint32_t scopeid, + struct nhop_prepend *nh_src, struct mbuf *m, + struct ip6_pktopts *opts, struct ip6_moptions *mopts, + struct nhop_prepend *nh) { int error = 0; + int fill_nhop; struct ifnet *ifp = NULL; - struct rtentry *rt = NULL; struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; - struct in6_addr *dst = &dstsock->sin6_addr; -#if 0 - char ip6buf[INET6_ADDRSTRLEN]; - - if (dstsock->sin6_addr.s6_addr32[0] == 0 && - dstsock->sin6_addr.s6_addr32[1] == 0 && - !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { - printf("in6_selectroute: strange destination %s\n", - ip6_sprintf(ip6buf, &dstsock->sin6_addr)); - } else { - printf("in6_selectroute: destination = %s%%%d\n", - ip6_sprintf(ip6buf, &dstsock->sin6_addr), - dstsock->sin6_scope_id); /* for debug */ - } -#endif + fill_nhop = 0; /* If the caller specify the outgoing interface explicitly, use it. */ if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ + ifp = ifnet_byindex(pi->ipi6_ifindex); - if (ifp != NULL && - (norouteok || retrt == NULL || - IN6_IS_ADDR_MULTICAST(dst))) { - /* - * we do not have to check or get the route for - * multicast. - */ + if (ifp != NULL && IN6_IS_ADDR_MULTICAST(dst)) { + fill_nhop = 1; + goto done; } else goto getroute; @@ -594,6 +643,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { + fill_nhop = 1; goto done; /* we do not need a route for multicast. */ } @@ -603,8 +653,6 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * use it as the gateway. 
*/ if (opts && opts->ip6po_nexthop) { - struct route_in6 *ron; - struct llentry *la; sin6_next = satosin6(opts->ip6po_nexthop); @@ -614,268 +662,197 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, goto done; } + in6_splitscope(&sin6_next->sin6_addr, dst, &scopeid); + + if (fib6_lookup_prepend(fibnum, dst, scopeid, m, nh, NULL) != 0) { + error = EHOSTUNREACH; + goto done; + } + + /* + * If the next hop is an IPv6 address, then the node identified + * by that address must be a neighbor of the sending host. + */ + if ((nh->nh_flags & (NHF_GATEWAY|NHF_BLACKHOLE|NHF_REJECT)) + != 0) { + error = EHOSTUNREACH; + goto done; + } + + goto done; + } + + /* Do route lookup */ + if (nh_src == NULL) { + error = fib6_lookup_prepend(fibnum, dst, scopeid, m, nh, NULL); + if (error != 0) { + error = EHOSTUNREACH; + goto done; + } + } else + fib6_choose_prepend(fibnum, nh_src, m->m_pkthdr.flowid, nh, + NULL); + + /* Explicitly free nhop here to be consistent with returned results */ + fib6_free_nh_prepend(fibnum, nh); + + + /* + * Check if the outgoing interface conflicts with + * the interface specified by ipi6_ifindex (if specified). + * Note that loopback interface is always okay. + * (this may happen when we are sending a packet to one of + * our own addresses.) + * XXX-ME: this can be simplified by using aifp index. + */ + if (pi != NULL && pi->ipi6_ifindex != 0) { + ifp = NH_LIFP(nh); + if (!(ifp->if_flags & IFF_LOOPBACK) && + ifp->if_index != pi->ipi6_ifindex) { + error = EHOSTUNREACH; + goto done; + } + } + + done: + if (error == EHOSTUNREACH) + IP6STAT_INC(ip6s_noroute); + + if (error != 0) + return (error); + + if (fill_nhop == 0) + return (0); + + /* + * we do not have to check or get the route for + * multicast. 
However, we need to fill in @nh info + */ + memset(nh, 0, sizeof(*nh)); + nh->nh_flags = NHF_L2_INCOMPLETE; + nh->nh_count = 0; + nh->spare0 = 0; + nh->nh_mtu = IN6_LINKMTU(ifp); + nh->lifp_idx = ifp->if_index; + nh->i.ifp_idx = ifp->if_index; + nh->aifp_idx = ifp->if_index; + nh->d.gw6 = *dst; + /* In future, we will need to do some sort of refcounting */ + + return (0); +} + +static int +in6_selectif(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct ip6_moptions *mopts, + struct ifnet **retifp) +{ + int error = 0; + struct ifnet *ifp = NULL; + struct sockaddr_in6 *sin6_next; + struct in6_pktinfo *pi = NULL; + struct in6_addr dst; + uint32_t scopeid; + struct nhop6_basic nh6; + + /* If the caller specify the outgoing interface explicitly, use it. */ + if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { + /* XXX boundary check is assumed to be already done. */ + + ifp = ifnet_byindex(pi->ipi6_ifindex); + if (ifp != NULL) + goto done; + else + goto getroute; + } + + in6_splitscope(&dstsock->sin6_addr, &dst, &scopeid); + + /* + * If the destination address is a multicast address and the outgoing + * interface for the address is specified by the caller, use it. + */ + if (IN6_IS_ADDR_MULTICAST(&dst) && + mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { + goto done; /* we do not need a route for multicast. */ + } + + getroute: + /* + * If the next hop address for the packet is specified by the caller, + * use it as the gateway. + */ + if (opts && opts->ip6po_nexthop) { + sin6_next = satosin6(opts->ip6po_nexthop); + + /* at this moment, we only support AF_INET6 next hops */ + if (sin6_next->sin6_family != AF_INET6) { + error = EAFNOSUPPORT; /* or should we proceed? */ + goto done; + } + /* * If the next hop is an IPv6 address, then the node identified * by that address must be a neighbor of the sending host. + * XXX: Embedded form? */ - ron = &opts->ip6po_nextroute; - /* - * XXX what do we do here? 
- * PLZ to be fixing - */ - - - if (ron->ro_rt == NULL) { - in6_rtalloc(ron, fibnum); /* multi path case? */ - if (ron->ro_rt == NULL) { - /* XXX-BZ WT.? */ - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; - } - error = EHOSTUNREACH; - goto done; - } - } - - rt = ron->ro_rt; - ifp = rt->rt_ifp; - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)sin6_next); - IF_AFDATA_RUNLOCK(ifp); - if (la != NULL) - LLE_RUNLOCK(la); - else { + in6_splitscope(&sin6_next->sin6_addr, &dst, &scopeid); + if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, &nh6) != 0) { error = EHOSTUNREACH; goto done; } -#if 0 - if ((ron->ro_rt && - (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != - (RTF_UP | RTF_LLINFO)) || - !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, - &sin6_next->sin6_addr)) { - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; - } - *satosin6(&ron->ro_dst) = *sin6_next; + + if ((nh6.nh_flags & (NHF_GATEWAY|NHF_BLACKHOLE|NHF_REJECT)) + != 0) { + error = EHOSTUNREACH; + goto done; } - if (ron->ro_rt == NULL) { - in6_rtalloc(ron, fibnum); /* multi path case? */ - if (ron->ro_rt == NULL || - !(ron->ro_rt->rt_flags & RTF_LLINFO)) { - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; - } - error = EHOSTUNREACH; - goto done; - } - } -#endif /* - * When cloning is required, try to allocate a route to the - * destination so that the caller can store path MTU - * information. + * XXX-ME shouldn't we check ip6po_pktinfo index here as well? */ goto done; } - /* - * Use a cached route if it exists and is valid, else try to allocate - * a new one. Note that we should check the address family of the - * cached destination, in case of sharing the cache with IPv4. 
- */ - if (ro) { - if (ro->ro_rt && - (!(ro->ro_rt->rt_flags & RTF_UP) || - ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || - !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, - dst))) { - RTFREE(ro->ro_rt); - ro->ro_rt = (struct rtentry *)NULL; - } - if (ro->ro_rt == (struct rtentry *)NULL) { - struct sockaddr_in6 *sa6; - - /* No route yet, so try to acquire one */ - bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); - sa6 = (struct sockaddr_in6 *)&ro->ro_dst; - *sa6 = *dstsock; - sa6->sin6_scope_id = 0; - -#ifdef RADIX_MPATH - rtalloc_mpath_fib((struct route *)ro, - ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); -#else - ro->ro_rt = in6_rtalloc1((struct sockaddr *) - &ro->ro_dst, 0, 0UL, fibnum); - if (ro->ro_rt) - RT_UNLOCK(ro->ro_rt); -#endif - } - - /* - * do not care about the result if we have the nexthop - * explicitly specified. - */ - if (opts && opts->ip6po_nexthop) - goto done; - - if (ro->ro_rt) { - ifp = ro->ro_rt->rt_ifp; - - if (ifp == NULL) { /* can this really happen? */ - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } - } - if (ro->ro_rt == NULL) - error = EHOSTUNREACH; - rt = ro->ro_rt; - - /* - * Check if the outgoing interface conflicts with - * the interface specified by ipi6_ifindex (if specified). - * Note that loopback interface is always okay. - * (this may happen when we are sending a packet to one of - * our own addresses.) - */ - if (ifp && opts && opts->ip6po_pktinfo && - opts->ip6po_pktinfo->ipi6_ifindex) { - if (!(ifp->if_flags & IFF_LOOPBACK) && - ifp->if_index != - opts->ip6po_pktinfo->ipi6_ifindex) { - error = EHOSTUNREACH; - goto done; - } - } - } - - done: - if (ifp == NULL && rt == NULL) { - /* - * This can happen if the caller did not pass a cached route - * nor any other hints. We treat this case an error. 
- */ + /* Do route lookup */ + if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, &nh6) != 0) { error = EHOSTUNREACH; + goto done; } + + ifp = nh6.nh_ifp; + + /* + * Check if the outgoing interface conflicts with + * the interface specified by ipi6_ifindex (if specified). + * Note that loopback interface is always okay. + * (this may happen when we are sending a packet to one of + * our own addresses.) + * + * XXX: basic_ means we return "proper" interface address. + * + */ + if (opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { + if (!(ifp->if_flags & IFF_LOOPBACK) && + ifp->if_index != opts->ip6po_pktinfo->ipi6_ifindex) { + error = EHOSTUNREACH; + goto done; + } + } + + /* do not use a rejected or black hole route. */ + if ((nh6.nh_flags & (NHF_BLACKHOLE|NHF_REJECT)) != 0) + error = EHOSTUNREACH; + +done: if (error == EHOSTUNREACH) IP6STAT_INC(ip6s_noroute); - if (retifp != NULL) { - *retifp = ifp; - - /* - * Adjust the "outgoing" interface. If we're going to loop - * the packet back to ourselves, the ifp would be the loopback - * interface. However, we'd rather know the interface associated - * to the destination address (which should probably be one of - * our own addresses.) 
- */ - if (rt) { - if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && - (rt->rt_gateway->sa_family == AF_LINK)) - *retifp = - ifnet_byindex(((struct sockaddr_dl *) - rt->rt_gateway)->sdl_index); - } - } - - if (retrt != NULL) - *retrt = rt; /* rt may be NULL */ + *retifp = ifp; return (error); } -static int -in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, - struct ifnet *oifp, u_int fibnum) -{ - int error; - struct route_in6 sro; - struct rtentry *rt = NULL; - - KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); - - if (ro == NULL) { - bzero(&sro, sizeof(sro)); - ro = &sro; - } - - if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 1, fibnum)) != 0) { - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); - /* Help ND. See oifp comment in in6_selectsrc(). */ - if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { - *retifp = oifp; - error = 0; - } - return (error); - } - - /* - * do not use a rejected or black hole route. - * XXX: this check should be done in the L2 output routine. - * However, if we skipped this check here, we'd see the following - * scenario: - * - install a rejected route for a scoped address prefix - * (like fe80::/10) - * - send a packet to a destination that matches the scoped prefix, - * with ambiguity about the scope zone. - * - pick the outgoing interface from the route, and disambiguate the - * scope zone with the interface. - * - ip6_output() would try to get another route with the "new" - * destination, which may be valid. - * - we'd see no error on output. - * Although this may not be very harmful, it should still be confusing. - * We thus reject the case here. - */ - if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { - int flags = (rt->rt_flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); - - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); - return (flags); - } - - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); - return (0); -} - -/* - * Public wrapper function to selectroute(). - * - * XXX-BZ in6_selectroute() should and will grow the FIB argument. The - * in6_selectroute_fib() function is only there for backward compat on stable. - */ -int -in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt) -{ - - return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, 0, RT_DEFAULT_FIB)); -} - -#ifndef BURN_BRIDGES -int -in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) -{ - - return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, 0, fibnum)); -} -#endif - /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. 
@@ -898,7 +875,7 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) fibnum = in6p->inp_inc.inc_fibnum; - if (fib6_lookup_nh_ext(fibnum, in6p->in6p_faddr, 0, 0, + if (fib6_lookup_nh_ext(fibnum, &in6p->in6p_faddr, 0, 0, NHOP_LOOKUP_REF, &nh_ext) == 0) { hlim = ND_IFINFO(nh_ext.nh_ifp)->chlim; fib6_free_nh_ext(fibnum, &nh_ext); diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index daf43f66875b..9f18f1b0438a 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -1550,7 +1550,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) im6o.im6o_multicast_hlim = ip6->ip6_hlim; im6o.im6o_multicast_loop = 1; error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o, - NULL, NULL); + NULL); MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", (uint16_t)(mifp - mif6table), error); @@ -1568,7 +1568,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) dst6.sin6_len = sizeof(struct sockaddr_in6); dst6.sin6_family = AF_INET6; dst6.sin6_addr = ip6->ip6_dst; - ip6_mloopback(ifp, m, &dst6); + ip6_mloopback(ifp, m, AF_INET6); } /* diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 298a0d3bd078..a2e1874312cb 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -105,6 +105,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #ifdef IPSEC #include #include @@ -146,8 +148,6 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); -static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_long *, int *, u_int); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -224,41 +224,38 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. 
so we use u_long to hold largest one, * which is rt_mtu. - * - * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, - struct route_in6 *ro, int flags, struct ip6_moptions *im6o, - struct ifnet **ifpp, struct inpcb *inp) + struct route_info *ri, int flags, struct ip6_moptions *im6o, + struct inpcb *inp) { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len, off; - struct route_in6 ip6route; - struct rtentry *rt = NULL; - struct sockaddr_in6 *dst, src_sa, dst_sa; - struct in6_addr odst; + struct nhop_prepend local_nh, *nh; + struct sockaddr_in6 src_sa, dst_sa; + struct in6_addr dst, odst; int error = 0; - struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; - struct in6_addr finaldst, src0, dst0; + struct in6_addr src0, dst0; u_int32_t zone; - struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; + uint32_t scopeid; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; + nh = NULL; ip6 = mtod(m, struct ip6_hdr *); if (ip6 == NULL) { printf ("ip6 is NULL"); @@ -274,7 +271,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, } } - finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ @@ -435,6 +431,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, IP6STAT_INC(ip6s_localout); +#if 0 /* * Route packet. */ @@ -443,14 +440,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; - if (opt && opt->ip6po_rthdr) - ro = &opt->ip6po_route; - dst = (struct sockaddr_in6 *)&ro->ro_dst; +#endif #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? 
inp->inp_inc.inc_fibnum : M_GETFIB(m); + /* Make compiler happy */ + memset(&dst, 0, sizeof(dst)); + scopeid = 0; again: /* * if specified, try to fill in the traffic class field. @@ -481,32 +479,28 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); - if (ro->ro_rt && fwd_tag == NULL) { - rt = ro->ro_rt; - ifp = ro->ro_rt->rt_ifp; - } else { - if (fwd_tag == NULL) { - bzero(&dst_sa, sizeof(dst_sa)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; - } - error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, - &rt, fibnum); - if (error != 0) { - if (ifp != NULL) - in6_ifstat_inc(ifp, ifs6_out_discard); - goto bad; - } - } - if (rt == NULL) { - /* - * If in6_selectroute() does not return a route entry, - * dst may not have been updated. - */ - *dst = dst_sa; /* XXX */ + if (fwd_tag == NULL) { + in6_splitscope(&ip6->ip6_dst, &dst, &scopeid); } + error = fib6_selectroute(fibnum, &dst, scopeid, ri ? ri->ri_nh : NULL, + m, opt, im6o, &local_nh); + if (error != 0) + goto bad; + + nh = &local_nh; + ifp = NH_LIFP(nh); /* logical transmit interface */ + origifp = NH_AIFP(nh); /* ifp of address associated with route */ + mtu = nh->nh_mtu; + + /* + * Note the difference between @origifp and @ifp + * is in transmit-via-loopback case: if destination + * is our local address, @origifp will point to 'lo0' interface + * while @ifp will point to ifp which given address + * belongs to. + */ + /* * then rt (for unicast) and ifp must be non-NULL valid values. */ @@ -514,17 +508,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } - if (rt != NULL) { - ia = (struct in6_ifaddr *)(rt->rt_ifa); - counter_u64_add(rt->rt_pksent, 1); - } - /* * The outgoing interface must be in the zone of source and * destination addresses. 
*/ - origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) @@ -548,12 +536,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, goto badscope; } - /* We should use ia_ifp to support the case of - * sending packets to an address of our own. - */ - if (ia != NULL && ia->ia_ifp) - ifp = ia->ia_ifp; - /* scope check is done. */ goto routefound; @@ -565,19 +547,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, goto bad; routefound: - if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - if (opt && opt->ip6po_nextroute.ro_rt) { - /* - * The nexthop is explicitly specified by the - * application. We assume the next hop is an IPv6 - * address. - */ - dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; - } - else if ((rt->rt_flags & RTF_GATEWAY)) - dst = (struct sockaddr_in6 *)rt->rt_gateway; - } - if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { @@ -601,7 +570,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ - ip6_mloopback(ifp, m, dst); + ip6_mloopback(ifp, m, AF_INET6); } else { /* * If we are acting as a multicast router, perform @@ -641,19 +610,35 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m_freem(m); goto done; } + + /* XXX: Check path MTU */ } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ +#if 0 if (ifpp) *ifpp = ifp; +#endif - /* Determine path MTU. */ - if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, - &alwaysfrag, fibnum)) != 0) - goto bad; + + /* Check path MTU. */ + alwaysfrag = 0; + if (mtu < IPV6_MMTU) { + + /* + * RFC2460 section 5, last paragraph: + * if we record ICMPv6 too big message with + * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU + * or smaller, with fragment header attached. 
+ * (fragment header is needed regardless from the + * packet size, for translators to identify packets) + */ + alwaysfrag = 1; + mtu = IPV6_MMTU; + } /* * The caller of this function may specify to use the minimum MTU @@ -756,12 +741,18 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; - fibnum = M_GETFIB(m); - RO_RTFREE(ro); needfiblookup = 1; } - if (needfiblookup) + if (needfiblookup) { + if (nh != NULL) { + fib6_free_nh_prepend(fibnum, nh); + nh = NULL; + } + if (ri != NULL) + ri->ri_nh = NULL; + fibnum = M_GETFIB(m); goto again; + } /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { @@ -782,11 +773,18 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { - dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); + in6_splitscope(&dst_sa.sin6_addr, &dst, &scopeid); + m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); + if (nh != NULL) { + fib6_free_nh_prepend(fibnum, nh); + nh = NULL; + } + if (ri != NULL) + ri->ri_nh = NULL; goto again; } @@ -835,6 +833,10 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; + /* Save MTU */ + if (ri != NULL) + ri->ri_mtu = mtu; + if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else @@ -859,7 +861,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, mtu32 = (u_int32_t)mtu; bzero(&ip6cp, sizeof(ip6cp)); ip6cp.ip6c_cmdarg = (void *)&mtu32; - pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, + /* XXX-ME: what destination should we pass here ? 
*/ + dst_sa.sin6_addr = ip6->ip6_dst; + pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&dst_sa, (void *)&ip6cp); error = EMSGSIZE; @@ -881,7 +885,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + + error = fib6_sendmbuf(ifp, origifp, m, nh); goto done; } @@ -912,8 +917,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; - if (mtu > IPV6_MAXPACKET) - mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { @@ -1031,13 +1034,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { +#if 0 /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); +#endif + error = fib6_sendmbuf(ifp, origifp, m, nh); } else m_freem(m); } @@ -1046,10 +1051,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, IP6STAT_INC(ip6s_fragmented); done: - if (ro == &ip6route) - RO_RTFREE(ro); - if (ro_pmtu == &ip6route) - RO_RTFREE(ro_pmtu); + if (nh != NULL) + fib6_free_nh_prepend(fibnum, nh); return (error); freehdrs: @@ -1218,86 +1221,6 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, return (0); } -static int -ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, - struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum) -{ - u_int32_t mtu = 0; - int alwaysfrag = 0; - int error = 0; - - if (ro_pmtu != ro) { - /* The first hop and the final destination may differ. 
*/ - struct sockaddr_in6 *sa6_dst = - (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && - ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || - !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { - RTFREE(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)NULL; - } - if (ro_pmtu->ro_rt == NULL) { - bzero(sa6_dst, sizeof(*sa6_dst)); - sa6_dst->sin6_family = AF_INET6; - sa6_dst->sin6_len = sizeof(struct sockaddr_in6); - sa6_dst->sin6_addr = *dst; - - in6_rtalloc(ro_pmtu, fibnum); - } - } - if (ro_pmtu->ro_rt) { - u_int32_t ifmtu; - struct in_conninfo inc; - - bzero(&inc, sizeof(inc)); - inc.inc_flags |= INC_ISIPV6; - inc.inc6_faddr = *dst; - - if (ifp == NULL) - ifp = ro_pmtu->ro_rt->rt_ifp; - ifmtu = IN6_LINKMTU(ifp); - mtu = tcp_hc_getmtu(&inc); - if (mtu) - mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu); - else - mtu = ro_pmtu->ro_rt->rt_mtu; - if (mtu == 0) - mtu = ifmtu; - else if (mtu < IPV6_MMTU) { - /* - * RFC2460 section 5, last paragraph: - * if we record ICMPv6 too big message with - * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU - * or smaller, with framgent header attached. - * (fragment header is needed regardless from the - * packet size, for translators to identify packets) - */ - alwaysfrag = 1; - mtu = IPV6_MMTU; - } else if (mtu > ifmtu) { - /* - * The MTU on the route is larger than the MTU on - * the interface! This shouldn't happen, unless the - * MTU of the interface has been changed after the - * interface was brought up. Change the MTU in the - * route to match the interface MTU (as long as the - * field isn't locked). - */ - mtu = ifmtu; - ro_pmtu->ro_rt->rt_mtu = mtu; - } - } else if (ifp) { - mtu = IN6_LINKMTU(ifp); - } else - error = EHOSTUNREACH; /* XXX */ - - *mtup = mtu; - if (alwaysfragp) - *alwaysfragp = alwaysfrag; - return (error); -} - /* * IP6 socket option processing. 
*/ @@ -1935,9 +1858,7 @@ do { \ { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; - struct route_in6 sro; - - bzero(&sro, sizeof(sro)); + struct nhop6_basic nh6; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); @@ -1945,14 +1866,15 @@ do { \ * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. + * TODO: embedded, Multipath */ - error = ip6_getpmtu(&sro, NULL, NULL, - &in6p->in6p_faddr, &pmtu, NULL, - so->so_fibnum); - if (sro.ro_rt) - RTFREE(sro.ro_rt); - if (error) + if (fib6_lookup_nh_basic(so->so_fibnum, + &in6p->in6p_faddr, 0, 0, &nh6) != 0) { + error = EHOSTUNREACH; break; + } + + pmtu = nh6.nh_mtu; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; @@ -2313,10 +2235,6 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { - if (pktopt->ip6po_nextroute.ro_rt) { - RTFREE(pktopt->ip6po_nextroute.ro_rt); - pktopt->ip6po_nextroute.ro_rt = NULL; - } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; @@ -2335,10 +2253,6 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; - if (pktopt->ip6po_route.ro_rt) { - RTFREE(pktopt->ip6po_route.ro_rt); - pktopt->ip6po_route.ro_rt = NULL; - } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) @@ -2904,7 +2818,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, * pointer that might NOT be &loif -- easier than replicating that code here. 
*/ void -ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) +ip6_mloopback(struct ifnet *ifp, struct mbuf *m, int family) { struct mbuf *copym; struct ip6_hdr *ip6; @@ -2940,7 +2854,7 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); - (void)if_simloop(ifp, copym, dst->sin6_family, 0); + (void)if_simloop(ifp, copym, family, 0); } /* diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index b13d7ea07d5c..018f2ccf818c 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -372,6 +372,8 @@ VNET_DECLARE(int, ip6stealth); extern struct pr_usrreqs rip6_usrreqs; struct sockopt; +struct route_info; +struct nhop_prepend; struct inpcb; @@ -412,11 +414,11 @@ int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip6_forward(struct mbuf *, int); -void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); +void ip6_mloopback(struct ifnet *, struct mbuf *, int family); int ip6_output(struct mbuf *, struct ip6_pktopts *, - struct route_in6 *, + struct route_info *, int, - struct ip6_moptions *, struct ifnet **, + struct ip6_moptions *, struct inpcb *); int ip6_ctloutput(struct socket *, struct sockopt *); int ip6_raw_ctloutput(struct socket *, struct sockopt *); @@ -445,15 +447,14 @@ int rip6_usrreq(struct socket *, int dest6_input(struct mbuf **, int *, int); int none_input(struct mbuf **, int *, int); -int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, - struct inpcb *inp, struct route_in6 *, struct ucred *cred, - struct ifnet **, struct in6_addr *); -int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **); -int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, u_int); +int in6_selectsrc_addr(uint32_t, struct in6_addr 
*, uint32_t, + struct in6_addr *); +int in6_selectsrc_scope(struct sockaddr_in6 *, struct ip6_pktopts *, + struct inpcb *, struct ucred *, int, struct in6_addr *); +int fib6_selectroute(uint32_t fibnum, struct in6_addr *dst, + uint32_t scopeid, struct nhop_prepend *nh_src, struct mbuf *m, + struct ip6_pktopts *opts, struct ip6_moptions *mopts, + struct nhop_prepend *nh); u_int32_t ip6_randomid(void); u_int32_t ip6_randomflowlabel(void); void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset); diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c index 2e46208ce73b..e75bacded81f 100644 --- a/sys/netinet6/mld6.c +++ b/sys/netinet6/mld6.c @@ -3120,14 +3120,14 @@ mld_dispatch_packet(struct mbuf *m) mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); type = mld->mld_type; - error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, - &oifp, NULL); + error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, NULL); if (error) { CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); goto out; } ICMP6STAT_INC(icp6s_outhist[type]); - if (oifp != NULL) { + if (error == 0) { + oifp = ifp; icmp6_ifstat_inc(oifp, ifs6_out_msg); switch (type) { case MLD_LISTENER_REPORT: diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 682e7156d951..7c5018e3d538 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -392,7 +392,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, int icmp6len; int maxlen; caddr_t mac; - struct route_in6 ro; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; @@ -415,8 +414,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, if (m == NULL) return; - bzero(&ro, sizeof(ro)); - if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; @@ -493,24 +490,20 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, ifa_free(ifa); } else { int error; - struct sockaddr_in6 dst_sa; - struct in6_addr src_in; - struct ifnet *oifp; + 
struct in6_addr dst_in, src_in; + uint32_t scopeid; - bzero(&dst_sa, sizeof(dst_sa)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; + in6_splitscope(&ip6->ip6_dst, &dst_in, &scopeid); - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, - NULL, &ro, NULL, &oifp, &src_in); + error = in6_selectsrc_addr(ifp->if_fib, &dst_in, + scopeid, &src_in); + if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_ns_output: source can't be " "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), + ip6_sprintf(ip6buf, &dst_in), error)); goto bad; } @@ -574,20 +567,14 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); + ip6_output(m, NULL, NULL, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); - /* We don't cache this route. */ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); return; } @@ -956,13 +943,12 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, { struct mbuf *m; struct m_tag *mtag; - struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; - struct in6_addr src, daddr6; - struct sockaddr_in6 dst_sa; + struct in6_addr dst, src, daddr6; int icmp6len, maxlen, error; + uint32_t scopeid; caddr_t mac = NULL; struct route_in6 ro; @@ -1020,22 +1006,18 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; - bzero(&dst_sa, sizeof(struct sockaddr_in6)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(struct sockaddr_in6); - dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. 
*/ - bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); + in6_splitscope(&daddr6, &dst, &scopeid); + + error = in6_selectsrc_addr(ifp->if_fib, &dst, scopeid, &src); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); + ip6_sprintf(ip6buf, &dst), error)); goto bad; } ip6->ip6_src = src; @@ -1101,20 +1083,14 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL); + ip6_output(m, NULL, NULL, 0, &im6o, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); - /* We don't cache this route. */ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); return; } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 34e38e496b78..63c0b0eb254e 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -401,7 +401,6 @@ rip6_output(struct mbuf *m, struct socket *so, ...) u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; - struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; @@ -468,8 +467,8 @@ rip6_output(struct mbuf *m, struct socket *so, ...) /* * Source address selection. */ - error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred, - &oifp, &in6a); + error = in6_selectsrc_scope(dstsock, optp, in6p, so->so_cred, + scope_ambiguous, &in6a); if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); @@ -477,19 +476,6 @@ rip6_output(struct mbuf *m, struct socket *so, ...) 
goto bad; ip6->ip6_src = in6a; - if (oifp && scope_ambiguous) { - /* - * Application should provide a proper zone ID or the use of - * default zone IDs should be enabled. Unfortunately, some - * applications do not behave as it should, so we need a - * workaround. Even if an appropriate ID is not determined - * (when it's required), if we can determine the outgoing - * interface. determine the zone ID based on the interface. - */ - error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); - if (error != 0) - goto bad; - } ip6->ip6_dst = dstsock->sin6_addr; /* @@ -504,7 +490,8 @@ rip6_output(struct mbuf *m, struct socket *so, ...) * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; - ip6->ip6_hlim = in6_selecthlim(in6p, oifp); + /* XXX: Get proper HLIM from selectsrc */ + ip6->ip6_hlim = in6_selecthlim(in6p, NULL); /* oifp */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { @@ -552,10 +539,13 @@ rip6_output(struct mbuf *m, struct socket *so, ...) } } - error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); + error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { +/* + * XXX: think about better way of doing interface statistics if (oifp) icmp6_ifoutstat_inc(oifp, type, code); +*/ ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); @@ -792,7 +782,6 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; - struct ifnet *ifp = NULL; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); @@ -821,21 +810,14 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? 
*/ - error = in6_selectsrc(addr, inp->in6p_outputopts, - inp, NULL, so->so_cred, &ifp, &in6a); + error = in6_selectsrc_scope(addr, inp->in6p_outputopts, + inp, so->so_cred, scope_ambiguous, &in6a); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } - /* XXX: see above */ - if (ifp && scope_ambiguous && - (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); - return (error); - } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index 63e925b99536..66250e947a35 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -383,6 +383,20 @@ sa6_recoverscope(struct sockaddr_in6 *sin6) return 0; } +/* + * Embed interface index for link-local addresses + * + */ +void +in6_setllascope(struct in6_addr *in6, struct ifnet *ifp) +{ + uint32_t zoneid; + + KASSERT(IN6_IS_SCOPE_LINKLOCAL(in6), ("Non-linklocal address")); + zoneid = ifp->if_index; + in6->s6_addr16[1] = htons(zoneid & 0xffff); +} + /* * Determine the appropriate scope zone ID for in6 and ifp. If ret_id is * non NULL, it is set to the zone ID. If the zone ID needs to be embedded @@ -460,6 +474,17 @@ in6_getscope(struct in6_addr *in6) return (0); } +void +in6_splitscope(struct in6_addr *src, struct in6_addr *dst, uint32_t *scopeid) +{ + uint32_t zoneid; + + *dst = *src; + zoneid = ntohs(in6_getscope(dst)); + in6_clearscope(dst); + *scopeid = zoneid; +} + /* * Return pointer to ifnet structure, corresponding to the zone id of * link-local scope. 
diff --git a/sys/netinet6/scope6_var.h b/sys/netinet6/scope6_var.h index 8a4b6fcf7754..5c9a01d16670 100644 --- a/sys/netinet6/scope6_var.h +++ b/sys/netinet6/scope6_var.h @@ -59,10 +59,13 @@ int sa6_embedscope(struct sockaddr_in6 *, int); int sa6_recoverscope(struct sockaddr_in6 *); int sa6_checkzone(struct sockaddr_in6 *); int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *); +void in6_setllascope(struct in6_addr *in6, struct ifnet *ifp); int in6_clearscope(struct in6_addr *); uint16_t in6_getscope(struct in6_addr *); uint32_t in6_getscopezone(const struct ifnet *, int); struct ifnet* in6_getlinkifnet(uint32_t); +void in6_splitscope(struct in6_addr *src, struct in6_addr *dst, + uint32_t *scopeid); #endif /* _KERNEL */ #endif /* _NETINET6_SCOPE6_VAR_H_ */ diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index de79816f0eb9..11a97c55ba76 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -631,7 +631,6 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; - struct ifnet *oifp = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; @@ -729,15 +728,10 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc(sin6, optp, inp, NULL, - td->td_ucred, &oifp, &in6a); + error = in6_selectsrc_scope(sin6, optp, inp, + td->td_ucred, scope_ambiguous, &in6a); if (error) goto release; - if (oifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, - oifp, NULL))) { - goto release; - } laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ @@ -867,7 +861,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, - NULL, inp); + inp); break; case AF_INET: error = 
EAFNOSUPPORT; diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 1f03196453d9..a64e3f2d5d2b 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -254,7 +254,7 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) * We don't need massage, IPv6 header fields are always in * net endian. */ - return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + return ip6_output(m, NULL, NULL, 0, NULL, NULL); #endif /* INET6 */ } panic("ipsec_process_done"); diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index b4cb69040f4a..0b3622a384ca 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -777,7 +777,7 @@ dummynet_send(struct mbuf *m) break; case DIR_OUT | PROTO_IPV6: - ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); + ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL); break; #endif diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index bbc000f6a099..c2438271299b 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -515,7 +515,8 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct nhop6_basic nh6; - if (fib6_lookup_nh_basic(fib, *src, 0, &nh6) != 0) + /* XXX: unembed scope? */ + if (fib6_lookup_nh_basic(fib, src, 0, 0, &nh6) != 0) return (0); /* If ifp is provided, check for equality with route table. */ @@ -563,8 +564,7 @@ send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m0 != NULL) - ip6_output(m0, NULL, NULL, 0, NULL, NULL, - NULL); + ip6_output(m0, NULL, NULL, 0, NULL, NULL); } FREE_PKT(m); } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. 
*/ diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index e58536228992..7b332f5d1c2b 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -1309,7 +1309,7 @@ check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, ip_output(m, NULL, NULL, 0, NULL, NULL); #ifdef INET6 else - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + ip6_output(m, NULL, NULL, 0, NULL, NULL); #endif } diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index eb12955e1848..7aeb3a993cb4 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -1378,8 +1378,7 @@ pf_intr(void *v) #endif /* INET */ #ifdef INET6 case PFSE_IP6: - ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, - NULL); + ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); break; case PFSE_ICMP6: icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type, @@ -2923,7 +2922,8 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); - if (fib6_lookup_nh_basic(rtableid, addr->v6, 0, &nh.u.nh6) == 0) + if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, &nh.u.nh6) + == 0) mss = nh.u.nh6.nh_mtu - hlen - sizeof(struct tcphdr); break; #endif /* INET6 */ @@ -5100,7 +5100,8 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, */ if (IN6_IS_SCOPE_EMBED(&addr->v6)) return (1); - if (fib6_lookup_nh_basic(rtableid, addr->v6, 0, &nh.u.nh6) != 0) + if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, &nh.u.nh6) + != 0) return (0); break; #endif @@ -5360,7 +5361,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (s) PF_STATE_UNLOCK(s); m0->m_flags |= M_SKIP_FIREWALL; - ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); + ip6_output(m0, NULL, NULL, 0, NULL, NULL); return; }