From 0895aec30c96307691f5bbfdc697c697d0c4afb5 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Oct 2008 12:21:21 +0000 Subject: [PATCH] Implement IPv4 source address selection for unbound sockets. For the jail case we are already looping over the interface addresses before falling back to the only IP address of a jail in case of no match. This is in preparation for the upcoming multi-IPv4/v6/no-IP jail patch this change was developed with initially. This also changes the semantics of selecting the IP for processes within a jail as it now uses the same logic as outside the jail (with additional checks) but no longer is on a mutually exclusive code path. Benchmarks had shown no difference at 95.0% confidence for either the plain or the jail case (even with the additional overhead). See: http://lists.freebsd.org/pipermail/freebsd-net/2008-September/019531.html Inspired by a patch from: Yahoo! (partially) Tested by: latest multi-IP jail patch users (implicitly) Discussed with: rwatson (general things around this) Reviewed by: mostly silence (feedback from bms) Help with benchmarking from: kris MFC after: 2 months --- sys/netinet/in_pcb.c | 246 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 204 insertions(+), 42 deletions(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index f41296ce7672..ea98a6fba898 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -534,6 +534,205 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) return (0); } +/* + * Do proper source address selection on an unbound socket in case + * of connect. Take jails into account as well. 
+ */ +static int +in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, + struct ucred *cred) +{ + struct in_ifaddr *ia; + struct ifaddr *ifa; + struct sockaddr *sa; + struct sockaddr_in *sin; + struct route sro; + int error; + + KASSERT(laddr != NULL, ("%s: null laddr", __func__)); + + error = 0; + ia = NULL; + bzero(&sro, sizeof(sro)); + + sin = (struct sockaddr_in *)&sro.ro_dst; + sin->sin_family = AF_INET; + sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_addr.s_addr = faddr->s_addr; + + /* + * If route is known our src addr is taken from the i/f, + * else punt. + * + * Find out route to destination. + */ + if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) + in_rtalloc_ign(&sro, RTF_CLONING, inp->inp_inc.inc_fibnum); + + /* + * If we found a route, use the address corresponding to + * the outgoing interface. + * + * Otherwise assume faddr is reachable on a directly connected + * network and try to find a corresponding interface to take + * the source address from. + */ + if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { + struct ifnet *ifp; + + ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin)); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin)); + if (ia == NULL) { + error = ENETUNREACH; + goto done; + } + + if (cred == NULL || !jailed(cred)) { + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + ifp = ia->ia_ifp; + ia = NULL; + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + + sa = ifa->ifa_addr; + if (sa->sa_family != AF_INET) + continue; + sin = (struct sockaddr_in *)sa; + if (htonl(prison_getip(cred)) == sin->sin_addr.s_addr) { + ia = (struct in_ifaddr *)ifa; + break; + } + } + if (ia != NULL) { + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + /* 3. As a last resort return the 'default' jail address. 
*/ + laddr->s_addr = htonl(prison_getip(cred)); + goto done; + } + + /* + * If the outgoing interface on the route found is not + * a loopback interface, use the address from that interface. + * In case of jails do those three steps: + * 1. check if the interface address belongs to the jail. If so use it. + * 2. check if we have any address on the outgoing interface + * belonging to this jail. If so use it. + * 3. as a last resort return the 'default' jail address. + */ + if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { + + /* If not jailed, use the default returned. */ + if (cred == NULL || !jailed(cred)) { + ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + /* Jailed. */ + /* 1. Check if the iface address belongs to the jail. */ + sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr; + if (htonl(prison_getip(cred)) == sin->sin_addr.s_addr) { + ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + /* + * 2. Check if we have any address on the outgoing interface + * belonging to this jail. + */ + TAILQ_FOREACH(ifa, &sro.ro_rt->rt_ifp->if_addrhead, ifa_link) { + + sa = ifa->ifa_addr; + if (sa->sa_family != AF_INET) + continue; + sin = (struct sockaddr_in *)sa; + if (htonl(prison_getip(cred)) == sin->sin_addr.s_addr) { + ia = (struct in_ifaddr *)ifa; + break; + } + } + if (ia != NULL) { + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + /* 3. As a last resort return the 'default' jail address. */ + laddr->s_addr = htonl(prison_getip(cred)); + goto done; + } + + /* + * The outgoing interface is marked with 'loopback net', so a route + * to ourselves is here. + * Try to find the interface of the destination address and then + * take the address from there. That interface is not necessarily + * a loopback interface. 
+ * In case of jails, check that it is an address of the jail + * and if we cannot find, fall back to the 'default' jail address. + */ + if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { + struct sockaddr_in sain; + + bzero(&sain, sizeof(struct sockaddr_in)); + sain.sin_family = AF_INET; + sain.sin_len = sizeof(struct sockaddr_in); + sain.sin_addr.s_addr = faddr->s_addr; + + ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain))); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet(sintosa(&sain))); + + if (cred == NULL || !jailed(cred)) { + if (ia == NULL) { + error = ENETUNREACH; + goto done; + } + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + + /* Jailed. */ + if (ia != NULL) { + struct ifnet *ifp; + + ifp = ia->ia_ifp; + ia = NULL; + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + + sa = ifa->ifa_addr; + if (sa->sa_family != AF_INET) + continue; + sin = (struct sockaddr_in *)sa; + if (htonl(prison_getip(cred)) == + sin->sin_addr.s_addr) { + ia = (struct in_ifaddr *)ifa; + break; + } + } + if (ia != NULL) { + laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + goto done; + } + } + + /* 3. As a last resort return the 'default' jail address. */ + laddr->s_addr = htonl(prison_getip(cred)); + goto done; + } + +done: + if (sro.ro_rt != NULL) + RTFREE(sro.ro_rt); + return (error); +} + /* * Set up for a connect from a socket to the specified address. 
* On entry, *laddrp and *lportp should contain the current local @@ -557,8 +756,6 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, INIT_VNET_INET(inp->inp_vnet); struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; - struct sockaddr_in sa; - struct ucred *socred; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; @@ -583,17 +780,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; - socred = inp->inp_socket->so_cred; - if (laddr.s_addr == INADDR_ANY && jailed(socred)) { - bzero(&sa, sizeof(sa)); - sa.sin_addr.s_addr = htonl(prison_getip(socred)); - sa.sin_len = sizeof(sa); - sa.sin_family = AF_INET; - error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, - &laddr.s_addr, &lport, cred); - if (error) - return (error); - } + if (!TAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, @@ -611,35 +798,10 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; } if (laddr.s_addr == INADDR_ANY) { - ia = NULL; - /* - * If route is known our src addr is taken from the i/f, - * else punt. - * - * Find out route to destination - */ - if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) - ia = ip_rtaddr(faddr, inp->inp_inc.inc_fibnum); - /* - * If we found a route, use the address corresponding to - * the outgoing interface. - * - * Otherwise assume faddr is reachable on a directly connected - * network and try to find a corresponding interface to take - * the source address from. 
- */ - if (ia == NULL) { - bzero(&sa, sizeof(sa)); - sa.sin_addr = faddr; - sa.sin_len = sizeof(sa); - sa.sin_family = AF_INET; + error = in_pcbladdr(inp, &faddr, &laddr, cred); + if (error) + return (error); - ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); - if (ia == NULL) - ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); - if (ia == NULL) - return (ENETUNREACH); - } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the @@ -658,9 +820,9 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, break; if (ia == NULL) return (EADDRNOTAVAIL); + laddr = ia->ia_addr.sin_addr; } } - laddr = ia->ia_addr.sin_addr; } oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,