diff --git a/sys/net/route.c b/sys/net/route.c index 9673a7abb2e1..64f8c7a7ff20 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)route.c 8.2 (Berkeley) 11/15/93 - * $Id$ + * $Id: route.c,v 1.3 1994/08/02 07:46:40 davidg Exp $ */ #include @@ -273,7 +273,12 @@ rtioctl(req, data, p) caddr_t data; struct proc *p; { +#ifdef MULTICAST + /* Multicast goop, grrr... */ + return mrt_ioctl(cmd, data, p); +#else return (EOPNOTSUPP); +#endif } struct ifaddr * diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index e39d2d9945d1..ca1db1ea69d2 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -35,11 +35,17 @@ * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 - * $Id$ + * $Id: igmp.c,v 1.3 1994/08/02 07:48:04 davidg Exp $ */ -/* Internet Group Management Protocol (IGMP) routines. */ - +/* + * Internet Group Management Protocol (IGMP) routines. + * + * Written by Steve Deering, Stanford, May 1988. + * Modified by Rosen Sharma, Stanford, Aug 1994. + * + * MULTICAST 1.4 + */ #include #include @@ -60,10 +66,14 @@ extern struct ifnet loif; +struct igmpstat igmpstat; + static int igmp_timers_are_running = 0; static u_long igmp_all_hosts_group; +static struct router_info *Head = 0; -static void igmp_sendreport __P((struct in_multi *)); +static void igmp_sendpkt(struct in_multi *, int); +static void igmp_sendleave(struct in_multi *); void igmp_init() @@ -72,6 +82,73 @@ igmp_init() * To avoid byte-swapping the same value over and over again. */ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP); + Head = (struct router_info *) 0; +} + +int +fill_rti(inm) + struct in_multi *inm; +{ + register struct router_info *rti = Head; + +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> entering \n"); +#endif + while (rti) { + if (rti->ifp == inm->inm_ifp){ /* ? 
is it ok to compare */ + /* pointers */ + inm->inm_rti = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> found old entry \n"); +#endif + if (rti->type == IGMP_OLD_ROUTER) + return IGMP_HOST_MEMBERSHIP_REPORT; + else + return IGMP_HOST_NEW_MEMBERSHIP_REPORT; + } + rti = rti->next; + } + MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); + rti->ifp = inm->inm_ifp; + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + rti->next = Head; + Head = rti; + inm->inm_rti = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> created new entry \n"); +#endif + return IGMP_HOST_NEW_MEMBERSHIP_REPORT; +} + +struct router_info * +find_rti(ifp) + struct ifnet *ifp; +{ + register struct router_info *rti = Head; + +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> entering \n"); +#endif + while (rti) { + if (rti->ifp == ifp){ /* ? is it ok to compare pointers */ +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> found old entry \n"); +#endif + return rti; + } + rti = rti->next; + } + MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); + rti->ifp = ifp; + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + rti->next = Head; + Head = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> created an entry \n"); +#endif + return rti; } void @@ -87,6 +164,9 @@ igmp_input(m, iphlen) register struct in_multi *inm; register struct in_ifaddr *ia; struct in_multistep step; + struct router_info *rti; + + static int timer; /** timer value in the igmp query header **/ ++igmpstat.igps_rcv_total; @@ -121,7 +201,10 @@ igmp_input(m, iphlen) } m->m_data -= iphlen; m->m_len += iphlen; + ip = mtod(m, struct ip *); + timer = ntohs(igmp->igmp_code); + rti = find_rti(ifp); switch (igmp->igmp_type) { @@ -131,29 +214,127 @@ igmp_input(m, iphlen) if (ifp == &loif) break; - if (ip->ip_dst.s_addr != igmp_all_hosts_group) { - ++igmpstat.igps_rcv_badqueries; - m_freem(m); - return; - } + if (igmp->igmp_code == 0) { + if 
(ip->ip_dst.s_addr != igmp_all_hosts_group) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } - /* - * Start the timers in all of our membership records for - * the interface on which the query arrived, except those - * that are already running and those that belong to the - * "all-hosts" group. - */ - IN_FIRST_MULTI(step, inm); - while (inm != NULL) { - if (inm->inm_ifp == ifp && inm->inm_timer == 0 && - inm->inm_addr.s_addr != igmp_all_hosts_group) { - inm->inm_timer = - IGMP_RANDOM_DELAY(inm->inm_addr); - igmp_timers_are_running = 1; + /* + * Start the timers in all of our membership records for + * the interface on which the query arrived, except those + * that are already running and those that belong to the + * "all-hosts" group. + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp + && inm->inm_timer == 0 + && inm->inm_addr.s_addr + != igmp_all_hosts_group) { + + inm->inm_state = IGMP_DELAYING_MEMBER; + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ ); + + igmp_timers_are_running = 1; + } + IN_NEXT_MULTI(step, inm); + } + } else { + /* + ** New Router + */ + + if (ip->ip_dst.s_addr != igmp_all_hosts_group) { + if (!(m->m_flags & M_MCAST)) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } + } + if (ip->ip_dst.s_addr == igmp_all_hosts_group) { + + /* + * - Start the timers in all of our membership records + * for the interface on which the query arrived + * excl. those that belong to the "all-hosts" group. + * - For timers already running check if they need to + * be reset. 
+ * - Use the igmp->igmp_code field as the maximum + * delay possible + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL){ + switch(inm->inm_state){ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + if (inm->inm_ifp == ifp && + inm->inm_addr.s_addr != + igmp_all_hosts_group) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_DELAYING_MEMBER: + if (inm->inm_ifp == ifp && + (inm->inm_timer > + timer * PR_FASTHZ / IGMP_TIMER_SCALE) + && + inm->inm_addr.s_addr != + igmp_all_hosts_group) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_AWAKENING_MEMBER; + break; + } + IN_NEXT_MULTI(step, inm); + } + } else { + /* + ** group specific query + */ + + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_addr.s_addr == ip->ip_dst.s_addr) { + switch(inm->inm_state ){ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + inm->inm_state = IGMP_DELAYING_MEMBER; + if (inm->inm_ifp == ifp ) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_DELAYING_MEMBER: + inm->inm_state = IGMP_DELAYING_MEMBER; + if (inm->inm_ifp == ifp && + (inm->inm_timer > + timer * PR_FASTHZ / IGMP_TIMER_SCALE) ) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_AWAKENING_MEMBER; + break; + } + } IN_NEXT_MULTI(step, inm); } - + } + } break; case IGMP_HOST_MEMBERSHIP_REPORT: @@ -193,7 +374,80 @@ igmp_input(m, iphlen) ++igmpstat.igps_rcv_ourreports; } + if (inm != NULL) { + inm->inm_timer = 0; + ++igmpstat.igps_rcv_ourreports; + + switch(inm->inm_state){ + case 
IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + case IGMP_DELAYING_MEMBER: + /** check this out - this was if (oldrouter) **/ + if (inm->inm_rti->type == IGMP_OLD_ROUTER) + inm->inm_state = IGMP_LAZY_MEMBER; + else inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + } + } + break; + + case IGMP_HOST_NEW_MEMBERSHIP_REPORT: + /* + * a new report + */ + ++igmpstat.igps_rcv_reports; + + if (ifp == &loif) + break; + + if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || + igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { + ++igmpstat.igps_rcv_badreports; + m_freem(m); + return; + } + + /* + * KLUDGE: if the IP source address of the report has an + * unspecified (i.e., zero) subnet number, as is allowed for + * a booting host, replace it with the correct subnet number + * so that a process-level multicast routing demon can + * determine which subnet it arrived from. This is necessary + * to compensate for the lack of any way for a process to + * determine the arrival interface of an incoming packet. + */ + if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) { + IFP_TO_IA(ifp, ia); + if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet); + } + + /* + * If we belong to the group being reported, stop + * our timer for that group. 
+ */ + IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + if (inm != NULL) { + inm->inm_timer = 0; + ++igmpstat.igps_rcv_ourreports; + + switch(inm->inm_state){ + case IGMP_DELAYING_MEMBER: + case IGMP_IDLE_MEMBER: + inm->inm_state = IGMP_LAZY_MEMBER; + break; + case IGMP_AWAKENING_MEMBER: + inm->inm_state = IGMP_LAZY_MEMBER; + break; + case IGMP_LAZY_MEMBER: + case IGMP_SLEEPING_MEMBER: + break; + } + } } /* @@ -209,12 +463,16 @@ igmp_joingroup(inm) { register int s = splnet(); + inm->inm_state = IGMP_IDLE_MEMBER; + if (inm->inm_addr.s_addr == igmp_all_hosts_group || inm->inm_ifp == &loif) inm->inm_timer = 0; else { - igmp_sendreport(inm); - inm->inm_timer = IGMP_RANDOM_DELAY(inm->inm_addr); + igmp_sendpkt(inm,fill_rti(inm)); + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ); + inm->inm_state = IGMP_DELAYING_MEMBER; igmp_timers_are_running = 1; } splx(s); @@ -227,6 +485,19 @@ igmp_leavegroup(inm) /* * No action required on leaving a group. */ + switch(inm->inm_state){ + case IGMP_DELAYING_MEMBER: + case IGMP_IDLE_MEMBER: + if (!(inm->inm_addr.s_addr == igmp_all_hosts_group || + inm->inm_ifp == &loif)) + if (inm->inm_rti->type != IGMP_OLD_ROUTER) + igmp_sendleave(inm); + break; + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + case IGMP_SLEEPING_MEMBER: + break; + } } void @@ -250,7 +521,13 @@ igmp_fasttimo() if (inm->inm_timer == 0) { /* do nothing */ } else if (--inm->inm_timer == 0) { - igmp_sendreport(inm); + if (inm->inm_state == IGMP_DELAYING_MEMBER) { + if (inm->inm_rti->type == IGMP_OLD_ROUTER) + igmp_sendpkt(inm, IGMP_HOST_MEMBERSHIP_REPORT); + else + igmp_sendpkt(inm, IGMP_HOST_NEW_MEMBERSHIP_REPORT); + inm->inm_state = IGMP_IDLE_MEMBER; + } } else { igmp_timers_are_running = 1; } @@ -259,57 +536,93 @@ igmp_fasttimo() splx(s); } -static void -igmp_sendreport(inm) - register struct in_multi *inm; +void +igmp_slowtimo() { - register struct mbuf *m; - register struct igmp *igmp; - register struct ip *ip; - register struct 
ip_moptions *imo; - struct ip_moptions simo; + int s = splnet(); + register struct router_info *rti = Head; - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (m == NULL) - return; - /* - * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN - * is smaller than mbuf size returned by MGETHDR. - */ - m->m_data += max_linkhdr; - m->m_len = sizeof(struct ip) + IGMP_MINLEN; - m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN; - - ip = mtod(m, struct ip *); - ip->ip_tos = 0; - ip->ip_len = sizeof(struct ip) + IGMP_MINLEN; - ip->ip_off = 0; - ip->ip_p = IPPROTO_IGMP; - ip->ip_src.s_addr = INADDR_ANY; - ip->ip_dst = inm->inm_addr; - - igmp = (struct igmp *)(ip + 1); - igmp->igmp_type = IGMP_HOST_MEMBERSHIP_REPORT; - igmp->igmp_code = 0; - igmp->igmp_group = inm->inm_addr; - igmp->igmp_cksum = 0; - igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); - - imo = &simo; - bzero((caddr_t)imo, sizeof(*imo)); - imo->imo_multicast_ifp = inm->inm_ifp; - imo->imo_multicast_ttl = 1; - /* - * Request loopback of the report if we are acting as a multicast - * router, so that the process-level routing demon can hear it. 
- */ -#ifdef MROUTING - { - extern struct socket *ip_mrouter; - imo->imo_multicast_loop = (ip_mrouter != NULL); - } +#ifdef IGMP_DEBUG + printf("[igmp.c,_slowtimo] -- > entering \n"); #endif - ip_output(m, NULL, NULL, 0, imo); - - ++igmpstat.igps_snd_reports; + while (rti) { + rti->time ++; + if (rti->time >= IGMP_AGE_THRESHOLD){ + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + } + rti = rti->next; + } +#ifdef IGMP_DEBUG + printf("[igmp.c,_slowtimo] -- > exiting \n"); +#endif + splx(s); +} + +static void +igmp_sendpkt(inm, type) + struct in_multi *inm; + int type; +{ + struct mbuf *m; + struct igmp *igmp; + struct ip *ip; + struct ip_moptions *imo; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) + return; + + MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_DONTWAIT); + if (!imo) { + m_free(m); + return; + } + + m->m_pkthdr.rcvif = &loif; + m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN; + MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip)); + m->m_data += sizeof(struct ip); + m->m_len = IGMP_MINLEN; + igmp = mtod(m, struct igmp *); + igmp->igmp_type = type; + igmp->igmp_code = 0; + igmp->igmp_group = inm->inm_addr; + igmp->igmp_cksum = 0; + igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); + + m->m_data -= sizeof(struct ip); + m->m_len += sizeof(struct ip); + ip = mtod(m, struct ip *); + ip->ip_tos = 0; + ip->ip_len = sizeof(struct ip) + IGMP_MINLEN; + ip->ip_off = 0; + ip->ip_p = IPPROTO_IGMP; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst = igmp->igmp_group; + + imo->imo_multicast_ifp = inm->inm_ifp; + imo->imo_multicast_ttl = 1; + /* + * Request loopback of the report if we are acting as a multicast + * router, so that the process-level routing demon can hear it. 
+ */ +#ifdef MROUTING + imo->imo_multicast_loop = (ip_mrouter != NULL); +#else + imo->imo_multicast_loop = 0; +#endif + + ip_output(m, (struct mbuf *)0, (struct route *)0, 0, imo); + + FREE(imo, M_IPMOPTS); + ++igmpstat.igps_snd_reports; + +} + +static void +igmp_sendleave(inm) + struct in_multi *inm; +{ + igmp_sendpkt(inm, IGMP_HOST_LEAVE_MESSAGE); } diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h index 52c672bdd3f8..1e082c4e570d 100644 --- a/sys/netinet/igmp.h +++ b/sys/netinet/igmp.h @@ -35,13 +35,19 @@ * SUCH DAMAGE. * * @(#)igmp.h 8.1 (Berkeley) 6/10/93 - * $Id: igmp.h,v 1.2 1994/08/02 07:48:07 davidg Exp $ + * $Id: igmp.h,v 1.3 1994/08/21 05:27:25 paul Exp $ */ #ifndef _NETINET_IGMP_H_ #define _NETINET_IGMP_H_ -/* Internet Group Management Protocol (IGMP) definitions. */ +/* + * Internet Group Management Protocol (IGMP) definitions. + * + * Written by Steve Deering, Stanford, May 1988. + * + * MULTICAST 1.2 + */ /* * IGMP packet format. @@ -59,7 +65,29 @@ struct igmp { #define IGMP_HOST_MEMBERSHIP_REPORT 0x12 #define IGMP_DVMRP 0x13 /* for experimental multicast */ /* routing protocol */ +#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 +#define IGMP_HOST_LEAVE_MESSAGE 0x17 +#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ +#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. 
(to sender) */ #define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ +#define IGMP_TIMER_SCALE 10 /* denotes that the igmp->timer field */ + /*specifies time in 10ths of seconds */ -#endif +#define IGMP_DELAYING_MEMBER 1 +#define IGMP_IDLE_MEMBER 2 +#define IGMP_LAZY_MEMBER 3 +#define IGMP_SLEEPING_MEMBER 4 +#define IGMP_AWAKENING_MEMBER 5 + + +#define IGMP_OLD_ROUTER 0 +#define IGMP_NEW_ROUTER 1 + +#define IGMP_AGE_THRESHOLD 540 + +#ifdef IGMP_STATES +static char *tostate[]={"","DELAYING_MEMBER","IDLE","LAZY","SLEEPING", + "AWAKENING" }; +#endif /* IGMP_STATES */ +#endif /* _NETINET_IGMP_H_ */ diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h index 0018cd22f3ea..403a9115b4c1 100644 --- a/sys/netinet/igmp_var.h +++ b/sys/netinet/igmp_var.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)igmp_var.h 8.1 (Berkeley) 7/19/93 - * $Id: igmp_var.h,v 1.2 1994/08/02 07:48:09 davidg Exp $ + * $Id: igmp_var.h,v 1.3 1994/08/21 05:27:26 paul Exp $ */ #ifndef _NETINET_IGMP_VAR_H_ @@ -63,29 +63,16 @@ struct igmpstat { }; #ifdef KERNEL -struct igmpstat igmpstat; +extern struct igmpstat igmpstat; -/* - * Macro to compute a random timer value between 1 and (IGMP_MAX_REPORTING_ - * DELAY * countdown frequency). We generate a "random" number by adding - * the total number of IP packets received, our primary IP address, and the - * multicast address being timed-out. The 4.3 random() routine really - * ought to be available in the kernel! 
- */ -#define IGMP_RANDOM_DELAY(multiaddr) \ - /* struct in_addr multiaddr; */ \ - ( (ipstat.ips_total + \ - ntohl(IA_SIN(in_ifaddr)->sin_addr.s_addr) + \ - ntohl((multiaddr).s_addr) \ - ) \ - % (IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ) + 1 \ - ) +#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1) -void igmp_init __P(()); +void igmp_init __P((void)); void igmp_input __P((struct mbuf *, int)); void igmp_joingroup __P((struct in_multi *)); void igmp_leavegroup __P((struct in_multi *)); -void igmp_fasttimo __P(()); +void igmp_fasttimo __P((void)); +void igmp_slowtimo __P((void)); #endif #endif diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 0bf7ed436d05..c04336eda1b1 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 - * $Id: in.h,v 1.2 1994/08/02 07:48:14 davidg Exp $ + * $Id: in.h,v 1.3 1994/08/21 05:27:27 paul Exp $ */ #ifndef _NETINET_IN_H_ @@ -55,6 +55,7 @@ #define IPPROTO_UDP 17 /* user datagram protocol */ #define IPPROTO_IDP 22 /* xns idp */ #define IPPROTO_TP 29 /* tp-4 w/ class negotiation */ +#define IPPROTO_RSVP 46 /* resource reservation */ #define IPPROTO_EON 80 /* ISO cnlp */ #define IPPROTO_ENCAP 98 /* encapsulation header */ @@ -162,6 +163,10 @@ struct ip_opts { #define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */ #define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ #define IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */ +#define IP_MULTICAST_VIF 14 /* set/get IP mcast virt. iface */ +#define IP_RSVP_ON 15 /* enable RSVP in kernel */ +#define IP_RSVP_OFF 16 /* disable RSVP in kernel */ + /* * Defaults and limits for options diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index fda64a13c940..b3a8841bcfb8 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -1,115 +1,131 @@ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 - * $Id$ - */ + /* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 + * $Id: in_proto.c,v 1.3 1994/08/02 07:48:23 davidg Exp $ + */ -#include -#include -#include -#include -#include + #include + #include + #include + #include + #include -#include -#include -#include + #include + #include + #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/* - * TCP/IP protocol family: IP, ICMP, UDP, TCP. - */ + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + /* + * TCP/IP protocol family: IP, ICMP, UDP, TCP. + */ -#ifdef NSIP -void idpip_input(), nsip_ctlinput(); -#endif + #ifdef NSIP + void idpip_input(), nsip_ctlinput(); + #endif -#ifdef TPIP -void tpip_input(), tpip_ctlinput(), tp_ctloutput(); -int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); -#endif + #ifdef TPIP + void tpip_input(), tpip_ctlinput(), tp_ctloutput(); + int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); + #endif -#ifdef EON -void eoninput(), eonctlinput(), eonprotoinit(); -#endif /* EON */ + #ifdef EON + void eoninput(), eonctlinput(), eonprotoinit(); + #endif /* EON */ -extern struct domain inetdomain; + #ifdef MROUTING + void multiencap_decap(struct mbuf *); + #endif -struct protosw inetsw[] = { -{ 0, &inetdomain, 0, 0, - 0, ip_output, 0, 0, - 0, - ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl -}, -{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, - udp_input, 0, udp_ctlinput, ip_ctloutput, - udp_usrreq, - udp_init, 0, 0, 0, udp_sysctl -}, -{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, - tcp_input, 0, tcp_ctlinput, tcp_ctloutput, - tcp_usrreq, - tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, -}, -{ SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, - rip_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, -}, -{ SOCK_RAW, 
&inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, - icmp_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, icmp_sysctl -}, -{ SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, - igmp_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - igmp_init, igmp_fasttimo, 0, 0, -}, + extern struct domain inetdomain; + + struct protosw inetsw[] = { + { 0, &inetdomain, 0, 0, + 0, ip_output, 0, 0, + 0, + ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl + }, + { SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, + udp_input, 0, udp_ctlinput, ip_ctloutput, + udp_usrreq, + udp_init, 0, 0, 0, udp_sysctl + }, + { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, + tcp_input, 0, tcp_ctlinput, tcp_ctloutput, + tcp_usrreq, + tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, + }, + { SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, + rip_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, + { SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, + icmp_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, icmp_sysctl + }, + { SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, + igmp_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + igmp_init, igmp_fasttimo, igmp_slowtimo, 0, + }, + { SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR, + rip_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, +#ifdef MROUTING + { SOCK_RAW, &inetdomain, IPPROTO_ENCAP, PR_ATOMIC|PR_ADDR, + multiencap_decap, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, +#endif /* MROUTING */ #ifdef TPIP { SOCK_SEQPACKET,&inetdomain, IPPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD, tpip_input, 0, tpip_ctlinput, tp_ctloutput, diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 768364484799..352e19a4f7b2 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)in_var.h 8.1 (Berkeley) 6/10/93 - * $Id: in_var.h,v 1.4 1994/08/18 22:35:29 wollman Exp $ + * $Id: in_var.h,v 1.5 1994/08/21 05:27:30 paul Exp $ */ #ifndef _NETINET_IN_VAR_H_ @@ -114,6 +114,18 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ } #endif +/* + * This information should be part of the ifnet structure but we don't wish + * to change that - as it might break a number of things + */ + +struct router_info { + struct ifnet *ifp; + int type; /* type of router which is querier on this interface */ + int time; /* # of slow timeouts since last old query */ + struct router_info *next; +}; + /* * Internet multicast address structure. There is one of these for each IP * multicast group to which this host belongs on a given network interface. @@ -127,6 +139,8 @@ struct in_multi { u_int inm_refcount; /* no. membership claims by sockets */ u_int inm_timer; /* IGMP membership report timer */ struct in_multi *inm_next; /* ptr to next multicast address */ + u_int inm_state; /* state of the membership */ + struct router_info *inm_rti; /* router info*/ }; #ifdef KERNEL diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 4841fb1e4bea..40dd6bf93c06 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 - * $Id: ip_input.c,v 1.3 1994/08/02 07:48:38 davidg Exp $ + * $Id: ip_input.c,v 1.4 1994/08/18 22:35:30 wollman Exp $ */ #include @@ -56,6 +56,9 @@ #include #include +#include +struct socket *ip_rsvpd; + #ifndef IPFORWARDING #ifdef GATEWAY #define IPFORWARDING 1 /* forward IP packets not for us */ @@ -237,6 +240,15 @@ ipintr() if (hlen > sizeof (struct ip) && ip_dooptions(m)) goto next; + /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no + * matter if it is destined to another node, or whether it is + * a multicast one, RSVP wants it! and prevents it from being forwarded + * anywhere else. 
Also checks if the rsvp daemon is running before + * grabbing the packet. + */ + if (ip_rsvpd != NULL && ip->ip_p==IPPROTO_RSVP) + goto ours; + /* * Check our list of addresses, to see if the packet is for us. */ @@ -271,8 +283,6 @@ ipintr() if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; #ifdef MROUTING - extern struct socket *ip_mrouter; - if (ip_mrouter) { /* * If we are acting as a multicast router, all @@ -287,7 +297,7 @@ ipintr() * ip_output().) */ ip->ip_id = htons(ip->ip_id); - if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { + if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { ipstat.ips_cantforward++; m_freem(m); goto next; @@ -1168,3 +1178,25 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) } /* NOTREACHED */ } + +int +ip_rsvp_init(struct socket *so) +{ + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + if (ip_rsvpd != NULL) + return EADDRINUSE; + + ip_rsvpd = so; + + return 0; +} + +int +ip_rsvp_done(void) +{ + ip_rsvpd = NULL; + return 0; +} diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index b07d9193e1bc..b14951d7450d 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -1,210 +1,428 @@ /* - * Copyright (c) 1989 Stephen Deering - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Stephen Deering of Stanford University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 - * $Id$ - */ - -/* - * Procedures for the kernel part of DVMRP, - * a Distance-Vector Multicast Routing Protocol. - * (See RFC-1075.) + * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. + * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 + * Modified by Van Jacobson, LBL, January 1993 + * Modified by Ajit Thyagarajan, PARC, August 1993 * - * MROUTING 1.1 + * MROUTING 1.8 */ -#ifndef MROUTING -int ip_mrtproto; /* for netstat only */ -#else #include #include -#include -#include -#include #include -#include #include #include +#include +#include #include - +#include +#include #include #include #include - #include #include #include +#include #include #include -#include - #include #include #include -/* Static forwards */ -static int ip_mrouter_init __P((struct socket *)); -static int add_vif __P((struct vifctl *)); -static int del_vif __P((vifi_t *vifip)); -static int add_lgrp __P((struct lgrplctl *)); -static int del_lgrp __P((struct lgrplctl *)); -static int grplst_member __P((struct vif *, struct in_addr)); -static u_long nethash __P((struct in_addr in)); -static int add_mrt __P((struct mrtctl *)); -static int del_mrt __P((struct in_addr *)); -static struct mrt *mrtfind __P((struct in_addr)); -static void phyint_send __P((struct mbuf *, struct vif *)); -static void tunnel_send __P((struct mbuf *, struct vif *)); +#ifndef NTOHL +#if BYTE_ORDER != BIG_ENDIAN +#define NTOHL(d) ((d) = ntohl((d))) +#define NTOHS(d) ((d) = ntohs((u_short)(d))) +#define HTONL(d) ((d) = htonl((d))) +#define HTONS(d) ((d) = htons((u_short)(d))) +#else +#define NTOHL(d) +#define NTOHS(d) +#define HTONL(d) +#define HTONS(d) +#endif +#endif -#define INSIZ sizeof(struct in_addr) -#define same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) -#define satosin(sa) ((struct sockaddr_in *)(sa)) +#ifndef MROUTING +/* + * Dummy routines and globals used when multicast routing is not compiled in. 
+ */ + +struct socket *ip_mrouter = NULL; +u_int ip_mrtproto = 0; + +int +ip_mrouter_cmd(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf *m; +{ + return(EOPNOTSUPP); +} + +int +ip_mrouter_done() +{ + return(0); +} + +int +ip_mforward(ip, ifp, m) + struct ip *ip; + struct ifnet *ifp; + struct mbuf *m; +{ + return(0); +} +#else + +#define INSIZ sizeof(struct in_addr) +#define same(a1, a2) \ + (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) + +#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ /* * Globals. All but ip_mrouter and ip_mrtproto could be static, * except for netstat or debugging purposes. */ -struct socket *ip_mrouter = NULL; -int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ +struct socket *ip_mrouter = NULL; +int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ -struct mrt *mrttable[MRTHASHSIZ]; -struct vif viftable[MAXVIFS]; -struct mrtstat mrtstat; +#define NO_RTE_FOUND 0x1 +#define RTE_FOUND 0x2 + +struct mbuf *mfctable[MFCTBLSIZ]; +struct vif viftable[MAXVIFS]; +struct mrtstat mrtstat; +u_int mrtdebug = 0; /* debug level */ +u_int tbfdebug = 0; /* tbf debug level */ + +u_long timeout_val = 0; /* count of outstanding upcalls */ + +/* + * Define the token bucket filter structures + * tbftable -> each vif has one of these for storing info + * qtable -> each interface has an associated queue of pkts + */ + +struct tbf tbftable[MAXVIFS]; +struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; + +/* + * 'Interfaces' associated with decapsulator (so we can tell + * packets that went through it from ones that get reflected + * by a broken gateway). These interfaces are never linked into + * the system ifnet list & no routes point to them. I.e., packets + * can't be sent this way. They only exist as a placeholder for + * multicast source verification. 
+ */ +struct ifnet multicast_decap_if[MAXVIFS]; + +#define ENCAP_TTL 64 +#define ENCAP_PROTO 4 + +/* prototype IP hdr for encapsulated packets */ +struct ip multicast_encap_iphdr = { +#if defined(ultrix) || defined(i386) + sizeof(struct ip) >> 2, IPVERSION, +#else + IPVERSION, sizeof(struct ip) >> 2, +#endif + 0, /* tos */ + sizeof(struct ip), /* total length */ + 0, /* id */ + 0, /* frag offset */ + ENCAP_TTL, ENCAP_PROTO, + 0, /* checksum */ +}; /* * Private variables. */ -static vifi_t numvifs = 0; -static struct mrt *cached_mrt = NULL; -static u_long cached_origin; -static u_long cached_originmask; +static vifi_t numvifs = 0; + +/* + * one-back cache used by multiencap_decap to locate a tunnel's vif + * given a datagram's src ip address. + */ +static u_long last_encap_src; +static struct vif *last_encap_vif; + +static u_long nethash_fc(u_long, u_long); +static struct mfc *mfcfind(u_long, u_long); +int get_sg_cnt(struct sioc_sg_req *); +int get_vif_cnt(struct sioc_vif_req *); +int get_vifs(caddr_t); +static int add_vif(struct vifctl *); +static int del_vif(vifi_t *); +static int add_mfc(struct mfcctl *); +static int del_mfc(struct delmfcctl *); +static void cleanup_cache(void *); +static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, + struct ip_moptions *); +int legal_vif_num(int); +static void phyint_send(struct ip *, struct vif *, struct mbuf *); +static void srcrt_send(struct ip *, struct vif *, struct mbuf *); +static void encap_send(struct ip *, struct vif *, struct mbuf *); +void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, + struct ip_moptions *); +void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); +void tbf_process_q(struct vif *); +void tbf_dequeue(struct vif *, int); +void tbf_reprocess_q(void *); +int tbf_dq_sel(struct vif *, struct ip *); +void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); +void tbf_update_tokens(struct vif *); +static int priority(struct vif *, 
struct ip *); +static int ip_mrouter_init(struct socket *); + +/* + * A simple hash function: returns MFCHASHMOD of the low-order octet of + * the argument's network or subnet number and the multicast group assoc. + */ +static u_long +nethash_fc(m,n) + register u_long m; + register u_long n; +{ + struct in_addr in1; + struct in_addr in2; + + in1.s_addr = m; + m = in_netof(in1); + while ((m & 0xff) == 0) m >>= 8; + + in2.s_addr = n; + n = in_netof(in2); + while ((n & 0xff) == 0) n >>= 8; + + return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); +} + +/* + * this is a direct-mapped cache used to speed the mapping from a + * datagram source address to the associated multicast route. Note + * that unlike mrttable, the hash is on IP address, not IP net number. + */ +#define MFCHASHSIZ 1024 +#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ + ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) +struct mfc *mfchash[MFCHASHSIZ]; + +/* + * Find a route for a given origin IP address and Multicast group address + * Type of service parameter to be added in the future!!! 
+ */ +#define MFCFIND(o, g, rt) { \ + register u_int _mrhasho = o; \ + register u_int _mrhashg = g; \ + _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ + ++mrtstat.mrts_mfc_lookups; \ + rt = mfchash[_mrhasho]; \ + if ((rt == NULL) || \ + ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ + (g != rt->mfc_mcastgrp.s_addr)) \ + if ((rt = mfcfind(o, g)) != NULL) \ + mfchash[_mrhasho] = rt; \ +} + +/* + * Find route by examining hash table entries + */ +static struct mfc * +mfcfind(origin, mcastgrp) + u_long origin; + u_long mcastgrp; +{ + register struct mbuf *mb_rt; + register struct mfc *rt; + register u_long hash; + + hash = nethash_fc(origin, mcastgrp); + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && + (mcastgrp == rt->mfc_mcastgrp.s_addr) && + (mb_rt->m_act == NULL)) + return (rt); + } + mrtstat.mrts_mfc_misses++; + return NULL; +} + +/* + * Macros to compute elapsed time efficiently + * Borrowed from Van Jacobson's scheduling code + */ +#define TV_DELTA(a, b, delta) { \ + register int xxs; \ + \ + delta = (a).tv_usec - (b).tv_usec; \ + if ((xxs = (a).tv_sec - (b).tv_sec)) { \ + switch (xxs) { \ + case 2: \ + delta += 1000000; \ + /* fall through */ \ + case 1: \ + delta += 1000000; \ + break; \ + default: \ + delta += (1000000 * xxs); \ + } \ + } \ +} + +#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ + (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) /* * Handle DVMRP setsockopt commands to modify the multicast routing tables. 
*/ int ip_mrouter_cmd(cmd, so, m) - register int cmd; - register struct socket *so; - register struct mbuf *m; + int cmd; + struct socket *so; + struct mbuf *m; { - register int error = 0; + if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; - if (cmd != DVMRP_INIT && so != ip_mrouter) - error = EACCES; - else switch (cmd) { - - case DVMRP_INIT: - error = ip_mrouter_init(so); - break; - - case DVMRP_DONE: - error = ip_mrouter_done(); - break; - - case DVMRP_ADD_VIF: - if (m == NULL || m->m_len < sizeof(struct vifctl)) - error = EINVAL; - else - error = add_vif(mtod(m, struct vifctl *)); - break; - - case DVMRP_DEL_VIF: - if (m == NULL || m->m_len < sizeof(short)) - error = EINVAL; - else - error = del_vif(mtod(m, vifi_t *)); - break; - - case DVMRP_ADD_LGRP: - if (m == NULL || m->m_len < sizeof(struct lgrplctl)) - error = EINVAL; - else - error = add_lgrp(mtod(m, struct lgrplctl *)); - break; - - case DVMRP_DEL_LGRP: - if (m == NULL || m->m_len < sizeof(struct lgrplctl)) - error = EINVAL; - else - error = del_lgrp(mtod(m, struct lgrplctl *)); - break; - - case DVMRP_ADD_MRT: - if (m == NULL || m->m_len < sizeof(struct mrtctl)) - error = EINVAL; - else - error = add_mrt(mtod(m, struct mrtctl *)); - break; - - case DVMRP_DEL_MRT: - if (m == NULL || m->m_len < sizeof(struct in_addr)) - error = EINVAL; - else - error = del_mrt(mtod(m, struct in_addr *)); - break; - - default: - error = EOPNOTSUPP; - break; - } - return (error); + switch (cmd) { + case DVMRP_INIT: return ip_mrouter_init(so); + case DVMRP_DONE: return ip_mrouter_done(); + case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); + case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); + case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); + case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); + default: return EOPNOTSUPP; + } } + +/* + * Handle ioctl commands to obtain information from the cache + */ +int +mrt_ioctl(cmd, data) + int cmd; + caddr_t data; +{ + int error 
= 0; + + switch (cmd) { + case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ + return (get_vifs(data)); + break; + case (SIOCGETVIFCNT): + return (get_vif_cnt((struct sioc_vif_req *)data)); + break; + case (SIOCGETSGCNT): + return (get_sg_cnt((struct sioc_sg_req *)data)); + break; + default: + return (EINVAL); + break; + } + return error; +} + +/* + * returns the packet count for the source group provided + */ +int +get_sg_cnt(req) + register struct sioc_sg_req *req; +{ + register struct mfc *rt; + int s; + + s = splnet(); + MFCFIND(req->src.s_addr, req->grp.s_addr, rt); + splx(s); + if (rt != NULL) + req->count = rt->mfc_pkt_cnt; + else + req->count = 0xffffffff; + + return 0; +} + +/* + * returns the input and output packet counts on the interface provided + */ +int +get_vif_cnt(req) + register struct sioc_vif_req *req; +{ + register vifi_t vifi = req->vifi; + + req->icount = viftable[vifi].v_pkt_in; + req->ocount = viftable[vifi].v_pkt_out; + + return 0; +} + +int +get_vifs(data) + char *data; +{ + struct vif_conf *vifc = (struct vif_conf *)data; + struct vif_req *vifrp, vifr; + int space, error=0; + + vifi_t vifi; + int s; + + space = vifc->vifc_len; + vifrp = vifc->vifc_req; + + s = splnet(); + vifc->vifc_num=numvifs; + + for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { + if (viftable[vifi].v_lcl_addr.s_addr != 0) { + vifr.v_flags=viftable[vifi].v_flags; + vifr.v_threshold=viftable[vifi].v_threshold; + vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; + vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; + strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); + if ((space -= sizeof(vifr)) < 0) { + splx(s); + return(ENOSPC); + } + error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); + if (error) { + splx(s); + return(error); + } + } + } + splx(s); + return 0; +} /* * Enable multicast routing */ static int ip_mrouter_init(so) - register struct socket *so; + struct socket *so; { - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != 
IPPROTO_IGMP) - return (EOPNOTSUPP); + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; - if (ip_mrouter != NULL) - return (EADDRINUSE); + if (ip_mrouter != NULL) return EADDRINUSE; - ip_mrouter = so; + ip_mrouter = so; - return (0); + if (mrtdebug) + log(LOG_DEBUG, "ip_mrouter_init"); + + return 0; } /* @@ -213,45 +431,82 @@ ip_mrouter_init(so) int ip_mrouter_done() { - register vifi_t vifi; - register int i; - register struct ifnet *ifp; - register int s; - struct ifreq ifr; + vifi_t vifi; + int i; + struct ifnet *ifp; + struct ifreq ifr; + struct mbuf *mb_rt; + struct mbuf *m; + struct rtdetq *rte; + int s; - s = splnet(); + s = splnet(); - /* - * For each phyint in use, free its local group list and - * disable promiscuous reception of all IP multicasts. - */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_lcl_addr.s_addr != 0 && - !(viftable[vifi].v_flags & VIFF_TUNNEL)) { - if (viftable[vifi].v_lcl_grps) - free(viftable[vifi].v_lcl_grps, M_MRTABLE); - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - ifp = viftable[vifi].v_ifp; - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - } + /* + * For each phyint in use, disable promiscuous reception of all IP + * multicasts. + */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_lcl_addr.s_addr != 0 && + !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr + = INADDR_ANY; + ifp = viftable[vifi].v_ifp; + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); } - bzero((caddr_t)viftable, sizeof(viftable)); - numvifs = 0; + } + bzero((caddr_t)qtable, sizeof(qtable)); + bzero((caddr_t)tbftable, sizeof(tbftable)); + bzero((caddr_t)viftable, sizeof(viftable)); + numvifs = 0; - /* - * Free any multicast route entries. 
- */ - for (i = 0; i < MRTHASHSIZ; i++) - if (mrttable[i]) - free(mrttable[i], M_MRTABLE); - bzero((caddr_t)mrttable, sizeof(mrttable)); - cached_mrt = NULL; + /* + * Check if any outstanding timeouts remain + */ + if (timeout_val != 0) + for (i = 0; i < MFCTBLSIZ; i++) { + mb_rt = mfctable[i]; + while (mb_rt) { + if ( mb_rt->m_act != NULL) { + untimeout(cleanup_cache, (caddr_t)mb_rt); + while (m = mb_rt->m_act) { + mb_rt->m_act = m->m_act; + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + timeout_val--; + } + mb_rt = mb_rt->m_next; + } + if (timeout_val == 0) + break; + } - ip_mrouter = NULL; + /* + * Free all multicast forwarding cache entries. + */ + for (i = 0; i < MFCTBLSIZ; i++) + m_freem(mfctable[i]); - splx(s); - return (0); + bzero((caddr_t)mfctable, sizeof(mfctable)); + bzero((caddr_t)mfchash, sizeof(mfchash)); + + /* + * Reset de-encapsulation cache + */ + last_encap_src = NULL; + last_encap_vif = NULL; + + ip_mrouter = NULL; + + splx(s); + + if (mrtdebug) + log(LOG_DEBUG, "ip_mrouter_done"); + + return 0; } /* @@ -259,61 +514,85 @@ ip_mrouter_done() */ static int add_vif(vifcp) - register struct vifctl *vifcp; + register struct vifctl *vifcp; { - register struct vif *vifp = viftable + vifcp->vifc_vifi; - register struct ifaddr *ifa; - register struct ifnet *ifp; - struct ifreq ifr; - register int error, s; - static struct sockaddr_in sin = { sizeof(sin), AF_INET }; + register struct vif *vifp = viftable + vifcp->vifc_vifi; + static struct sockaddr_in sin = {AF_INET}; + struct ifaddr *ifa; + struct ifnet *ifp; + struct ifreq ifr; + int error, s; + struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; - if (vifcp->vifc_vifi >= MAXVIFS) - return (EINVAL); - if (vifp->v_lcl_addr.s_addr != 0) - return (EADDRINUSE); + if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; + if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; - /* Find the interface with an address in AF_INET family */ - sin.sin_addr = vifcp->vifc_lcl_addr; - ifa = 
ifa_ifwithaddr((struct sockaddr *)&sin); - if (ifa == 0) - return (EADDRNOTAVAIL); + /* Find the interface with an address in AF_INET family */ + sin.sin_addr = vifcp->vifc_lcl_addr; + ifa = ifa_ifwithaddr((struct sockaddr *)&sin); + if (ifa == 0) return EADDRNOTAVAIL; + ifp = ifa->ifa_ifp; - s = splnet(); - - if (vifcp->vifc_flags & VIFF_TUNNEL) - vifp->v_rmt_addr = vifcp->vifc_rmt_addr; - else { - /* Make sure the interface supports multicast */ - ifp = ifa->ifa_ifp; - if ((ifp->if_flags & IFF_MULTICAST) == 0) { - splx(s); - return (EOPNOTSUPP); - } - /* - * Enable promiscuous reception of all IP multicasts - * from the interface. - */ - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); - if (error) { - splx(s); - return (error); + if (vifcp->vifc_flags & VIFF_TUNNEL) { + if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { + static int inited = 0; + if(!inited) { + for (s = 0; s < MAXVIFS; ++s) { + multicast_decap_if[s].if_name = "mdecap"; + multicast_decap_if[s].if_unit = s; } + inited = 1; + } + ifp = &multicast_decap_if[vifcp->vifc_vifi]; + } else { + ifp = 0; } + } else { + /* Make sure the interface supports multicast */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return EOPNOTSUPP; - vifp->v_flags = vifcp->vifc_flags; - vifp->v_threshold = vifcp->vifc_threshold; - vifp->v_lcl_addr = vifcp->vifc_lcl_addr; - vifp->v_ifp = ifa->ifa_ifp; - - /* Adjust numvifs up if the vifi is higher than numvifs */ - if (numvifs <= vifcp->vifc_vifi) - numvifs = vifcp->vifc_vifi + 1; - + /* Enable promiscuous reception of all IP multicasts from the if */ + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; + s = splnet(); + error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); splx(s); - return (0); + if (error) + return error; + } + + s = splnet(); + /* define parameters for the 
tbf structure */ + vifp->v_tbf = v_tbf; + vifp->v_tbf->q_len = 0; + vifp->v_tbf->n_tok = 0; + vifp->v_tbf->last_pkt_t = 0; + + vifp->v_flags = vifcp->vifc_flags; + vifp->v_threshold = vifcp->vifc_threshold; + vifp->v_lcl_addr = vifcp->vifc_lcl_addr; + vifp->v_rmt_addr = vifcp->vifc_rmt_addr; + vifp->v_ifp = ifp; + vifp->v_rate_limit= vifcp->vifc_rate_limit; + /* initialize per vif pkt counters */ + vifp->v_pkt_in = 0; + vifp->v_pkt_out = 0; + splx(s); + + /* Adjust numvifs up if the vifi is higher than numvifs */ + if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; + + if (mrtdebug) + log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", + vifcp->vifc_vifi, + ntohl(vifcp->vifc_lcl_addr.s_addr), + (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", + ntohl(vifcp->vifc_rmt_addr.s_addr), + vifcp->vifc_threshold, + vifcp->vifc_rate_limit); + + return 0; } /* @@ -321,296 +600,258 @@ add_vif(vifcp) */ static int del_vif(vifip) - register vifi_t *vifip; + vifi_t *vifip; { - register struct vif *vifp = viftable + *vifip; - register struct ifnet *ifp; - register int i, s; - struct ifreq ifr; + register struct vif *vifp = viftable + *vifip; + register vifi_t vifi; + struct ifnet *ifp; + struct ifreq ifr; + int s; - if (*vifip >= numvifs) - return (EINVAL); - if (vifp->v_lcl_addr.s_addr == 0) - return (EADDRNOTAVAIL); + if (*vifip >= numvifs) return EINVAL; + if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; - s = splnet(); + s = splnet(); - if (!(vifp->v_flags & VIFF_TUNNEL)) { - if (vifp->v_lcl_grps) - free(vifp->v_lcl_grps, M_MRTABLE); - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - ifp = vifp->v_ifp; - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - } + if (!(vifp->v_flags & VIFF_TUNNEL)) { + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; + ifp = vifp->v_ifp; + (*ifp->if_ioctl)(ifp, 
SIOCDELMULTI, (caddr_t)&ifr); + } - bzero((caddr_t)vifp, sizeof (*vifp)); + if (vifp == last_encap_vif) { + last_encap_vif = 0; + last_encap_src = 0; + } - /* Adjust numvifs down */ - for (i = numvifs - 1; i >= 0; i--) - if (viftable[i].v_lcl_addr.s_addr != 0) - break; - numvifs = i + 1; + bzero((caddr_t)qtable[*vifip], + sizeof(qtable[*vifip])); + bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); + bzero((caddr_t)vifp, sizeof (*vifp)); - splx(s); - return (0); + /* Adjust numvifs down */ + for (vifi = numvifs; vifi > 0; vifi--) + if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; + numvifs = vifi; + + splx(s); + + if (mrtdebug) + log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); + + return 0; } /* - * Add the multicast group in the lgrpctl to the list of local multicast - * group memberships associated with the vif indexed by gcp->lgc_vifi. + * Add an mfc entry */ static int -add_lgrp(gcp) - register struct lgrplctl *gcp; +add_mfc(mfccp) + struct mfcctl *mfccp; { - register struct vif *vifp; - register int s; + struct mfc *rt; + struct mfc *rt1; + register struct mbuf *mb_rt; + struct mbuf *prev_mb_rt; + u_long hash; + struct mbuf *mb_ntry; + struct rtdetq *rte; + register u_short nstl; + int s; + int i; - if (gcp->lgc_vifi >= numvifs) - return (EINVAL); + rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - vifp = viftable + gcp->lgc_vifi; - if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) - return (EADDRNOTAVAIL); + /* If an entry already exists, just update the fields */ + if (rt) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent); - /* If not enough space in existing list, allocate a larger one */ s = splnet(); - if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) { - register int num; - register struct in_addr *ip; + rt->mfc_parent = mfccp->mfcc_parent; + 
for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + splx(s); + return 0; + } - num = vifp->v_lcl_grps_max; - if (num <= 0) - num = 32; /* initial number */ - else - num += num; /* double last number */ - ip = (struct in_addr *)malloc(num * sizeof(*ip), - M_MRTABLE, M_NOWAIT); - if (ip == NULL) { - splx(s); - return (ENOBUFS); - } + /* + * Find the entry for which the upcall was made and update + */ + s = splnet(); + hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; + mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { - bzero((caddr_t)ip, num * sizeof(*ip)); /* XXX paranoid */ - bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip, - vifp->v_lcl_grps_n * sizeof(*ip)); + rt = mtod(mb_rt, struct mfc *); + if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) + == mfccp->mfcc_origin.s_addr) && + (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && + (mb_rt->m_act != NULL)) { - vifp->v_lcl_grps_max = num; - if (vifp->v_lcl_grps) - free(vifp->v_lcl_grps, M_MRTABLE); - vifp->v_lcl_grps = ip; + if (!nstl++) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + rt1 = rt; + } + + /* prevent cleanup of cache entry */ + untimeout(cleanup_cache, (caddr_t)mb_rt); + timeout_val--; + + /* free packets Qed at the end of this entry */ + while (mb_rt->m_act) { + mb_ntry = mb_rt->m_act; + rte = mtod(mb_ntry, struct rtdetq *); + ip_mdq(rte->m, rte->ifp, rte->tunnel_src, + rt1, rte->imo); + 
mb_rt->m_act = mb_ntry->m_act; + m_freem(rte->m); + m_free(mb_ntry); + } + + /* + * If more than one entry was created for a single upcall + * delete that entry + */ + if (nstl > 1) { + MFREE(mb_rt, prev_mb_rt->m_next); + mb_rt = prev_mb_rt; + } + } + } + + /* + * It is possible that an entry is being inserted without an upcall + */ + if (nstl == 0) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x", + hash, ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent); + + for (prev_mb_rt = mb_rt = mfctable[hash]; + mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + + rt = mtod(mb_rt, struct mfc *); + if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) + == mfccp->mfcc_origin.s_addr) && + (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { + + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + } + } + if (mb_rt == NULL) { + /* no upcall, so make a new entry */ + MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); + if (mb_rt == NULL) { splx(s); + return ENOBUFS; + } + + rt = mtod(mb_rt, struct mfc *); + + /* insert new entry at head of hash chain */ + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + + /* link into table */ + mb_rt->m_next = mfctable[hash]; + mfctable[hash] = mb_rt; + mb_rt->m_act = NULL; } - - vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr; - - if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) - 
vifp->v_cached_result = 1; - - splx(s); - return (0); + } + splx(s); + return 0; } /* - * Delete the the local multicast group associated with the vif - * indexed by gcp->lgc_vifi. - */ - -static int -del_lgrp(gcp) - register struct lgrplctl *gcp; -{ - register struct vif *vifp; - register int i, error, s; - - if (gcp->lgc_vifi >= numvifs) - return (EINVAL); - vifp = viftable + gcp->lgc_vifi; - if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) - return (EADDRNOTAVAIL); - - s = splnet(); - - if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) - vifp->v_cached_result = 0; - - error = EADDRNOTAVAIL; - for (i = 0; i < vifp->v_lcl_grps_n; ++i) - if (same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[i])) { - error = 0; - vifp->v_lcl_grps_n--; - bcopy((caddr_t)&vifp->v_lcl_grps[i + 1], - (caddr_t)&vifp->v_lcl_grps[i], - (vifp->v_lcl_grps_n - i) * sizeof(struct in_addr)); - error = 0; - break; - } - - splx(s); - return (error); -} - -/* - * Return 1 if gaddr is a member of the local group list for vifp. 
+ * Delete an mfc entry */ static int -grplst_member(vifp, gaddr) - register struct vif *vifp; - struct in_addr gaddr; +del_mfc(mfccp) + struct delmfcctl *mfccp; { - register int i, s; - register u_long addr; + struct in_addr origin; + struct in_addr mcastgrp; + struct mfc *rt; + struct mbuf *mb_rt; + struct mbuf *prev_mb_rt; + u_long hash; + struct mfc **cmfc; + struct mfc **cmfcend; + int s, i; - mrtstat.mrts_grp_lookups++; + origin = mfccp->mfcc_origin; + mcastgrp = mfccp->mfcc_mcastgrp; + hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); - addr = gaddr.s_addr; - if (addr == vifp->v_cached_group) - return (vifp->v_cached_result); + if (mrtdebug) + log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", + ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); - mrtstat.mrts_grp_misses++; + for (prev_mb_rt = mb_rt = mfctable[hash] + ; mb_rt + ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (origin.s_addr == rt->mfc_origin.s_addr && + mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && + mb_rt->m_act == NULL) + break; + } + if (mb_rt == NULL) { + return ESRCH; + } - for (i = 0; i < vifp->v_lcl_grps_n; ++i) - if (addr == vifp->v_lcl_grps[i].s_addr) { - s = splnet(); - vifp->v_cached_group = addr; - vifp->v_cached_result = 1; - splx(s); - return (1); - } - s = splnet(); - vifp->v_cached_group = addr; - vifp->v_cached_result = 0; - splx(s); - return (0); -} + s = splnet(); -/* - * A simple hash function: returns MRTHASHMOD of the low-order octet of - * the argument's network or subnet number. 
- */ -static u_long -nethash(in) - struct in_addr in; -{ - register u_long n; + cmfc = mfchash; + cmfcend = cmfc + MFCHASHSIZ; + for ( ; cmfc < cmfcend; ++cmfc) + if (*cmfc == rt) + *cmfc = 0; - n = in_netof(in); - while ((n & 0xff) == 0) - n >>= 8; - return (MRTHASHMOD(n)); -} + if (prev_mb_rt != mb_rt) { /* if moved past head of list */ + MFREE(mb_rt, prev_mb_rt->m_next); + } else /* delete head of list, it is in the table */ + mfctable[hash] = m_free(mb_rt); -/* - * Add an mrt entry - */ -static int -add_mrt(mrtcp) - register struct mrtctl *mrtcp; -{ - struct mrt *rt; - u_long hash; - int s; + splx(s); - if (rt = mrtfind(mrtcp->mrtc_origin)) { - /* Just update the route */ - s = splnet(); - rt->mrt_parent = mrtcp->mrtc_parent; - VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); - VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); - splx(s); - return (0); - } - - s = splnet(); - - rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); - if (rt == NULL) { - splx(s); - return (ENOBUFS); - } - - /* - * insert new entry at head of hash chain - */ - rt->mrt_origin = mrtcp->mrtc_origin; - rt->mrt_originmask = mrtcp->mrtc_originmask; - rt->mrt_parent = mrtcp->mrtc_parent; - VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); - VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); - /* link into table */ - hash = nethash(mrtcp->mrtc_origin); - rt->mrt_next = mrttable[hash]; - mrttable[hash] = rt; - - splx(s); - return (0); -} - -/* - * Delete an mrt entry - */ -static int -del_mrt(origin) - register struct in_addr *origin; -{ - register struct mrt *rt, *prev_rt; - register u_long hash = nethash(*origin); - register int s; - - for (prev_rt = rt = mrttable[hash]; rt; prev_rt = rt, rt = rt->mrt_next) - if (origin->s_addr == rt->mrt_origin.s_addr) - break; - if (!rt) - return (ESRCH); - - s = splnet(); - - if (rt == cached_mrt) - cached_mrt = NULL; - - if (prev_rt == rt) - mrttable[hash] = rt->mrt_next; - else - prev_rt->mrt_next = rt->mrt_next; - free(rt, M_MRTABLE); - - 
splx(s); - return (0); -} - -/* - * Find a route for a given origin IP address. - */ -static struct mrt * -mrtfind(origin) - struct in_addr origin; -{ - register struct mrt *rt; - register u_int hash; - register int s; - - mrtstat.mrts_mrt_lookups++; - - if (cached_mrt != NULL && - (origin.s_addr & cached_originmask) == cached_origin) - return (cached_mrt); - - mrtstat.mrts_mrt_misses++; - - hash = nethash(origin); - for (rt = mrttable[hash]; rt; rt = rt->mrt_next) - if ((origin.s_addr & rt->mrt_originmask.s_addr) == - rt->mrt_origin.s_addr) { - s = splnet(); - cached_mrt = rt; - cached_origin = rt->mrt_origin.s_addr; - cached_originmask = rt->mrt_originmask.s_addr; - splx(s); - return (rt); - } - return (NULL); + return 0; } /* @@ -628,209 +869,914 @@ mrtfind(origin) #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ int -ip_mforward(m, ifp) - register struct mbuf *m; - register struct ifnet *ifp; +ip_mforward(ip, ifp, m, imo) + struct mbuf *m; + register struct ip *ip; + struct ifnet *ifp; + struct ip_moptions *imo; { - register struct ip *ip = mtod(m, struct ip *); - register struct mrt *rt; - register struct vif *vifp; - register int vifi; - register u_char *ipoptions; - u_long tunnel_src; + register struct mfc *rt; + register struct vif *vifp; + register u_char *ipoptions; + u_long tunnel_src; + static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; + static struct sockaddr_in k_igmpsrc = { AF_INET }; + static struct sockaddr_in k_igmpdst = { AF_INET }; + register struct mbuf *mm; + register struct mbuf *mn; + register struct ip *k_data; + int s; - if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || - (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { - /* - * Packet arrived via a physical interface. - */ - tunnel_src = 0; - } else { - /* - * Packet arrived through a tunnel. 
- * - * A tunneled packet has a single NOP option and a - * two-element loose-source-and-record-route (LSRR) - * option immediately following the fixed-size part of - * the IP header. At this point in processing, the IP - * header should contain the following IP addresses: - * - * original source - in the source address field - * destination group - in the destination address field - * remote tunnel end-point - in the first element of LSRR - * one of this host's addrs - in the second element of LSRR - * - * NOTE: RFC-1075 would have the original source and - * remote tunnel end-point addresses swapped. However, - * that could cause delivery of ICMP error messages to - * innocent applications on intermediate routing - * hosts! Therefore, we hereby change the spec. - */ - - /* - * Verify that the tunnel options are well-formed. - */ - if (ipoptions[0] != IPOPT_NOP || - ipoptions[2] != 11 || /* LSRR option length */ - ipoptions[3] != 12 || /* LSRR address pointer */ - (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { - mrtstat.mrts_bad_tunnel++; - return (1); - } - - /* - * Delete the tunnel options from the packet. - */ - ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, - (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); - m->m_len -= TUNNEL_LEN; - ip->ip_len -= TUNNEL_LEN; - ip->ip_hl -= TUNNEL_LEN >> 2; - } + if (mrtdebug > 1) + log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); + if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || + (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* - * Don't forward a packet with time-to-live of zero or one, - * or a packet destined to a local-only group. + * Packet arrived via a physical interface. */ - if (ip->ip_ttl <= 1 || - ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) - return ((int)tunnel_src); - + tunnel_src = 0; + } else { /* - * Don't forward if we don't have a route for the packet's origin. 
- */ - if (!(rt = mrtfind(ip->ip_src))) { - mrtstat.mrts_no_route++; - return ((int)tunnel_src); - } - - /* - * Don't forward if it didn't arrive from the parent vif for its origin. - */ - vifi = rt->mrt_parent; - if (tunnel_src == 0 ) { - if ((viftable[vifi].v_flags & VIFF_TUNNEL) || - viftable[vifi].v_ifp != ifp ) - return ((int)tunnel_src); - } else { - if (!(viftable[vifi].v_flags & VIFF_TUNNEL) || - viftable[vifi].v_rmt_addr.s_addr != tunnel_src ) - return ((int)tunnel_src); - } - - /* - * For each vif, decide if a copy of the packet should be forwarded. - * Forward if: - * - the ttl exceeds the vif's threshold AND - * - the vif is a child in the origin's route AND - * - ( the vif is not a leaf in the origin's route OR - * the destination group has members on the vif ) + * Packet arrived through a source-route tunnel. * - * (This might be speeded up with some sort of cache -- someday.) + * A source-route tunneled packet has a single NOP option and a + * two-element + * loose-source-and-record-route (LSRR) option immediately following + * the fixed-size part of the IP header. At this point in processing, + * the IP header should contain the following IP addresses: + * + * original source - in the source address field + * destination group - in the destination address field + * remote tunnel end-point - in the first element of LSRR + * one of this host's addrs - in the second element of LSRR + * + * NOTE: RFC-1075 would have the original source and remote tunnel + * end-point addresses swapped. However, that could cause + * delivery of ICMP error messages to innocent applications + * on intermediate routing hosts! Therefore, we hereby + * change the spec. 
*/ - for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) { - if (ip->ip_ttl > vifp->v_threshold && - VIFM_ISSET(vifi, rt->mrt_children) && - (!VIFM_ISSET(vifi, rt->mrt_leaves) || - grplst_member(vifp, ip->ip_dst))) { - if (vifp->v_flags & VIFF_TUNNEL) - tunnel_send(m, vifp); - else - phyint_send(m, vifp); - } + + /* + * Verify that the tunnel options are well-formed. + */ + if (ipoptions[0] != IPOPT_NOP || + ipoptions[2] != 11 || /* LSRR option length */ + ipoptions[3] != 12 || /* LSRR address pointer */ + (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { + mrtstat.mrts_bad_tunnel++; + if (mrtdebug) + log(LOG_DEBUG, + "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)", + ntohl(ip->ip_src.s_addr), + ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], + *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); + return 1; } - return ((int)tunnel_src); + /* + * Delete the tunnel options from the packet. + */ + ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, + (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); + m->m_len -= TUNNEL_LEN; + ip->ip_len -= TUNNEL_LEN; + ip->ip_hl -= TUNNEL_LEN >> 2; + + ifp = 0; + } + + /* + * Don't forward a packet with time-to-live of zero or one, + * or a packet destined to a local-only group. 
+ */ + if (ip->ip_ttl <= 1 || + ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) + return (int)tunnel_src; + + /* + * Determine forwarding vifs from the forwarding cache table + */ + s = splnet(); + MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); + + /* Entry exists, so forward if necessary */ + if (rt != NULL) { + splx(s); + return (ip_mdq(m, ifp, tunnel_src, rt, imo)); + } + + else { + /* + * If we don't have a route for packet's origin, + * Make a copy of the packet & + * send message to routing daemon + */ + + register struct mbuf *mb_rt; + register struct mbuf *mb_ntry; + register struct mbuf *mb0; + register struct rtdetq *rte; + register struct mbuf *rte_m; + register u_long hash; + register struct timeval tp; + + mrtstat.mrts_no_route++; + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", + ntohl(ip->ip_src.s_addr), + ntohl(ip->ip_dst.s_addr)); + + /* is there an upcall waiting for this packet? */ + hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == + rt->mfc_origin.s_addr) && + (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && + (mb_rt->m_act != NULL)) + break; + } + + if (mb_rt == NULL) { + /* no upcall, so make a new entry */ + MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); + if (mb_rt == NULL) { + splx(s); + return ENOBUFS; + } + + rt = mtod(mb_rt, struct mfc *); + + /* insert new entry at head of hash chain */ + rt->mfc_origin.s_addr = ip->ip_src.s_addr; + rt->mfc_originmask.s_addr = (u_long)0xffffffff; + rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; + + /* link into table */ + hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); + mb_rt->m_next = mfctable[hash]; + mfctable[hash] = mb_rt; + mb_rt->m_act = NULL; + + } + + /* determine if q has overflowed */ + for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) + hash++; + + if (hash > MAX_UPQ) { + 
mrtstat.mrts_upq_ovflw++; + splx(s); + return 0; + } + + /* add this packet and timing, ifp info to m_act */ + MGET(mb_ntry, M_DONTWAIT, MT_DATA); + if (mb_ntry == NULL) { + splx(s); + return ENOBUFS; + } + + mb_ntry->m_act = NULL; + rte = mtod(mb_ntry, struct rtdetq *); + + mb0 = m_copy(m, 0, M_COPYALL); + if (mb0 == NULL) { + splx(s); + return ENOBUFS; + } + + rte->m = mb0; + rte->ifp = ifp; + rte->tunnel_src = tunnel_src; + rte->imo = imo; + + rte_m->m_act = mb_ntry; + + splx(s); + + if (hash == 0) { + /* + * Send message to routing daemon to install + * a route into the kernel table + */ + k_igmpsrc.sin_addr = ip->ip_src; + k_igmpdst.sin_addr = ip->ip_dst; + + mm = m_copy(m, 0, M_COPYALL); + if (mm == NULL) { + splx(s); + return ENOBUFS; + } + + k_data = mtod(mm, struct ip *); + k_data->ip_p = 0; + + mrtstat.mrts_upcalls++; + + raw_input(mm, &k_igmpproto, + (struct sockaddr *)&k_igmpsrc, + (struct sockaddr *)&k_igmpdst); + + /* set timer to cleanup entry if upcall is lost */ + timeout(cleanup_cache, (caddr_t)mb_rt, 100); + timeout_val++; + } + + return 0; + } +} + +/* + * Clean up the cache entry if upcall is not serviced + */ +static void +cleanup_cache(xmb_rt) + void *xmb_rt; +{ + struct mbuf *mb_rt = xmb_rt; + struct mfc *rt; + u_long hash; + struct mbuf *prev_m0; + struct mbuf *m0; + struct mbuf *m; + struct rtdetq *rte; + int s; + + rt = mtod(mb_rt, struct mfc *); + hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); + + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x", + ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), + ntohl(rt->mfc_mcastgrp.s_addr)); + + mrtstat.mrts_cache_cleanups++; + + /* + * determine entry to be cleaned up in cache table + */ + s = splnet(); + for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) + if (m0 == mb_rt) + break; + + /* + * drop all the packets + * free the mbuf with the pkt, if, timing info + */ + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = 
m->m_act; + + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + + /* + * Delete the entry from the cache + */ + if (prev_m0 != m0) { /* if moved past head of list */ + MFREE(m0, prev_m0->m_next); + } else /* delete head of list, it is in the table */ + mfctable[hash] = m_free(m0); + + timeout_val--; + splx(s); +} + +/* + * Packet forwarding routine once entry in the cache is made + */ +static int +ip_mdq(m, ifp, tunnel_src, rt, imo) + register struct mbuf *m; + register struct ifnet *ifp; + register u_long tunnel_src; + register struct mfc *rt; + register struct ip_moptions *imo; +{ + register struct ip *ip = mtod(m, struct ip *); + register vifi_t vifi; + register struct vif *vifp; + + /* + * Don't forward if it didn't arrive from the parent vif for its origin. + * Notes: v_ifp is zero for src route tunnels, multicast_decap_if + * for encapsulated tunnels and a real ifnet for non-tunnels so + * the first part of the if catches wrong physical interface or + * tunnel type; v_rmt_addr is zero for non-tunneled packets so + * the 2nd part catches both packets that arrive via a tunnel + * that shouldn't and packets that arrive via the wrong tunnel. + */ + vifi = rt->mfc_parent; + if (viftable[vifi].v_ifp != ifp || + (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + /* came in the wrong interface */ + if (mrtdebug) + log(LOG_DEBUG, "wrong if: ifp %x vifi %d", + ifp, vifi); + ++mrtstat.mrts_wrong_if; + return (int)tunnel_src; + } + + /* increment the interface and s-g counters */ + viftable[vifi].v_pkt_in++; + rt->mfc_pkt_cnt++; + + /* + * For each vif, decide if a copy of the packet should be forwarded. 
+ * Forward if: + * - the ttl exceeds the vif's threshold + * - there are group members downstream on interface + */ +#define MC_SEND(ip,vifp,m) { \ + (vifp)->v_pkt_out++; \ + if ((vifp)->v_flags & VIFF_SRCRT) \ + srcrt_send((ip), (vifp), (m)); \ + else if ((vifp)->v_flags & VIFF_TUNNEL) \ + encap_send((ip), (vifp), (m)); \ + else \ + phyint_send((ip), (vifp), (m)); \ + } + +/* If no options or the imo_multicast_vif option is 0, don't do this part + */ + if ((imo != NULL) && + (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) + { + MC_SEND(ip,viftable+vifi,m); + return (1); /* make sure we are done: No more physical sends */ + } + + for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) + if ((rt->mfc_ttls[vifi] > 0) && + (ip->ip_ttl > rt->mfc_ttls[vifi])) + MC_SEND(ip, vifp, m); + + return 0; +} + +/* check if a vif number is legal/ok. This is used by ip_output, to export + * numvifs there, + */ +int +legal_vif_num(vif) + int vif; +{ if (vif>=0 && vif<=numvifs) + return(1); + else + return(0); } static void -phyint_send(m, vifp) - register struct mbuf *m; - register struct vif *vifp; +phyint_send(ip, vifp, m) + struct ip *ip; + struct vif *vifp; + struct mbuf *m; { - register struct ip *ip = mtod(m, struct ip *); - register struct mbuf *mb_copy; - register struct ip_moptions *imo; - register int error; - struct ip_moptions simo; + register struct mbuf *mb_copy; + register struct mbuf *mopts; + register struct ip_moptions *imo; - mb_copy = m_copy(m, 0, M_COPYALL); - if (mb_copy == NULL) - return; + if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) + return; - imo = &simo; - imo->imo_multicast_ifp = vifp->v_ifp; - imo->imo_multicast_ttl = ip->ip_ttl - 1; - imo->imo_multicast_loop = 1; + MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); + if (imo == NULL) { + m_freem(mb_copy); + return; + } - error = ip_output(mb_copy, NULL, NULL, IP_FORWARDING, imo); + imo->imo_multicast_ifp = vifp->v_ifp; + imo->imo_multicast_ttl = 
ip->ip_ttl - 1; + imo->imo_multicast_loop = 1; + + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_copy, imo); + else + tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, + imo); } static void -tunnel_send(m, vifp) - register struct mbuf *m; - register struct vif *vifp; +srcrt_send(ip, vifp, m) + struct ip *ip; + struct vif *vifp; + struct mbuf *m; { - register struct ip *ip = mtod(m, struct ip *); - register struct mbuf *mb_copy, *mb_opts; - register struct ip *ip_copy; - register int error; - register u_char *cp; + struct mbuf *mb_copy, *mb_opts; + register struct ip *ip_copy; + u_char *cp; - /* - * Make sure that adding the tunnel options won't exceed the - * maximum allowed number of option bytes. - */ - if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { - mrtstat.mrts_cant_tunnel++; - return; - } + /* + * Make sure that adding the tunnel options won't exceed the + * maximum allowed number of option bytes. + */ + if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { + mrtstat.mrts_cant_tunnel++; + if (mrtdebug) + log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u", + ntohl(ip->ip_src.s_addr)); + return; + } - /* - * Get a private copy of the IP header so that changes to some - * of the IP fields don't damage the original header, which is - * examined later in ip_input.c. - */ - mb_copy = m_copy(m, IP_HDR_LEN, M_COPYALL); - if (mb_copy == NULL) - return; - MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); - if (mb_opts == NULL) { - m_freem(mb_copy); - return; - } - /* - * Make mb_opts be the new head of the packet chain. - * Any options of the packet were left in the old packet chain head - */ - mb_opts->m_next = mb_copy; - mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; - mb_opts->m_data += MSIZE - mb_opts->m_len; + if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) + return; - ip_copy = mtod(mb_opts, struct ip *); - /* - * Copy the base ip header to the new head mbuf. 
- */ - *ip_copy = *ip; - ip_copy->ip_ttl--; - ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ - /* - * Adjust the ip header length to account for the tunnel options. - */ - ip_copy->ip_hl += TUNNEL_LEN >> 2; - ip_copy->ip_len += TUNNEL_LEN; - /* - * Add the NOP and LSRR after the base ip header - */ - cp = (u_char *)(ip_copy + 1); - *cp++ = IPOPT_NOP; - *cp++ = IPOPT_LSRR; - *cp++ = 11; /* LSRR option length */ - *cp++ = 8; /* LSSR pointer to second element */ - *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ - cp += 4; - *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ + ip_copy = mtod(mb_copy, struct ip *); + ip_copy->ip_ttl--; + ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ + /* + * Adjust the ip header length to account for the tunnel options. + */ + ip_copy->ip_hl += TUNNEL_LEN >> 2; + ip_copy->ip_len += TUNNEL_LEN; + MGET(mb_opts, M_DONTWAIT, MT_HEADER); + if (mb_opts == NULL) { + m_freem(mb_copy); + return; + } + /* + * 'Delete' the base ip header from the mb_copy chain + */ + mb_copy->m_len -= IP_HDR_LEN; + mb_copy->m_data += IP_HDR_LEN; + /* + * Make mb_opts be the new head of the packet chain. 
+ * Any options of the packet were left in the old packet chain head + */ + mb_opts->m_next = mb_copy; + mb_opts->m_data += 16; + mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; + /* + * Copy the base ip header from the mb_copy chain to the new head mbuf + */ + bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN); + /* + * Add the NOP and LSRR after the base ip header + */ + cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; + *cp++ = IPOPT_NOP; + *cp++ = IPOPT_LSRR; + *cp++ = 11; /* LSRR option length */ + *cp++ = 8; /* LSRR pointer to second element */ + *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ + cp += 4; + *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ - error = ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL); + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_opts, 0); + else + tbf_control(vifp, mb_opts, + mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); } + +static void +encap_send(ip, vifp, m) + register struct ip *ip; + register struct vif *vifp; + register struct mbuf *m; +{ + register struct mbuf *mb_copy; + register struct ip *ip_copy; + register int i, len = ip->ip_len; + + /* + * copy the old packet & pull up its IP header into the + * new mbuf so we can modify it. Try to fill the new + * mbuf since if we don't the ethernet driver will. + */ + MGET(mb_copy, M_DONTWAIT, MT_DATA); + if (mb_copy == NULL) + return; + mb_copy->m_data += 16; + mb_copy->m_len = sizeof(multicast_encap_iphdr); + + if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { + m_freem(mb_copy); + return; + } + i = MHLEN - M_LEADINGSPACE(mb_copy); + if (i > len) + i = len; + mb_copy = m_pullup(mb_copy, i); + if (mb_copy == NULL) + return; + + /* + * fill in the encapsulating IP header. 
+ */ + ip_copy = mtod(mb_copy, struct ip *); + *ip_copy = multicast_encap_iphdr; + ip_copy->ip_id = htons(ip_id++); + ip_copy->ip_len += len; + ip_copy->ip_src = vifp->v_lcl_addr; + ip_copy->ip_dst = vifp->v_rmt_addr; + + /* + * turn the encapsulated IP header back into a valid one. + */ + ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); + --ip->ip_ttl; + HTONS(ip->ip_len); + HTONS(ip->ip_off); + ip->ip_sum = 0; +#if defined(LBL) && !defined(ultrix) + ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); +#else + mb_copy->m_data += sizeof(multicast_encap_iphdr); + ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); + mb_copy->m_data -= sizeof(multicast_encap_iphdr); #endif + + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_copy, 0); + else + tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); +} + +/* + * De-encapsulate a packet and feed it back through ip input (this + * routine is called whenever IP gets a packet with proto type + * ENCAP_PROTO and a local destination address). + */ +void +multiencap_decap(m) + register struct mbuf *m; +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + register struct ip *ip = mtod(m, struct ip *); + register int hlen = ip->ip_hl << 2; + register int s; + register struct ifqueue *ifq; + register struct vif *vifp; + + if (ip->ip_p != ENCAP_PROTO) { + rip_input(m); + return; + } + /* + * dump the packet if it's not to a multicast destination or if + * we don't have an encapsulating tunnel with the source. + * Note: This code assumes that the remote site IP address + * uniquely identifies the tunnel (i.e., that this site has + * at most one tunnel with the remote site). + */ + if (! 
IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { + ++mrtstat.mrts_bad_tunnel; + m_freem(m); + return; + } + if (ip->ip_src.s_addr != last_encap_src) { + register struct vif *vife; + + vifp = viftable; + vife = vifp + numvifs; + last_encap_src = ip->ip_src.s_addr; + last_encap_vif = 0; + for ( ; vifp < vife; ++vifp) + if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { + if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) + == VIFF_TUNNEL) + last_encap_vif = vifp; + break; + } + } + if ((vifp = last_encap_vif) == 0) { + last_encap_src = 0; + mrtstat.mrts_cant_tunnel++; /*XXX*/ + m_freem(m); + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: no tunnel with %u", + ntohl(ip->ip_src.s_addr)); + return; + } + ifp = vifp->v_ifp; + hlen -= sizeof(struct ifnet *); + m->m_data += hlen; + m->m_len -= hlen; + *(mtod(m, struct ifnet **)) = ifp; + ifq = &ipintrq; + s = splimp(); + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + m_freem(m); + } else { + IF_ENQUEUE(ifq, m); + /* + * normally we would need a "schednetisr(NETISR_IP)" + * here but we were called by ip_input and it is going + * to loop back & try to dequeue the packet we just + * queued as soon as we return so we avoid the + * unnecessary software interrupt. 
+ */ + } + splx(s); +} + +/* + * Token bucket filter module + */ +void +tbf_control(vifp, m, ip, p_len, imo) + register struct vif *vifp; + register struct mbuf *m; + register struct ip *ip; + register u_long p_len; + struct ip_moptions *imo; +{ + tbf_update_tokens(vifp); + + /* if there are enough tokens, + * and the queue is empty, + * send this packet out + */ + + if (vifp->v_tbf->q_len == 0) { + if (p_len <= vifp->v_tbf->n_tok) { + vifp->v_tbf->n_tok -= p_len; + tbf_send_packet(vifp, m, imo); + } else if (p_len > MAX_BKT_SIZE) { + /* drop if packet is too large */ + mrtstat.mrts_pkt2large++; + m_freem(m); + return; + } else { + /* queue packet and timeout till later */ + tbf_queue(vifp, m, ip, imo); + timeout(tbf_reprocess_q, (caddr_t)vifp, 1); + } + } else if (vifp->v_tbf->q_len < MAXQSIZE) { + /* finite queue length, so queue pkts and process queue */ + tbf_queue(vifp, m, ip, imo); + tbf_process_q(vifp); + } else { + /* queue length too much, try to dq and queue and process */ + if (!tbf_dq_sel(vifp, ip)) { + mrtstat.mrts_q_overflow++; + m_freem(m); + return; + } else { + tbf_queue(vifp, m, ip, imo); + tbf_process_q(vifp); + } + } + return; +} + +/* + * adds a packet to the queue at the interface + */ +void +tbf_queue(vifp, m, ip, imo) + register struct vif *vifp; + register struct mbuf *m; + register struct ip *ip; + struct ip_moptions *imo; +{ + register u_long ql; + register int index = (vifp - viftable); + register int s = splnet(); + + ql = vifp->v_tbf->q_len; + + qtable[index][ql].pkt_m = m; + qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; + qtable[index][ql].pkt_ip = ip; + qtable[index][ql].pkt_imo = imo; + + vifp->v_tbf->q_len++; + splx(s); +} + + +/* + * processes the queue at the interface + */ +void +tbf_process_q(vifp) + register struct vif *vifp; +{ + register struct mbuf *m; + register struct pkt_queue pkt_1; + register int index = (vifp - viftable); + register int s = splnet(); + + /* loop through the queue at the interface and 
send as many packets + * as possible + */ + while (vifp->v_tbf->q_len > 0) { + /* locate the first packet */ + pkt_1.pkt_len = ((qtable[index][0]).pkt_len); + pkt_1.pkt_m = (qtable[index][0]).pkt_m; + pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; + pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; + + /* determine if the packet can be sent */ + if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { + /* if so, + * reduce no of tokens, dequeue the queue, + * send the packet. + */ + vifp->v_tbf->n_tok -= pkt_1.pkt_len; + + tbf_dequeue(vifp, 0); + + tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); + + } else break; + } + splx(s); +} + +/* + * removes the jth packet from the queue at the interface + */ +void +tbf_dequeue(vifp,j) + register struct vif *vifp; + register int j; +{ + register u_long index = vifp - viftable; + register int i; + + for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { + qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; + qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; + qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; + qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; + } + qtable[index][i-1].pkt_m = NULL; + qtable[index][i-1].pkt_len = NULL; + qtable[index][i-1].pkt_ip = NULL; + qtable[index][i-1].pkt_imo = NULL; + + vifp->v_tbf->q_len--; + + if (tbfdebug > 1) + log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); +} + +void +tbf_reprocess_q(xvifp) + void *xvifp; +{ + register struct vif *vifp = xvifp; + if (ip_mrouter == NULL) + return; + + tbf_update_tokens(vifp); + + tbf_process_q(vifp); + + if (vifp->v_tbf->q_len) + timeout(tbf_reprocess_q, (caddr_t)vifp, 1); +} + +/* function that will selectively discard a member of the queue + * based on the precedence value and the priority obtained through + * a lookup table - not yet implemented accurately! 
+ */ +int +tbf_dq_sel(vifp, ip) + register struct vif *vifp; + register struct ip *ip; +{ + register int i; + register int s = splnet(); + register u_int p; + + p = priority(vifp, ip); + + for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { + if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { + m_freem(qtable[vifp-viftable][i].pkt_m); + tbf_dequeue(vifp,i); + splx(s); + mrtstat.mrts_drop_sel++; + return(1); + } + } + splx(s); + return(0); +} + +void +tbf_send_packet(vifp, m, imo) + register struct vif *vifp; + register struct mbuf *m; + struct ip_moptions *imo; +{ + register struct mbuf *mcp; + int error; + int s = splnet(); + + /* if source route tunnels */ + if (vifp->v_flags & VIFF_SRCRT) { + error = ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + if (mrtdebug > 1) + log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error); + } else if (vifp->v_flags & VIFF_TUNNEL) { + /* If tunnel options */ + ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + } else { + /* if physical interface option, extract the options and then send */ + error = ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + FREE(imo, M_IPMOPTS); + + if (mrtdebug > 1) + log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); + } + splx(s); +} + +/* determine the current time and then + * the elapsed time (between the last time and time now) + * in milliseconds & update the no. 
of tokens in the bucket + */ +void +tbf_update_tokens(vifp) + register struct vif *vifp; +{ + struct timeval tp; + register u_long t; + register u_long elapsed; + register int s = splnet(); + + GET_TIME(tp); + + t = tp.tv_sec*1000 + tp.tv_usec/1000; + + elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; + vifp->v_tbf->n_tok += elapsed; + vifp->v_tbf->last_pkt_t = t; + + if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) + vifp->v_tbf->n_tok = MAX_BKT_SIZE; + + splx(s); +} + +static int +priority(vifp, ip) + register struct vif *vifp; + register struct ip *ip; +{ + register u_long graddr; + register int prio; + + /* temporary hack; will add general packet classifier some day */ + + prio = 50; /* default priority */ + + /* check for source route options and add option length to get dst */ + if (vifp->v_flags & VIFF_SRCRT) + graddr = ntohl((ip+8)->ip_dst.s_addr); + else + graddr = ntohl(ip->ip_dst.s_addr); + + switch (graddr & 0xf) { + case 0x0: break; + case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ + break; + case 0x2: break; + case 0x3: break; + case 0x4: break; + case 0x5: break; + case 0x6: break; + case 0x7: break; + case 0x8: break; + case 0x9: break; + case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ + break; + case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ + break; + case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ + break; + case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ + break; + case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ + break; + case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ + break; + } + + if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio); + + return prio; +} + +/* + * End of token bucket filter modifications + */ +#endif + + diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h index 6f0382a08c7b..304b3ceebf1f 100644 --- a/sys/netinet/ip_mroute.h +++ b/sys/netinet/ip_mroute.h @@ -35,7 
+35,7 @@ * SUCH DAMAGE. * * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_mroute.h,v 1.2 1994/08/02 07:48:42 davidg Exp $ + * $Id: ip_mroute.h,v 1.3 1994/08/21 05:27:32 paul Exp $ */ #ifndef _NETINET_IP_MROUTE_H_ @@ -48,23 +48,24 @@ * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. + * Modified by Ajit Thyagarajan, PARC, August 1993. + * Modified by Ajit Thyagarajan, PARC, August 1994. * - * MROUTING 1.0 + * MROUTING 1.5 */ /* * DVMRP-specific setsockopt commands. */ -#define DVMRP_INIT 100 -#define DVMRP_DONE 101 -#define DVMRP_ADD_VIF 102 -#define DVMRP_DEL_VIF 103 -#define DVMRP_ADD_LGRP 104 -#define DVMRP_DEL_LGRP 105 -#define DVMRP_ADD_MRT 106 -#define DVMRP_DEL_MRT 107 +#define DVMRP_INIT 100 /* initialize forwarder */ +#define DVMRP_DONE 101 /* shut down forwarder */ +#define DVMRP_ADD_VIF 102 /* create virtual interface */ +#define DVMRP_DEL_VIF 103 /* delete virtual interface */ +#define DVMRP_ADD_MFC 104 /* insert forwarding cache entry */ +#define DVMRP_DEL_MFC 105 /* delete forwarding cache entry */ +#define GET_TIME(t) microtime(&t) /* * Types and macros for handling bitmaps with one bit per virtual interface. @@ -82,97 +83,172 @@ typedef u_short vifi_t; /* type of a vif index */ /* - * Agument structure for DVMRP_ADD_VIF. + * Argument structure for DVMRP_ADD_VIF. * (DVMRP_DEL_VIF takes a single vifi_t argument.) */ struct vifctl { vifi_t vifc_vifi; /* the index of the vif to be added */ u_char vifc_flags; /* VIFF_ flags defined below */ u_char vifc_threshold; /* min ttl required to forward on vif */ + u_int vifc_rate_limit; /* max rate */ struct in_addr vifc_lcl_addr; /* local interface address */ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */ }; #define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */ - +#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ /* - * Argument structure for DVMRP_ADD_LGRP and DVMRP_DEL_LGRP. 
+ * Argument structure for DVMRP_ADD_MFC + * (mfcc_tos to be added at a future point) */ -struct lgrplctl { - vifi_t lgc_vifi; - struct in_addr lgc_gaddr; -}; - - -/* - * Argument structure for DVMRP_ADD_MRT. - * (DVMRP_DEL_MRT takes a single struct in_addr argument, containing origin.) - */ -struct mrtctl { - struct in_addr mrtc_origin; /* subnet origin of multicasts */ - struct in_addr mrtc_originmask; /* subnet mask for origin */ - vifi_t mrtc_parent; /* incoming vif */ - vifbitmap_t mrtc_children; /* outgoing children vifs */ - vifbitmap_t mrtc_leaves; /* subset of outgoing children vifs */ -}; - - -#ifdef KERNEL - -/* - * The kernel's virtual-interface structure. - */ -struct vif { - u_char v_flags; /* VIFF_ flags defined above */ - u_char v_threshold; /* min ttl required to forward on vif */ - struct in_addr v_lcl_addr; /* local interface address */ - struct in_addr v_rmt_addr; /* remote address (tunnels only) */ - struct ifnet *v_ifp; /* pointer to interface */ - struct in_addr *v_lcl_grps; /* list of local grps (phyints only) */ - int v_lcl_grps_max; /* malloc'ed number of v_lcl_grps */ - int v_lcl_grps_n; /* used number of v_lcl_grps */ - u_long v_cached_group; /* last grp looked-up (phyints only) */ - int v_cached_result; /* last look-up result (phyints only) */ +struct mfcctl { + struct in_addr mfcc_origin; /* subnet origin of mcasts */ + struct in_addr mfcc_mcastgrp; /* multicast group associated*/ + struct in_addr mfcc_originmask; /* subnet mask for origin */ + vifi_t mfcc_parent; /* incoming vif */ + u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ }; /* - * The kernel's multicast route structure. 
+ * Argument structure for DVMRP_DEL_MFC */ -struct mrt { - struct in_addr mrt_origin; /* subnet origin of multicasts */ - struct in_addr mrt_originmask; /* subnet mask for origin */ - vifi_t mrt_parent; /* incoming vif */ - vifbitmap_t mrt_children; /* outgoing children vifs */ - vifbitmap_t mrt_leaves; /* subset of outgoing children vifs */ - struct mrt *mrt_next; /* forward link */ +struct delmfcctl { + struct in_addr mfcc_origin; /* subnet origin of multicasts */ + struct in_addr mfcc_mcastgrp; /* multicast group assoc. w/ origin */ }; +/* + * Argument structure used by RSVP daemon to get vif information + */ +struct vif_req { + u_char v_flags; /* VIFF_ flags defined above */ + u_char v_threshold; /* min ttl required to forward on vif */ + struct in_addr v_lcl_addr; /* local interface address */ + struct in_addr v_rmt_addr; + char v_if_name[IFNAMSIZ]; /* if name */ +}; -#define MRTHASHSIZ 64 -#if (MRTHASHSIZ & (MRTHASHSIZ - 1)) == 0 /* from sys:route.h */ -#define MRTHASHMOD(h) ((h) & (MRTHASHSIZ - 1)) -#else -#define MRTHASHMOD(h) ((h) % MRTHASHSIZ) -#endif +struct vif_conf { + u_int vifc_len; + u_int vifc_num; + struct vif_req *vifc_req; +}; /* * The kernel's multicast routing statistics. */ struct mrtstat { - u_long mrts_mrt_lookups; /* # multicast route lookups */ - u_long mrts_mrt_misses; /* # multicast route cache misses */ - u_long mrts_grp_lookups; /* # group address lookups */ - u_long mrts_grp_misses; /* # group address cache misses */ - u_long mrts_no_route; /* no route for packet's origin */ - u_long mrts_bad_tunnel; /* malformed tunnel options */ - u_long mrts_cant_tunnel; /* no room for tunnel options */ + u_long mrts_mfc_lookups; /* # forw. cache hash table hits */ + u_long mrts_mfc_misses; /* # forw. 
cache hash table misses */ + u_long mrts_upcalls; /* # calls to mrouted */ + u_long mrts_no_route; /* no route for packet's origin */ + u_long mrts_bad_tunnel; /* malformed tunnel options */ + u_long mrts_cant_tunnel; /* no room for tunnel options */ + u_long mrts_wrong_if; /* arrived on wrong interface */ + u_long mrts_upq_ovflw; /* upcall Q overflow */ + u_long mrts_cache_cleanups; /* # entries with no upcalls */ + u_long mrts_drop_sel; /* pkts dropped selectively */ + u_long mrts_q_overflow; /* pkts dropped - Q overflow */ + u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ }; +/* + * Argument structure used by mrouted to get src-grp pkt counts + */ +struct sioc_sg_req { + struct in_addr src; + struct in_addr grp; + u_long count; +}; + +/* + * Argument structure used by mrouted to get vif pkt counts + */ +struct sioc_vif_req { + vifi_t vifi; + u_long icount; + u_long ocount; +}; + + +#ifdef KERNEL + +struct vif { + u_char v_flags; /* VIFF_ flags defined above */ + u_char v_threshold; /* min ttl required to forward on vif*/ + u_int v_rate_limit; /* max rate */ + struct tbf *v_tbf; /* token bucket structure at intf. */ + struct in_addr v_lcl_addr; /* local interface address */ + struct in_addr v_rmt_addr; /* remote address (tunnels only) */ + struct ifnet *v_ifp; /* pointer to interface */ + u_long v_pkt_in; /* # pkts in on interface */ + u_long v_pkt_out; /* # pkts out on interface */ +}; + +/* + * The kernel's multicast forwarding cache entry structure + * (A field for the type of service (mfc_tos) is to be added + * at a future point) + */ +struct mfc { + struct in_addr mfc_origin; /* subnet origin of mcasts */ + struct in_addr mfc_mcastgrp; /* multicast group associated*/ + struct in_addr mfc_originmask; /* subnet mask for origin */ + vifi_t mfc_parent; /* incoming vif */ + u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ + u_long mfc_pkt_cnt; /* pkt count for src-grp */ +}; + +/* + * Argument structure used for pkt info. 
while upcall is made + */ +struct rtdetq { + struct mbuf *m; /* presumably the packet awaiting the upcall -- TODO confirm */ + struct ifnet *ifp; /* presumably the interface the pkt arrived on -- TODO confirm */ + u_long tunnel_src; /* presumably the tunnel source address, if any -- TODO confirm */ + struct ip_moptions *imo; /* presumably the multicast options assoc. with the pkt -- TODO confirm */ +}; + +#define MFCTBLSIZ 256 /* power of two, so MFCHASHMOD masks instead of using % */ +#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ +#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1)) +#else +#define MFCHASHMOD(h) ((h) % MFCTBLSIZ) +#endif + +#define MAX_UPQ 4 /* max. no of pkts in upcall Q */ + +/* + * Token Bucket filter code + */ +#define MAX_BKT_SIZE 10000 /* 10K bytes size */ +#define MAXQSIZE 10 /* max # of pkts in queue */ + +/* + * queue structure at each vif + */ +struct pkt_queue +{ + u_long pkt_len; /* length of packet in queue */ + struct mbuf *pkt_m; /* pointer to packet mbuf */ + struct ip *pkt_ip; /* pointer to ip header */ + struct ip_moptions *pkt_imo; /* IP multicast options assoc. with pkt */ +}; + +/* + * the token bucket filter at each vif + */ +struct tbf +{ + u_long last_pkt_t; /* arr. time of last pkt */ + u_long n_tok; /* no of tokens in bucket */ + u_long q_len; /* length of queue at this vif */ +}; int ip_mrouter_cmd __P((int, struct socket *, struct mbuf *)); int ip_mrouter_done __P((void)); #endif /* KERNEL */ -#endif +#endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 994cc980f5cc..57ec677656d8 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 - * $Id: ip_output.c,v 1.4 1994/08/02 07:48:45 davidg Exp $ + * $Id: ip_output.c,v 1.5 1994/08/18 22:35:31 wollman Exp $ */ #include @@ -225,9 +225,16 @@ ip_output(m0, opt, ro, flags, imo) * above, will be forwarded by the ip_input() routine, * if necessary. */ - extern struct socket *ip_mrouter; if (ip_mrouter && (flags & IP_FORWARDING) == 0) { - if (ip_mforward(m, ifp) != 0) { + /* + * Check if rsvp daemon is running. If not, don't + * set ip_moptions. This ensures that the packet + * is multicast and not just sent down one link + * as prescribed by rsvpd. 
+ */ + if (ip_rsvpd == NULL) + imo = NULL; + if (ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } @@ -557,6 +564,7 @@ ip_ctloutput(op, so, level, optname, mp) #undef OPTSET case IP_MULTICAST_IF: + case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: @@ -620,6 +628,7 @@ ip_ctloutput(op, so, level, optname, mp) break; case IP_MULTICAST_IF: + case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: @@ -774,6 +783,7 @@ ip_setmoptions(optname, imop, m) return (ENOBUFS); *imop = imo; imo->imo_multicast_ifp = NULL; + imo->imo_multicast_vif = 0; /* initial value; 0 is also what the all-defaults check compares against */ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; imo->imo_num_memberships = 0; @@ -781,6 +791,20 @@ ip_setmoptions(optname, imop, m) switch (optname) { + /* store an index number for the vif you wanna use in the send */ + case IP_MULTICAST_VIF: + if (m == NULL || m->m_len != sizeof(int)) { /* option value must be exactly one int */ + error = EINVAL; + break; + } + i = *(mtod(m, int *)); + if (!legal_vif_num(i)) { /* index rejected by legal_vif_num() */ + error = EINVAL; + break; + } + imo->imo_multicast_vif = i; + break; + case IP_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. @@ -972,6 +996,7 @@ ip_setmoptions(optname, imop, m) * If all options have default values, no need to keep the mbuf. 
*/ if (imo->imo_multicast_ifp == NULL && + imo->imo_multicast_vif == 0 && /* vif number 0 counts as the default here */ imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { @@ -1000,6 +1025,14 @@ ip_getmoptions(optname, imo, mp) switch (optname) { + case IP_MULTICAST_VIF: + if (imo != NULL) + *(mtod(*mp, int *)) = imo->imo_multicast_vif; + else + *(mtod(*mp, int *)) = 7890; /* NOTE(review): unexplained magic value reported when the socket has no ip_moptions */ + (*mp)->m_len = sizeof(int); + return(0); + case IP_MULTICAST_IF: addr = mtod(*mp, struct in_addr *); (*mp)->m_len = sizeof(struct in_addr); diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 6c83c4ff417d..3e8db9640335 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_var.h,v 1.3 1994/08/18 22:35:31 wollman Exp $ + * $Id: ip_var.h,v 1.4 1994/08/21 05:27:33 paul Exp $ */ #ifndef _NETINET_IP_VAR_H_ @@ -111,6 +111,7 @@ struct ipoption { */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ + u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. memberships this socket */ @@ -155,6 +156,8 @@ extern struct ipstat ipstat; extern struct ipq ipq; /* ip reass. 
queue */ extern u_short ip_id; /* ip packet ctr, for ids */ extern int ip_defttl; /* default IP ttl */ +extern struct socket *ip_rsvpd; /* reservation protocol daemon */ +extern struct socket *ip_mrouter; /* multicast routing daemon */ int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); void ip_deq __P((struct ipasfrag *)); @@ -166,7 +169,8 @@ void ip_freef __P((struct ipq *)); void ip_freemoptions __P((struct ip_moptions *)); int ip_getmoptions __P((int, struct ip_moptions *, struct mbuf **)); void ip_init __P((void)); -int ip_mforward __P((struct mbuf *, struct ifnet *)); +int ip_mforward __P((struct ip *, struct ifnet *, struct mbuf *, + struct ip_moptions *)); /* ip_output() calls this as ip_mforward(ip, ifp, m, imo) */ int ip_optcopy __P((struct ip *, struct ip *)); int ip_output __P((struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *)); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 86c2b5a618e7..e0cddca4fe42 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 - * $Id$ + * $Id: raw_ip.c,v 1.2 1994/08/02 07:48:49 davidg Exp $ */ #include @@ -203,14 +203,20 @@ rip_ctloutput(op, so, level, optname, m) } break; + case IP_RSVP_ON: + error = ip_rsvp_init(so); /* NOTE(review): unlike the DVMRP_* cases below, this hunk shows no PRCO_SETOPT check on op */ + break; + + case IP_RSVP_OFF: + error = ip_rsvp_done(); + break; + case DVMRP_INIT: case DVMRP_DONE: case DVMRP_ADD_VIF: case DVMRP_DEL_VIF: - case DVMRP_ADD_LGRP: - case DVMRP_DEL_LGRP: - case DVMRP_ADD_MRT: - case DVMRP_DEL_MRT: + case DVMRP_ADD_MFC: + case DVMRP_DEL_MFC: #ifdef MROUTING if (op == PRCO_SETOPT) { error = ip_mrouter_cmd(optname, so, *m); @@ -240,9 +246,6 @@ rip_usrreq(so, req, m, nam, control) { register int error = 0; register struct inpcb *inp = sotoinpcb(so); -#ifdef MROUTING - extern struct socket *ip_mrouter; -#endif switch (req) { case PRU_ATTACH: diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index b253044fc0d1..d6569de704fb 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)sockio.h 8.1 (Berkeley) 3/28/94 - * $Id: sockio.h,v 1.2 1994/08/02 07:53:37 davidg Exp $ + * $Id: sockio.h,v 1.3 1994/08/08 10:49:26 davidg Exp $ */ #ifndef _SYS_SOCKIO_H_ @@ -50,6 +50,11 @@ #define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */ #define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */ +#define SIOCSETRTINFO _IOWR('r', 12, struct fullrtentry) /* change aux info */ +#define SIOCGETRTINFO _IOWR('r', 13, struct fullrtentry) /* read aux info */ +#define SIOCGETVIFINF _IOWR('r', 14, struct vif_conf) /* read m/c vifs */ +#define SIOCGETVIFCNT _IOWR('r', 15, struct sioc_vif_req)/* get vif pkt cnt */ +#define SIOCGETSGCNT _IOWR('r', 16, struct sioc_sg_req) /* get s,g pkt cnt */ #define SIOCSIFADDR _IOW('i', 12, struct ifreq) /* set ifnet address */ #define OSIOCGIFADDR _IOWR('i', 13, struct ifreq) /* get ifnet address */