From bc29160df3d0b3a65d9b85a63a4589f1f9652e6b Mon Sep 17 00:00:00 2001 From: Marko Zec Date: Mon, 8 Jun 2009 17:15:40 +0000 Subject: [PATCH] Introduce an infrastructure for dismantling vnet instances. Vnet modules and protocol domains may now register destructor functions to clean up and release per-module state. The destructor mechanisms can be triggered by invoking "vimage -d", or a future equivalent command which will be provided via the new jail framework. While this patch introduces numerous placeholder destructor functions, many of those are currently incomplete, thus leaking memory or (even worse) failing to stop all running timers. Many of such issues are already known and will be incrementaly fixed over the next weeks in smaller incremental commits. Apart from introducing new fields in structs ifnet, domain, protosw and vnet_net, which requires the kernel and modules to be rebuilt, this change should have no impact on nooptions VIMAGE builds, since vnet destructors can only be called in VIMAGE kernels. Moreover, destructor functions should be in general compiled in only in options VIMAGE builds, except for kernel modules which can be safely kldunloaded at run time. Bump __FreeBSD_version to 800097. Reviewed by: bz, julian Approved by: rwatson, kib (re), julian (mentor) --- UPDATING | 5 +++ sys/kern/kern_vimage.c | 73 +++++++++++++++++++++++++++++++++---- sys/kern/uipc_domain.c | 28 +++++++++++++- sys/net/if.c | 26 ++++++++++++- sys/net/if_gif.c | 8 ++-- sys/net/if_loop.c | 35 +++++++++++++++--- sys/net/if_var.h | 4 ++ sys/net/route.c | 41 ++++++++++++++++++++- sys/net/vnet.h | 2 + sys/netgraph/ng_base.c | 45 +++++++++++++++++++++-- sys/netinet/in_proto.c | 13 +++++++ sys/netinet/in_rmx.c | 14 +++++++ sys/netinet/ip_var.h | 3 ++ sys/netinet/raw_ip.c | 13 +++++++ sys/netinet/tcp_hostcache.c | 12 ++++++ sys/netinet/tcp_subr.c | 19 ++++++++++ sys/netinet/tcp_syncache.c | 13 +++++++ sys/netinet/tcp_syncache.h | 3 ++ sys/netinet/tcp_timewait.c | 14 +++++++ sys/netinet/tcp_var.h | 9 +++++ sys/netinet/udp_usrreq.c | 14 +++++++ sys/netinet/udp_var.h | 3 ++ sys/netinet6/in6_proto.c | 9 +++++ sys/netinet6/in6_rmx.c | 15 ++++++++ sys/netinet6/ip6_input.c | 11 ++++++ sys/netinet6/ip6_var.h | 3 ++ sys/netinet6/ip6protosw.h | 2 + sys/netinet6/nd6.c | 23 +++++++----- sys/netinet6/nd6.h | 3 ++ sys/netipsec/ipsec.c | 20 +++++++++- sys/netipsec/key.c | 70 ++++++++++++++++++++++++++++++++++- sys/netipsec/key.h | 3 ++ sys/netipsec/keysock.c | 3 ++ sys/sys/domain.h | 4 ++ sys/sys/param.h | 2 +- sys/sys/protosw.h | 2 + sys/sys/vimage.h | 1 + 37 files changed, 529 insertions(+), 39 deletions(-) diff --git a/UPDATING b/UPDATING index 2c4313235ee3..d5e05f936cfc 100644 --- a/UPDATING +++ b/UPDATING @@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW: to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20090608: + The layout of structs ifnet, domain, protosw and vnet_net has + changed. Kernel modules need to be rebuilt. + Bump __FreeBSD_version to 800097. + 20090602: window(1) has been removed from the base system. It can now be installed from ports. The port is called misc/window. diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c index c78217add5a2..bb2c509576bf 100644 --- a/sys/kern/kern_vimage.c +++ b/sys/kern/kern_vimage.c @@ -65,8 +65,8 @@ static int vnet_mod_constructor(struct vnet_modlink *); static int vnet_mod_destructor(struct vnet_modlink *); #ifdef VIMAGE -static struct vimage *vimage_by_name(struct vimage *, char *); static struct vimage *vi_alloc(struct vimage *, char *); +static int vi_destroy(struct vimage *); static struct vimage *vimage_get_next(struct vimage *, struct vimage *, int); static void vimage_relative_name(struct vimage *, struct vimage *, char *, int); @@ -122,7 +122,7 @@ vi_if_move(struct vi_req *vi_req, struct ifnet *ifp, struct vimage *vip) struct vnet *new_vnet = NULL; /* Check for API / ABI version mismatch. */ - if (vi_req->vi_api_cookie != VI_API_COOKIE) + if (vi_req != NULL && vi_req->vi_api_cookie != VI_API_COOKIE) return (EDOOFUS); /* Find the target vnet. */ @@ -216,11 +216,7 @@ vi_td_ioctl(u_long cmd, struct vi_req *vi_req, struct thread *td) case SIOCSPVIMAGE: if (vi_req->vi_req_action == VI_DESTROY) { -#ifdef NOTYET error = vi_destroy(vip_r); -#else - error = EOPNOTSUPP; -#endif break; } @@ -283,7 +279,7 @@ vi_child_of(struct vimage *parent, struct vimage *child) return (0); } -static struct vimage * +struct vimage * vimage_by_name(struct vimage *top, char *name) { struct vimage *vip; @@ -541,7 +537,6 @@ vnet_mod_constructor(struct vnet_modlink *vml) return (0); } - static int vnet_mod_destructor(struct vnet_modlink *vml) { @@ -663,6 +658,68 @@ vi_alloc(struct vimage *parent, char *name) return (vip); } + +/* + * Destroy a vnet - unlink all linked lists, hashtables etc., free all + * the memory, stop all the timers... + */ +static int +vi_destroy(struct vimage *vip) +{ + struct vnet *vnet = vip->v_net; + struct vprocg *vprocg = vip->v_procg; + struct ifnet *ifp, *nifp; + struct vnet_modlink *vml; + + /* XXX Beware of races -> more locking to be done... */ + if (!LIST_EMPTY(&vip->vi_child_head)) + return (EBUSY); + + if (vprocg->nprocs != 0) + return (EBUSY); + + if (vnet->sockcnt != 0) + return (EBUSY); + +#ifdef INVARIANTS + if (vip->vi_ucredrefc != 0) + printf("vi_destroy: %s ucredrefc %d\n", + vip->vi_name, vip->vi_ucredrefc); +#endif + + /* Point with no return - cleanup MUST succeed! */ + LIST_REMOVE(vip, vi_le); + LIST_REMOVE(vip, vi_sibling); + LIST_REMOVE(vprocg, vprocg_le); + + VNET_LIST_WLOCK(); + LIST_REMOVE(vnet, vnet_le); + VNET_LIST_WUNLOCK(); + + CURVNET_SET_QUIET(vnet); + INIT_VNET_NET(vnet); + + /* Return all inherited interfaces to their parent vnets. */ + TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { + if (ifp->if_home_vnet != ifp->if_vnet) + vi_if_move(NULL, ifp, vip); + } + + /* Detach / free per-module state instances. */ + TAILQ_FOREACH_REVERSE(vml, &vnet_modlink_head, + vnet_modlink_head, vml_mod_le) + vnet_mod_destructor(vml); + + CURVNET_RESTORE(); + + /* Hopefully, we are OK to free the vnet container itself. */ + vnet->vnet_magic_n = 0xdeadbeef; + free(vnet, M_VNET); + free(vprocg, M_VPROCG); + free(vip, M_VIMAGE); + + return (0); +} #endif /* VIMAGE */ static void diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c index df35bcbce5c7..9ee6047dd2ea 100644 --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -66,6 +66,9 @@ SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize, NULL); static vnet_attach_fn net_init_domain; +#ifdef VIMAGE +static vnet_detach_fn net_detach_domain; +#endif static struct callout pffast_callout; static struct callout pfslow_callout; @@ -107,7 +110,10 @@ struct pr_usrreqs nousrreqs = { vnet_modinfo_t vnet_domain_modinfo = { .vmi_id = VNET_MOD_DOMAIN, .vmi_name = "domain", - .vmi_iattach = net_init_domain + .vmi_iattach = net_init_domain, +#ifdef VIMAGE + .vmi_idetach = net_detach_domain, +#endif }; #endif @@ -190,6 +196,26 @@ net_init_domain(const void *arg) return (0); } +#ifdef VIMAGE +/* + * Detach / free a domain instance. + */ +static int +net_detach_domain(const void *arg) +{ + const struct domain *dp = arg; + struct protosw *pr; + + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_destroy) + (*pr->pr_destroy)(); + if (dp->dom_destroy) + (*dp->dom_destroy)(); + + return (0); +} +#endif + /* * Add a new protocol domain to the list of supported domains * Note: you cant unload it again because a socket may be using it. diff --git a/sys/net/if.c b/sys/net/if.c index 1a5dcd29e651..35a5e0b5df23 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -153,6 +153,9 @@ extern void nd6_setmtu(struct ifnet *); #endif static int vnet_net_iattach(const void *); +#ifdef VIMAGE +static int vnet_net_idetach(const void *); +#endif #ifdef VIMAGE_GLOBALS struct ifnethead ifnet; /* depend on static init XXX */ @@ -189,7 +192,10 @@ static const vnet_modinfo_t vnet_net_modinfo = { .vmi_name = "net", .vmi_size = sizeof(struct vnet_net), .vmi_symmap = vnet_net_symmap, - .vmi_iattach = vnet_net_iattach + .vmi_iattach = vnet_net_iattach, +#ifdef VIMAGE + .vmi_idetach = vnet_net_idetach +#endif }; #endif /* !VIMAGE_GLOBALS */ @@ -446,6 +452,22 @@ vnet_net_iattach(const void *unused __unused) return (0); } +#ifdef VIMAGE +static int +vnet_net_idetach(const void *unused __unused) +{ + INIT_VNET_NET(curvnet); + + VNET_ASSERT(TAILQ_EMPTY(&V_ifnet)); + VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head)); + VNET_ASSERT(SLIST_EMPTY(&V_ifklist.kl_list)); + + free((caddr_t)V_ifindex_table, M_IFNET); + + return (0); +} +#endif + void if_grow(void) { @@ -688,6 +710,8 @@ if_attach_internal(struct ifnet *ifp, int vmove) #ifdef VIMAGE ifp->if_vnet = curvnet; + if (ifp->if_home_vnet == NULL) + ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 69e14e9de495..c6f7285ff7fd 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -303,12 +303,10 @@ gifmodevent(mod, type, data) break; case MOD_UNLOAD: if_clone_detach(&gif_cloner); +#ifdef VIMAGE + vnet_mod_deregister(&vnet_gif_modinfo); +#endif mtx_destroy(&gif_mtx); -#ifdef INET6 -#ifndef VIMAGE - V_ip6_gif_hlim = 0; /* XXX -> vnet_gif_idetach() */ -#endif -#endif break; default: return EOPNOTSUPP; diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c index 2bbccac2c379..b0f22c89758d 100644 --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -105,6 +105,9 @@ int looutput(struct ifnet *ifp, struct mbuf *m, static int lo_clone_create(struct if_clone *, int, caddr_t); static void lo_clone_destroy(struct ifnet *); static int vnet_loif_iattach(const void *); +#ifdef VIMAGE +static int vnet_loif_idetach(const void *); +#endif #ifdef VIMAGE_GLOBALS struct ifnet *loif; /* Used externally */ @@ -119,7 +122,10 @@ static const vnet_modinfo_t vnet_loif_modinfo = { .vmi_id = VNET_MOD_LOIF, .vmi_dependson = VNET_MOD_IF_CLONE, .vmi_name = "loif", - .vmi_iattach = vnet_loif_iattach + .vmi_iattach = vnet_loif_iattach, +#ifdef VIMAGE + .vmi_idetach = vnet_loif_idetach +#endif }; #endif /* !VIMAGE_GLOBALS */ @@ -128,12 +134,11 @@ IFC_SIMPLE_DECLARE(lo, 1); static void lo_clone_destroy(struct ifnet *ifp) { -#ifdef INVARIANTS - INIT_VNET_NET(ifp->if_vnet); -#endif +#ifndef VIMAGE /* XXX: destroying lo0 will lead to panics. */ KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__)); +#endif bpfdetach(ifp); if_detach(ifp); @@ -166,7 +171,8 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params) return (0); } -static int vnet_loif_iattach(const void *unused __unused) +static int +vnet_loif_iattach(const void *unused __unused) { INIT_VNET_NET(curvnet); @@ -175,7 +181,11 @@ static int vnet_loif_iattach(const void *unused __unused) #ifdef VIMAGE V_lo_cloner = malloc(sizeof(*V_lo_cloner), M_LO_CLONER, M_WAITOK | M_ZERO); + V_lo_cloner_data = malloc(sizeof(*V_lo_cloner_data), M_LO_CLONER, + M_WAITOK | M_ZERO); bcopy(&lo_cloner, V_lo_cloner, sizeof(*V_lo_cloner)); + bcopy(lo_cloner.ifc_data, V_lo_cloner_data, sizeof(*V_lo_cloner_data)); + V_lo_cloner->ifc_data = V_lo_cloner_data; if_clone_attach(V_lo_cloner); #else if_clone_attach(&lo_cloner); @@ -183,6 +193,21 @@ static int vnet_loif_iattach(const void *unused __unused) return (0); } +#ifdef VIMAGE +static int +vnet_loif_idetach(const void *unused __unused) +{ + INIT_VNET_NET(curvnet); + + if_clone_detach(V_lo_cloner); + free(V_lo_cloner, M_LO_CLONER); + free(V_lo_cloner_data, M_LO_CLONER); + V_loif = NULL; + + return (0); +} +#endif + static int loop_modevent(module_t mod, int type, void *data) { diff --git a/sys/net/if_var.h b/sys/net/if_var.h index d811f227acad..2d0a0abade46 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -71,6 +71,7 @@ struct ether_header; struct carp_if; struct ifvlantrunk; struct route; +struct vnet; #endif #include /* get TAILQ macros */ @@ -169,6 +170,9 @@ struct ifnet { (struct ifnet *); int (*if_transmit) /* initiate output routine */ (struct ifnet *, struct mbuf *); + void (*if_reassign) /* reassign to vnet routine */ + (struct ifnet *, struct vnet *, char *); + struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifaddr *if_addr; /* pointer to link-level address */ void *if_llsoftc; /* link layer softc */ int if_drv_flags; /* driver-managed status flags */ diff --git a/sys/net/route.c b/sys/net/route.c index 8705a541f3e4..aaa38d76c6b0 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -99,12 +99,18 @@ static int rttrash; /* routes not in table but not freed */ static void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); static int vnet_route_iattach(const void *); +#ifdef VIMAGE +static int vnet_route_idetach(const void *); +#endif #ifndef VIMAGE_GLOBALS static const vnet_modinfo_t vnet_rtable_modinfo = { .vmi_id = VNET_MOD_RTABLE, .vmi_name = "rtable", - .vmi_iattach = vnet_route_iattach + .vmi_iattach = vnet_route_iattach, +#ifdef VIMAGE + .vmi_idetach = vnet_route_idetach +#endif }; #endif /* !VIMAGE_GLOBALS */ @@ -194,7 +200,8 @@ route_init(void) #endif } -static int vnet_route_iattach(const void *unused __unused) +static int +vnet_route_iattach(const void *unused __unused) { INIT_VNET_NET(curvnet); struct domain *dom; @@ -235,6 +242,36 @@ static int vnet_route_iattach(const void *unused __unused) return (0); } +#ifdef VIMAGE +static int +vnet_route_idetach(const void *unused __unused) +{ + int table; + int fam; + struct domain *dom; + struct radix_node_head **rnh; + + for (dom = domains; dom; dom = dom->dom_next) { + if (dom->dom_rtdetach) { + for (table = 0; table < rt_numfibs; table++) { + if ( (fam = dom->dom_family) == AF_INET || + table == 0) { + /* For now only AF_INET has > 1 tbl. */ + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtdetach((void **)rnh, + dom->dom_rtoffset); + } else { + break; + } + } + } + } + return (0); +} +#endif + #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; diff --git a/sys/net/vnet.h b/sys/net/vnet.h index d36c30373b29..696e47203a86 100644 --- a/sys/net/vnet.h +++ b/sys/net/vnet.h @@ -51,6 +51,7 @@ struct vnet_net { struct ifnet * _loif; struct if_clone * _lo_cloner; + struct ifc_simple_data *_lo_cloner_data; LIST_HEAD(, rawcb) _rawcb_list; @@ -87,6 +88,7 @@ extern struct vnet_net vnet_net_0; #define V_ifklist VNET_NET(ifklist) #define V_ifnet VNET_NET(ifnet) #define V_lo_cloner VNET_NET(lo_cloner) +#define V_lo_cloner_data VNET_NET(lo_cloner_data) #define V_loif VNET_NET(loif) #define V_rawcb_list VNET_NET(rawcb_list) #define V_rt_tables VNET_NET(rt_tables) diff --git a/sys/netgraph/ng_base.c b/sys/netgraph/ng_base.c index 2daf9611b460..39084b57d596 100644 --- a/sys/netgraph/ng_base.c +++ b/sys/netgraph/ng_base.c @@ -85,6 +85,9 @@ struct vnet_netgraph vnet_netgraph_0; static struct mtx ng_topo_mtx; static vnet_attach_fn vnet_netgraph_iattach; +#ifdef VIMAGE +static vnet_detach_fn vnet_netgraph_idetach; +#endif #ifdef NETGRAPH_DEBUG static struct mtx ng_nodelist_mtx; /* protects global node/hook lists */ @@ -647,6 +650,9 @@ ng_make_node_common(struct ng_type *type, node_p *nodepp) return (ENOMEM); } node->nd_type = type; +#ifdef VIMAGE + node->nd_vnet = curvnet; +#endif NG_NODE_REF(node); /* note reference */ type->refs++; @@ -3074,15 +3080,17 @@ ng_mod_event(module_t mod, int event, void *data) static const vnet_modinfo_t vnet_netgraph_modinfo = { .vmi_id = VNET_MOD_NETGRAPH, .vmi_name = "netgraph", -#ifdef VIMAGE .vmi_size = sizeof(struct vnet_netgraph), + .vmi_dependson = VNET_MOD_LOIF, + .vmi_iattach = vnet_netgraph_iattach, +#ifdef VIMAGE + .vmi_idetach = vnet_netgraph_idetach #endif - .vmi_iattach = vnet_netgraph_iattach }; #endif static int -vnet_netgraph_iattach(const void *arg __unused) +vnet_netgraph_iattach(const void *unused __unused) { INIT_VNET_NETGRAPH(curvnet); @@ -3091,6 +3099,33 @@ vnet_netgraph_iattach(const void *arg __unused) return (0); } +#ifdef VIMAGE +static int +vnet_netgraph_idetach(const void *unused __unused) +{ + INIT_VNET_NETGRAPH(curvnet); + node_p node, last_killed = NULL; + + while ((node = LIST_FIRST(&V_ng_nodelist)) != NULL) { + if (node == last_killed) { + /* This should never happen */ + node->nd_flags |= NGF_REALLY_DIE; + printf("netgraph node %s needs NGF_REALLY_DIE\n", + node->nd_name); + ng_rmnode(node, NULL, NULL, 0); + /* This must never happen */ + if (node == LIST_FIRST(&V_ng_nodelist)) + panic("netgraph node %s won't die", + node->nd_name); + } + ng_rmnode(node, NULL, NULL, 0); + last_killed = node; + } + + return (0); +} +#endif /* VIMAGE */ + /* * Handle loading and unloading for this code. * The only thing we need to link into is the NETISR strucure. @@ -3313,6 +3348,7 @@ ngthread(void *arg) NG_WORKLIST_SLEEP(); STAILQ_REMOVE_HEAD(&ng_worklist, nd_input_queue.q_work); NG_WORKLIST_UNLOCK(); + CURVNET_SET(node->nd_vnet); CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist", __func__, node->nd_ID, node); /* @@ -3342,6 +3378,7 @@ ngthread(void *arg) } } NG_NODE_UNREF(node); + CURVNET_RESTORE(); } } @@ -3675,7 +3712,9 @@ ng_callout_trampoline(void *arg) { item_p item = arg; + CURVNET_SET(NGI_NODE(item)->nd_vnet); ng_snd_item(item, 0); + CURVNET_RESTORE(); } diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index c25f6eba857f..7d811f7a5814 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -127,6 +127,9 @@ struct protosw inetsw[] = { .pr_ctlinput = udp_ctlinput, .pr_ctloutput = ip_ctloutput, .pr_init = udp_init, +#ifdef VIMAGE + .pr_destroy = udp_destroy, +#endif .pr_usrreqs = &udp_usrreqs }, { @@ -138,6 +141,9 @@ struct protosw inetsw[] = { .pr_ctlinput = tcp_ctlinput, .pr_ctloutput = tcp_ctloutput, .pr_init = tcp_init, +#ifdef VIMAGE + .pr_destroy = tcp_destroy, +#endif .pr_slowtimo = tcp_slowtimo, .pr_drain = tcp_drain, .pr_usrreqs = &tcp_usrreqs @@ -348,11 +354,15 @@ IPPROTOSPACER, .pr_input = rip_input, .pr_ctloutput = rip_ctloutput, .pr_init = rip_init, +#ifdef VIMAGE + .pr_destroy = rip_destroy, +#endif .pr_usrreqs = &rip_usrreqs }, }; extern int in_inithead(void **, int); +extern int in_detachhead(void **, int); struct domain inetdomain = { .dom_family = AF_INET, @@ -363,6 +373,9 @@ struct domain inetdomain = { .dom_rtattach = rn4_mpath_inithead, #else .dom_rtattach = in_inithead, +#endif +#ifdef VIMAGE + .dom_rtdetach = in_detachhead, #endif .dom_rtoffset = 32, .dom_maxrtkey = sizeof(struct sockaddr_in), diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 8fc60ae886a4..f798f9c6866e 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -65,6 +65,9 @@ __FBSDID("$FreeBSD$"); #include extern int in_inithead(void **head, int off); +#ifdef VIMAGE +extern int in_detachhead(void **head, int off); +#endif #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ @@ -382,6 +385,17 @@ in_inithead(void **head, int off) return 1; } +#ifdef VIMAGE +int +in_detachhead(void **head, int off) +{ + INIT_VNET_INET(curvnet); + + callout_drain(&V_rtq_timer); + return (1); +} +#endif + /* * This zaps old routes when the interface goes down or interface * address is deleted. In the latter case, it deletes static routes diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index aa9beb148b8f..1e5e693012c2 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -210,6 +210,9 @@ u_int16_t ip_randomid(void); int rip_ctloutput(struct socket *, struct sockopt *); void rip_ctlinput(int, struct sockaddr *, void *); void rip_init(void); +#ifdef VIMAGE +void rip_destroy(void); +#endif void rip_input(struct mbuf *, int); int rip_output(struct mbuf *, struct socket *, u_long); void ipip_input(struct mbuf *, int); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 806fc1d01357..8063da134c30 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -205,6 +205,19 @@ rip_init(void) EVENTHANDLER_PRI_ANY); } +#ifdef VIMAGE +void +rip_destroy(void) +{ + INIT_VNET_INET(curvnet); + + hashdestroy(V_ripcbinfo.ipi_hashbase, M_PCB, + V_ripcbinfo.ipi_hashmask); + hashdestroy(V_ripcbinfo.ipi_porthashbase, M_PCB, + V_ripcbinfo.ipi_porthashmask); +} +#endif + static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) diff --git a/sys/netinet/tcp_hostcache.c b/sys/netinet/tcp_hostcache.c index 93367c5e16a1..e04499d7224f 100644 --- a/sys/netinet/tcp_hostcache.c +++ b/sys/netinet/tcp_hostcache.c @@ -230,6 +230,18 @@ tcp_hc_init(void) tcp_hc_purge, curvnet); } +#ifdef VIMAGE +void +tcp_hc_destroy(void) +{ + INIT_VNET_INET(curvnet); + + /* XXX TODO walk the hashtable and free all entries */ + + callout_drain(&V_tcp_hc_callout); +} +#endif + /* * Internal function: look up an entry in the hostcache or return NULL. * diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 489617b5228c..fc49344e0bb5 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -426,6 +426,25 @@ tcp_init(void) EVENTHANDLER_PRI_ANY); } +#ifdef VIMAGE +void +tcp_destroy(void) +{ + INIT_VNET_INET(curvnet); + + tcp_tw_destroy(); + tcp_hc_destroy(); + syncache_destroy(); + + /* XXX check that hashes are empty! */ + hashdestroy(V_tcbinfo.ipi_hashbase, M_PCB, + V_tcbinfo.ipi_hashmask); + hashdestroy(V_tcbinfo.ipi_porthashbase, M_PCB, + V_tcbinfo.ipi_porthashmask); + INP_INFO_LOCK_DESTROY(&V_tcbinfo); +} +#endif + void tcp_fini(void *xtp) { diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 033e506c0c8b..dc05f46c3686 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -275,6 +275,19 @@ syncache_init(void) uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit); } +#ifdef VIMAGE +void +syncache_destroy(void) +{ + INIT_VNET_INET(curvnet); + + /* XXX walk the cache, free remaining objects, stop timers */ + + uma_zdestroy(V_tcp_syncache.zone); + FREE(V_tcp_syncache.hashbase, M_SYNCACHE); +} +#endif + /* * Inserts a syncache entry into the specified bucket row. * Locks and unlocks the syncache_head autonomously. diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index e488039dcd03..5ed0c9ef8a75 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -35,6 +35,9 @@ #ifdef _KERNEL void syncache_init(void); +#ifdef VIMAGE +void syncache_destroy(void); +#endif void syncache_unreach(struct in_conninfo *, struct tcphdr *); int syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct socket **, struct mbuf *); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 4c0fe947aa67..32108c74af3e 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -179,6 +179,20 @@ tcp_tw_init(void) TAILQ_INIT(&V_twq_2msl); } +#ifdef VIMAGE +void +tcp_tw_destroy(void) +{ + INIT_VNET_INET(curvnet); + struct tcptw *tw; + + INP_INFO_WLOCK(&V_tcbinfo); + while((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL) + tcp_twclose(tw, 0); + INP_INFO_WUNLOCK(&V_tcbinfo); +} +#endif + /* * Move a TCP connection into TIME_WAIT state. * tcbinfo is locked. diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5dc840eee5cd..5568d4a893b4 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -584,6 +584,9 @@ struct tcpcb * void tcp_drain(void); void tcp_fasttimo(void); void tcp_init(void); +#ifdef VIMAGE +void tcp_destroy(void); +#endif void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, const void *); @@ -605,6 +608,9 @@ int tcp_output(struct tcpcb *); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); void tcp_tw_init(void); +#ifdef VIMAGE +void tcp_tw_destroy(void); +#endif void tcp_tw_zone_change(void); int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); @@ -625,6 +631,9 @@ void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq); * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo) */ void tcp_hc_init(void); +#ifdef VIMAGE +void tcp_hc_destroy(void); +#endif void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); u_long tcp_hc_getmtu(struct in_conninfo *); void tcp_hc_updatemtu(struct in_conninfo *, u_long); diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 33be911b0781..fdc96f4501ec 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -221,6 +221,20 @@ udp_discardcb(struct udpcb *up) uma_zfree(V_udpcb_zone, up); } +#ifdef VIMAGE +void +udp_destroy(void) +{ + INIT_VNET_INET(curvnet); + + hashdestroy(V_udbinfo.ipi_hashbase, M_PCB, + V_udbinfo.ipi_hashmask); + hashdestroy(V_udbinfo.ipi_porthashbase, M_PCB, + V_udbinfo.ipi_porthashmask); + INP_INFO_LOCK_DESTROY(&V_udbinfo); +} +#endif + /* * Subroutine of udp_input(), which appends the provided mbuf chain to the * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index e4298191e090..8b045ad1f568 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -128,6 +128,9 @@ void udp_discardcb(struct udpcb *); void udp_ctlinput(int, struct sockaddr *, void *); void udp_init(void); +#ifdef VIMAGE +void udp_destroy(void); +#endif void udp_input(struct mbuf *, int); struct inpcb *udp_notify(struct inpcb *inp, int errno); int udp_shutdown(struct socket *so); diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index c0b0b25da46f..e4d280dff241 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -148,6 +148,9 @@ struct ip6protosw inet6sw[] = { .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_init = ip6_init, +#ifdef VIMAGE + .pr_destroy = ip6_destroy, +#endif .pr_slowtimo = frag6_slowtimo, .pr_drain = frag6_drain, .pr_usrreqs = &nousrreqs, @@ -349,6 +352,9 @@ struct ip6protosw inet6sw[] = { }; extern int in6_inithead(void **, int); +#ifdef VIMAGE +extern int in6_detachhead(void **, int); +#endif struct domain inet6domain = { .dom_family = AF_INET6, @@ -360,6 +366,9 @@ struct domain inet6domain = { .dom_rtattach = rn6_mpath_inithead, #else .dom_rtattach = in6_inithead, +#endif +#ifdef VIMAGE + .dom_rtdetach = in6_detachhead, #endif .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3, .dom_maxrtkey = sizeof(struct sockaddr_in6), diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 3a423ed0ff2d..4dbdad6e4b3e 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -112,6 +112,9 @@ __FBSDID("$FreeBSD$"); #include extern int in6_inithead(void **head, int off); +#ifdef VIMAGE +extern int in6_detachhead(void **head, int off); +#endif #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ @@ -464,3 +467,15 @@ in6_inithead(void **head, int off) in6_mtutimo(curvnet); /* kick off timeout first time */ return 1; } + +#ifdef VIMAGE +int +in6_detachhead(void **head, int off) +{ + INIT_VNET_INET6(curvnet); + + callout_drain(&V_rtq_timer6); + callout_drain(&V_rtq_mtutimer); + return (1); +} +#endif diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index b9617bb835d6..5166eb4cf3e9 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -303,6 +303,17 @@ ip6_init(void) netisr_register(&ip6_nh); } +#ifdef VIMAGE +void +ip6_destroy() +{ + INIT_VNET_INET6(curvnet); + + nd6_destroy(); + callout_drain(&V_in6_tmpaddrtimer_ch); +} +#endif + static int ip6_init2_vnet(const void *unused __unused) { diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 313b6caf372e..74749f65838f 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -339,6 +339,9 @@ int icmp6_ctloutput __P((struct socket *, struct sockopt *sopt)); struct in6_ifaddr; void ip6_init __P((void)); +#ifdef VIMAGE +void ip6_destroy __P((void)); +#endif void ip6_input __P((struct mbuf *)); struct in6_ifaddr *ip6_getdstifaddr __P((struct mbuf *)); void ip6_freepcbopts __P((struct ip6_pktopts *)); diff --git a/sys/netinet6/ip6protosw.h b/sys/netinet6/ip6protosw.h index 2a7cea404add..1fae44c8602a 100644 --- a/sys/netinet6/ip6protosw.h +++ b/sys/netinet6/ip6protosw.h @@ -129,6 +129,8 @@ struct ip6protosw { /* utility hooks */ void (*pr_init) /* initialization hook */ __P((void)); + void (*pr_destroy) /* cleanup hook */ + __P((void)); void (*pr_fasttimo) /* fast timeout (200ms) */ __P((void)); diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index ccfdb8fc0791..96c6d018d39f 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -134,14 +134,8 @@ void nd6_init(void) { INIT_VNET_INET6(curvnet); - static int nd6_init_done = 0; int i; - if (nd6_init_done) { - log(LOG_NOTICE, "nd6_init called more than once(ignored)\n"); - return; - } - V_nd6_prune = 1; /* walk list every 1 seconds */ V_nd6_delay = 5; /* delay first probe time 5 second */ V_nd6_umaxtries = 3; /* maximum unicast query */ @@ -180,6 +174,8 @@ nd6_init(void) V_ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME; V_ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; + V_ip6_desync_factor = 0; + all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) @@ -191,11 +187,20 @@ nd6_init(void) callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); - - nd6_init_done = 1; - } + +#ifdef VIMAGE +void +nd6_destroy() +{ + INIT_VNET_INET6(curvnet); + + callout_drain(&V_nd6_slowtimo_ch); + callout_drain(&V_nd6_timer_ch); +} +#endif + struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index d4d2fd577782..0730d8406d4f 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -371,6 +371,9 @@ union nd_opts { /* XXX: need nd6_var.h?? */ /* nd6.c */ void nd6_init __P((void)); +#ifdef VIMAGE +void nd6_destroy __P((void)); +#endif struct nd_ifinfo *nd6_ifattach __P((struct ifnet *)); void nd6_ifdetach __P((struct nd_ifinfo *)); int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *)); diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 6c42e3227654..4eef0647e01a 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -104,6 +104,9 @@ struct vnet_ipsec vnet_ipsec_0; #endif static int ipsec_iattach(const void *); +#ifdef VIMAGE +static int ipsec_idetach(const void *); +#endif #ifdef VIMAGE_GLOBALS /* NB: name changed so netstat doesn't use it. */ @@ -256,7 +259,10 @@ static const vnet_modinfo_t vnet_ipsec_modinfo = { .vmi_name = "ipsec", .vmi_size = sizeof(struct vnet_ipsec), .vmi_dependson = VNET_MOD_INET, /* XXX revisit - INET6 ? */ - .vmi_iattach = ipsec_iattach + .vmi_iattach = ipsec_iattach, +#ifdef VIMAGE + .vmi_idetach = ipsec_idetach +#endif }; #endif /* !VIMAGE_GLOBALS */ @@ -1791,7 +1797,6 @@ ipsec_attach(void) #else ipsec_iattach(NULL); #endif - } static int @@ -1804,6 +1809,17 @@ ipsec_iattach(const void *unused __unused) return (0); } + +#ifdef VIMAGE +static int +ipsec_idetach(const void *unused __unused) +{ + + /* XXX revisit this! */ + + return (0); +} +#endif SYSINIT(ipsec, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, ipsec_attach, NULL); diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index a38bb8db3ecf..e79179f8cb26 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -7223,10 +7223,76 @@ key_init(void) keystat.getspi_count = 1; printf("IPsec: Initialized Security Association Processing.\n"); - - return; } +#ifdef VIMAGE +void +key_destroy(void) +{ + INIT_VNET_IPSEC(curvnet); + struct secpolicy *sp, *nextsp; + struct secspacq *acq, *nextacq; + struct secashead *sah, *nextsah; + struct secreg *reg; + int i; + + SPTREE_LOCK(); + for (i = 0; i < IPSEC_DIR_MAX; i++) { + for (sp = LIST_FIRST(&V_sptree[i]); + sp != NULL; sp = nextsp) { + nextsp = LIST_NEXT(sp, chain); + if (__LIST_CHAINED(sp)) { + LIST_REMOVE(sp, chain); + free(sp, M_IPSEC_SP); + } + } + } + SPTREE_UNLOCK(); + + SAHTREE_LOCK(); + for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { + nextsah = LIST_NEXT(sah, chain); + if (__LIST_CHAINED(sah)) { + LIST_REMOVE(sah, chain); + free(sah, M_IPSEC_SAH); + } + } + SAHTREE_UNLOCK(); + + REGTREE_LOCK(); + for (i = 0; i <= SADB_SATYPE_MAX; i++) { + LIST_FOREACH(reg, &V_regtree[i], chain) { + if (__LIST_CHAINED(reg)) { + LIST_REMOVE(reg, chain); + free(reg, M_IPSEC_SAR); + break; + } + } + } + REGTREE_UNLOCK(); + + ACQ_LOCK(); + for (acq = LIST_FIRST(&V_spacqtree); acq != NULL; acq = nextacq) { + nextacq = LIST_NEXT(acq, chain); + if (__LIST_CHAINED(acq)) { + LIST_REMOVE(acq, chain); + free(acq, M_IPSEC_SAQ); + } + } + ACQ_UNLOCK(); + + SPACQ_LOCK(); + for (acq = LIST_FIRST(&V_spacqtree); acq != NULL; acq = nextacq) { + nextacq = LIST_NEXT(acq, chain); + if (__LIST_CHAINED(acq)) { + LIST_REMOVE(acq, chain); + free(acq, M_IPSEC_SAQ); + } + } + SPACQ_UNLOCK(); +} +#endif + /* * XXX: maybe This function is called after INBOUND IPsec processing. * diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index 012b35ca1068..fc38279247bf 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -100,6 +100,9 @@ extern void key_randomfill __P((void *, size_t)); extern void key_freereg __P((struct socket *)); extern int key_parse __P((struct mbuf *, struct socket *)); extern void key_init __P((void)); +#ifdef VIMAGE +extern void key_destroy(void); +#endif extern void key_sa_recordxfer __P((struct secasvar *, struct mbuf *)); extern void key_sa_routechange __P((struct sockaddr *)); extern void key_sa_stir_iv __P((struct secasvar *)); diff --git a/sys/netipsec/keysock.c b/sys/netipsec/keysock.c index 6117f65c17c4..ec0fcf96456a 100644 --- a/sys/netipsec/keysock.c +++ b/sys/netipsec/keysock.c @@ -579,6 +579,9 @@ struct domain keydomain = { .dom_family = PF_KEY, .dom_name = "key", .dom_init = key_init0, +#ifdef VIMAGE + .dom_destroy = key_destroy, +#endif .dom_protosw = keysw, .dom_protoswNPROTOSW = &keysw[sizeof(keysw)/sizeof(keysw[0])] }; diff --git a/sys/sys/domain.h b/sys/sys/domain.h index c78e50b0a41c..431429ef1a98 100644 --- a/sys/sys/domain.h +++ b/sys/sys/domain.h @@ -48,6 +48,8 @@ struct domain { char *dom_name; void (*dom_init) /* initialize domain data structures */ (void); + void (*dom_destroy) /* cleanup structures / state */ + (void); int (*dom_externalize) /* externalize access rights */ (struct mbuf *, struct mbuf **); void (*dom_dispose) /* dispose of internalized rights */ @@ -56,6 +58,8 @@ struct domain { struct domain *dom_next; int (*dom_rtattach) /* initialize routing table */ (void **, int); + int (*dom_rtdetach) /* clean up routing table */ + (void **, int); int dom_rtoffset; /* an arg to rtattach, in bits */ /* XXX MRT. * rtoffset May be 0 if the domain supplies its own rtattach(), diff --git a/sys/sys/param.h b/sys/sys/param.h index 764a5a06eff2..cf29f4a52667 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -57,7 +57,7 @@ * is created, otherwise 1. */ #undef __FreeBSD_version -#define __FreeBSD_version 800096 /* Master, propagated to newvers */ +#define __FreeBSD_version 800097 /* Master, propagated to newvers */ #ifndef LOCORE #include diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 9f064c5a8e8f..b55af4b7dcdb 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -70,6 +70,7 @@ typedef int pr_output_t (struct mbuf *, struct socket *); typedef void pr_ctlinput_t (int, struct sockaddr *, void *); typedef int pr_ctloutput_t (struct socket *, struct sockopt *); typedef void pr_init_t (void); +typedef void pr_destroy_t (void); typedef void pr_fasttimo_t (void); typedef void pr_slowtimo_t (void); typedef void pr_drain_t (void); @@ -86,6 +87,7 @@ struct protosw { pr_ctloutput_t *pr_ctloutput; /* control output (from above) */ /* utility hooks */ pr_init_t *pr_init; + pr_destroy_t *pr_destroy; pr_fasttimo_t *pr_fasttimo; /* fast timeout (200ms) */ pr_slowtimo_t *pr_slowtimo; /* slow timeout (500ms) */ pr_drain_t *pr_drain; /* flush any excess space possible */ diff --git a/sys/sys/vimage.h b/sys/sys/vimage.h index 5c8b5641e108..67a78f0b26d4 100644 --- a/sys/sys/vimage.h +++ b/sys/sys/vimage.h @@ -159,6 +159,7 @@ int vi_symlookup(struct kld_sym_lookup *, char *); int vi_td_ioctl(u_long, struct vi_req *, struct thread *); int vi_if_move(struct vi_req *, struct ifnet *, struct vimage *); int vi_child_of(struct vimage *, struct vimage *); +struct vimage *vimage_by_name(struct vimage *, char *); void vnet_mod_register(const struct vnet_modinfo *); void vnet_mod_register_multi(const struct vnet_modinfo *, void *, char *); void vnet_mod_deregister(const struct vnet_modinfo *);