2005-01-07 01:45:51 +00:00
|
|
|
/*-
|
1995-09-22 19:56:26 +00:00
|
|
|
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
|
1994-05-24 10:09:53 +00:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1995-09-22 19:56:26 +00:00
|
|
|
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
|
1999-08-28 01:08:13 +00:00
|
|
|
* $FreeBSD$
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
2006-01-24 09:08:54 +00:00
|
|
|
#include "opt_ipfw.h"
|
1999-12-22 19:13:38 +00:00
|
|
|
#include "opt_ipsec.h"
|
1999-12-07 17:39:16 +00:00
|
|
|
#include "opt_inet6.h"
|
2002-08-01 21:37:34 +00:00
|
|
|
#include "opt_mac.h"
|
1999-12-07 17:39:16 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
1994-05-25 09:21:21 +00:00
|
|
|
#include <sys/systm.h>
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <sys/domain.h>
|
|
|
|
#include <sys/jail.h>
|
1997-02-24 20:31:25 +00:00
|
|
|
#include <sys/kernel.h>
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <sys/lock.h>
|
2002-08-01 21:37:34 +00:00
|
|
|
#include <sys/mac.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/mbuf.h>
|
1999-07-11 18:32:46 +00:00
|
|
|
#include <sys/proc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/protosw.h>
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <sys/signalvar.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <sys/sx.h>
|
1995-03-16 18:17:34 +00:00
|
|
|
#include <sys/sysctl.h>
|
1996-04-04 10:46:44 +00:00
|
|
|
#include <sys/syslog.h>
|
1998-03-28 10:18:26 +00:00
|
|
|
|
2002-03-20 05:48:55 +00:00
|
|
|
#include <vm/uma.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_systm.h>
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/in_var.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/ip.h>
|
1999-12-07 17:39:16 +00:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#endif
|
2002-04-30 01:54:54 +00:00
|
|
|
#include <netinet/ip_icmp.h>
|
|
|
|
#include <netinet/icmp_var.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/ip_var.h>
|
2005-11-18 20:12:40 +00:00
|
|
|
#include <netinet/ip_options.h>
|
1999-12-07 17:39:16 +00:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet6/ip6_var.h>
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <netinet/udp.h>
|
|
|
|
#include <netinet/udp_var.h>
|
|
|
|
|
2002-10-16 02:25:05 +00:00
|
|
|
#ifdef FAST_IPSEC
|
|
|
|
#include <netipsec/ipsec.h>
|
|
|
|
#endif /*FAST_IPSEC*/
|
|
|
|
|
1999-12-07 17:39:16 +00:00
|
|
|
#ifdef IPSEC
|
|
|
|
#include <netinet6/ipsec.h>
|
|
|
|
#endif /*IPSEC*/
|
|
|
|
|
2000-03-27 19:14:27 +00:00
|
|
|
#include <machine/in_cksum.h>
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* UDP protocol implementation.
|
|
|
|
* Per RFC 768, August, 1980.
|
|
|
|
*/
|
|
|
|
/*
 * Tunable sysctl knobs and global UDP state.
 */
#ifndef COMPAT_42
static int	udpcksum = 1;
#else
static int	udpcksum = 0;		/* XXX */
#endif
/* Exported as net.inet.udp.checksum; not read in this chunk -- used elsewhere in this file (TODO confirm). */
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "");

/*
 * When non-zero, inbound UDP packets that match no socket are logged
 * by udp_input().  Non-static: presumably referenced from other files
 * as well -- TODO confirm.
 */
int	log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/*
 * When non-zero, udp_input() silently drops packets that match no
 * socket instead of generating an ICMP port-unreachable response.
 */
static int	blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/*
 * When non-zero, udp_input() delivers a multicast datagram to a socket
 * only if that socket holds a membership for the packet's destination
 * address on the arrival interface.
 */
static int	strict_mcast_mship = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

struct	inpcbhead udb;		/* from udp_var.h */
#define	udb6	udb		/* for KAME src sync over BSD*'s */
/* Global pcb info: list head, lookup hash tables, lock, and uma zone
 * (all initialized in udp_init()). */
struct	inpcbinfo udbinfo;

/* Default bucket count for the pcb hash tables; overridable at build time. */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

struct	udpstat udpstat;	/* from udp_var.h */
SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
    &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");

/* Forward declarations for file-local helpers defined below. */
static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
		int off, struct sockaddr_in *udp_in);

static void udp_detach(struct socket *so);
static	int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
		struct mbuf *, struct thread *);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
void
|
2002-11-20 19:00:54 +00:00
|
|
|
udp_init()
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_LOCK_INIT(&udbinfo, "udp");
|
1995-04-09 01:29:31 +00:00
|
|
|
LIST_INIT(&udb);
|
|
|
|
udbinfo.listhead = &udb;
|
1997-03-03 09:23:37 +00:00
|
|
|
udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
|
1998-03-28 10:18:26 +00:00
|
|
|
udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
|
|
|
|
&udbinfo.porthashmask);
|
2002-03-20 05:48:55 +00:00
|
|
|
udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
|
2004-08-11 20:30:08 +00:00
|
|
|
NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
2002-03-20 05:48:55 +00:00
|
|
|
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * UDP input routine, called from the IP layer with an IPv4 datagram whose
 * protocol is IPPROTO_UDP.  'm' holds the packet; 'off' is the offset of
 * the UDP header within it (i.e. the IP header length).  The datagram is
 * validated (header pullup, length, checksum) and the payload is then
 * delivered via udp_append(): to every matching socket for broadcast or
 * multicast destinations, or to the single best-match pcb (found with
 * in_pcblookup_hash()) for unicast.  The mbuf chain is consumed on every
 * return path.
 *
 * Locking: the udbinfo read lock is held across pcb lookup/iteration, and
 * each pcb is individually locked around its udp_append() call.
 */
void
udp_input(m, off)
	register struct mbuf *m;
	int off;
{
	int iphlen = off;
	register struct ip *ip;
	register struct udphdr *uh;
	register struct inpcb *inp;
	int len;
	struct ip save_ip;		/* pristine header kept for ICMP errors */
	struct sockaddr_in udp_in;	/* source address handed to the socket */
#ifdef IPFIREWALL_FORWARD
	struct m_tag *fwd_tag;
#endif

	udpstat.udps_ipackets++;

	/*
	 * Strip IP options, if any; should skip this,
	 * make available to user, and use on returned packets,
	 * but we don't yet have a way to check the checksum
	 * with options still present.
	 */
	if (iphlen > sizeof (struct ip)) {
		ip_stripoptions(m, (struct mbuf *)0);
		iphlen = sizeof(struct ip);
	}

	/*
	 * Get IP and UDP header together in first mbuf.
	 */
	ip = mtod(m, struct ip *);
	if (m->m_len < iphlen + sizeof(struct udphdr)) {
		/* m_pullup() frees the chain on failure, so just count and return. */
		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
			udpstat.udps_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	uh = (struct udphdr *)((caddr_t)ip + iphlen);

	/* destination port of 0 is illegal, based on RFC768. */
	if (uh->uh_dport == 0)
		goto badunlocked;

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	bzero(&udp_in, sizeof(udp_in));
	udp_in.sin_len = sizeof(udp_in);
	udp_in.sin_family = AF_INET;
	udp_in.sin_port = uh->uh_sport;
	udp_in.sin_addr = ip->ip_src;

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 */
	len = ntohs((u_short)uh->uh_ulen);
	if (ip->ip_len != len) {
		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
			udpstat.udps_badlen++;
			goto badunlocked;
		}
		/* Trim trailing garbage beyond the UDP length. */
		m_adj(m, len - ip->ip_len);
		/* ip->ip_len = len; */
	}
	/*
	 * Save a copy of the IP header in case we want restore it
	 * for sending an ICMP error message in response.
	 * (Skipped when blackhole is set, since no error is ever sent then.)
	 */
	if (!blackhole)
		save_ip = *ip;

	/*
	 * Checksum extended UDP header and data.  Prefer the checksum the
	 * hardware already computed (CSUM_DATA_VALID); otherwise compute it
	 * in software over a pseudo-header built in place, saving and
	 * restoring the overlaid ih_x1 bytes around the calculation.
	 */
	if (uh->uh_sum) {
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				uh->uh_sum = m->m_pkthdr.csum_data;
			else
				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htonl((u_short)len +
				    m->m_pkthdr.csum_data + IPPROTO_UDP));
			uh->uh_sum ^= 0xffff;
		} else {
			char b[9];

			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
			bzero(((struct ipovly *)ip)->ih_x1, 9);
			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
			uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
		}
		if (uh->uh_sum) {
			udpstat.udps_badsum++;
			m_freem(m);
			return;
		}
	} else
		udpstat.udps_nosum++;

#ifdef IPFIREWALL_FORWARD
	/* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);

	if (fwd_tag != NULL) {
		struct sockaddr_in *next_hop;

		/* Do the hack. */
		next_hop = (struct sockaddr_in *)(fwd_tag + 1);
		ip->ip_dst = next_hop->sin_addr;
		uh->uh_dport = ntohs(next_hop->sin_port);
		/* Remove the tag from the packet. We don't need it anymore. */
		m_tag_delete(m, fwd_tag);
	}
#endif

	INP_INFO_RLOCK(&udbinfo);

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
		struct inpcb *last;
		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 */
		last = NULL;
		LIST_FOREACH(inp, &udb, inp_list) {
			if (inp->inp_lport != uh->uh_dport)
				continue;
#ifdef INET6
			/* Skip pcbs that do not accept IPv4 datagrams. */
			if ((inp->inp_vflag & INP_IPV4) == 0)
				continue;
#endif
			if (inp->inp_laddr.s_addr != INADDR_ANY) {
				if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
					continue;
			}
			if (inp->inp_faddr.s_addr != INADDR_ANY) {
				if (inp->inp_faddr.s_addr !=
				    ip->ip_src.s_addr ||
				    inp->inp_fport != uh->uh_sport)
					continue;
			}
			INP_LOCK(inp);

			/*
			 * Check multicast packets to make sure they are only
			 * sent to sockets with multicast memberships for the
			 * packet's destination address and arrival interface
			 */
#define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
#define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
			if (strict_mcast_mship && inp->inp_moptions != NULL) {
				int mship, foundmship = 0;

				for (mship = 0; mship < NMSHIPS(inp); mship++) {
					if (MSHIP(inp, mship)->inm_addr.s_addr
					    == ip->ip_dst.s_addr &&
					    MSHIP(inp, mship)->inm_ifp
					    == m->m_pkthdr.rcvif) {
						foundmship = 1;
						break;
					}
				}
				if (foundmship == 0) {
					INP_UNLOCK(inp);
					continue;
				}
			}
#undef NMSHIPS
#undef MSHIP
			if (last != NULL) {
				struct mbuf *n;

				/*
				 * Hand a copy of the packet to the previous
				 * match; the original mbuf is reserved for
				 * the final matching socket below.
				 */
				n = m_copy(m, 0, M_COPYALL);
				if (n != NULL)
					udp_append(last, ip, n,
					    iphlen +
					    sizeof(struct udphdr),
					    &udp_in);
				INP_UNLOCK(last);
			}
			last = inp;
			/*
			 * Don't look for additional matches if this one does
			 * not have either the SO_REUSEPORT or SO_REUSEADDR
			 * socket options set.  This heuristic avoids searching
			 * through all pcbs in the common case of a non-shared
			 * port.  It * assumes that an application will never
			 * clear these options after setting them.
			 */
			if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
				break;
		}

		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datgram.)
			 */
			udpstat.udps_noportbcast++;
			goto badheadlocked;
		}
		/* The last match consumes the original mbuf chain. */
		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
		    &udp_in);
		INP_UNLOCK(last);
		INP_INFO_RUNLOCK(&udbinfo);
		return;
	}
	/*
	 * Locate pcb for datagram.
	 */
	inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
	    ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
	if (inp == NULL) {
		if (log_in_vain) {
			char buf[4*sizeof "123"];

			/*
			 * Copy the destination address out first; the second
			 * inet_ntoa() call below reuses the same static
			 * buffer for the source address.
			 */
			strcpy(buf, inet_ntoa(ip->ip_dst));
			log(LOG_INFO,
			    "Connection attempt to UDP %s:%d from %s:%d\n",
			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
			    ntohs(uh->uh_sport));
		}
		udpstat.udps_noport++;
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			udpstat.udps_noportbcast++;
			goto badheadlocked;
		}
		if (blackhole)
			goto badheadlocked;
		/* Rate-limit ICMP unreachables to avoid port-scan amplification. */
		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
			goto badheadlocked;
		/* Restore the saved IP header before building the ICMP error. */
		*ip = save_ip;
		ip->ip_len += iphlen;
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		INP_INFO_RUNLOCK(&udbinfo);
		return;
	}
	INP_LOCK(inp);
	/* Check the minimum TTL for socket. */
	if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
		goto badheadlocked;
	udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
	INP_UNLOCK(inp);
	INP_INFO_RUNLOCK(&udbinfo);
	return;

badheadlocked:
	/* inp is NULL on paths that never locked a pcb. */
	if (inp)
		INP_UNLOCK(inp);
	INP_INFO_RUNLOCK(&udbinfo);
badunlocked:
	m_freem(m);
	return;
}
|
|
|
|
|
|
|
|
/*
 * Subroutine of udp_input(), which appends the provided mbuf chain to the
 * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
 * contains the source address.  If the socket ends up being an IPv6 socket,
 * udp_append() will convert to a sockaddr_in6 before passing the address
 * into the socket code.
 *
 * On failure to enqueue, the mbuf chain (and any control data) is freed
 * here; the caller must not touch 'n' after this call.
 */
static void
udp_append(last, ip, n, off, udp_in)
	struct inpcb *last;		/* target pcb; caller holds its lock */
	struct ip *ip;			/* IP header, used for control data */
	struct mbuf *n;			/* received datagram */
	int off;			/* header bytes to trim before append */
	struct sockaddr_in *udp_in;	/* IPv4 source address/port */
{
	struct sockaddr *append_sa;
	struct socket *so;
	struct mbuf *opts = 0;
#ifdef INET6
	/* Stack copy used only if we must convert to a v4-mapped address. */
	struct sockaddr_in6 udp_in6;
#endif

	INP_LOCK_ASSERT(last);

#if defined(IPSEC) || defined(FAST_IPSEC)
	/* check AH/ESP integrity. */
	if (ipsec4_in_reject(n, last)) {
#ifdef IPSEC
		ipsecstat.in_polvio++;
#endif /*IPSEC*/
		/* Policy violation: drop the datagram. */
		m_freem(n);
		return;
	}
#endif /*IPSEC || FAST_IPSEC*/
#ifdef MAC
	/* MAC framework may veto delivery to this pcb. */
	if (mac_check_inpcb_deliver(last, n) != 0) {
		m_freem(n);
		return;
	}
#endif
	/*
	 * Build ancillary (control) data if the socket asked for it via
	 * IP_RECV* options or SO_TIMESTAMP/SO_BINTIME.
	 */
	if (last->inp_flags & INP_CONTROLOPTS ||
	    last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
#ifdef INET6
		if (last->inp_vflag & INP_IPV6) {
			int savedflags;

			/*
			 * Temporarily mask options that have no IPv6
			 * mapping while generating IPv6 control data.
			 */
			savedflags = last->inp_flags;
			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
			ip6_savecontrol(last, n, &opts);
			last->inp_flags = savedflags;
		} else
#endif
		ip_savecontrol(last, &opts, ip, n);
	}
#ifdef INET6
	if (last->inp_vflag & INP_IPV6) {
		/* IPv6 socket: hand up a v4-mapped sockaddr_in6. */
		bzero(&udp_in6, sizeof(udp_in6));
		udp_in6.sin6_len = sizeof(udp_in6);
		udp_in6.sin6_family = AF_INET6;
		in6_sin_2_v4mapsin6(udp_in, &udp_in6);
		append_sa = (struct sockaddr *)&udp_in6;
	} else
#endif
	append_sa = (struct sockaddr *)udp_in;
	/* Strip IP/UDP headers before queueing the payload. */
	m_adj(n, off);

	so = last->inp_socket;
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
		/* Receive buffer full: drop datagram and control data. */
		m_freem(n);
		if (opts)
			m_freem(opts);
		udpstat.udps_fullsock++;
		SOCKBUF_UNLOCK(&so->so_rcv);
	} else
		/* sorwakeup_locked() drops the sockbuf lock on return. */
		sorwakeup_locked(so);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Notify a udp user of an asynchronous error;
|
|
|
|
* just wake up so that he can collect error status.
|
|
|
|
*/
|
2002-06-14 08:35:21 +00:00
|
|
|
struct inpcb *
|
2002-11-20 19:00:54 +00:00
|
|
|
udp_notify(inp, errno)
|
|
|
|
register struct inpcb *inp;
|
|
|
|
int errno;
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
inp->inp_socket->so_error = errno;
|
|
|
|
sorwakeup(inp->inp_socket);
|
|
|
|
sowwakeup(inp->inp_socket);
|
2002-06-14 08:35:21 +00:00
|
|
|
return inp;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Protocol control input: handle ICMP-derived error notifications for UDP.
 * 'sa' is the foreign address the error refers to; 'vip' is the offending
 * IP header (or NULL).  Matching pcbs are notified via udp_notify().
 */
void
udp_ctlinput(cmd, sa, vip)
	int cmd;		/* PRC_* control command */
	struct sockaddr *sa;	/* foreign (destination) address */
	void *vip;		/* IP header of the offending packet, if any */
{
	struct ip *ip = vip;
	struct udphdr *uh;
	struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
	struct in_addr faddr;
	struct inpcb *inp;

	faddr = ((struct sockaddr_in *)sa)->sin_addr;
	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
		return;

	/*
	 * Redirects don't need to be handled up here.
	 */
	if (PRC_IS_REDIRECT(cmd))
		return;
	/*
	 * Hostdead is ugly because it goes linearly through all PCBs.
	 * XXX: We never get this from ICMP, otherwise it makes an
	 * excellent DoS attack on machines with many connections.
	 */
	if (cmd == PRC_HOSTDEAD)
		ip = 0;
	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
		return;
	if (ip) {
		/* Locate the UDP header just past the IP header. */
		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
		INP_INFO_RLOCK(&udbinfo);
		inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
		    ip->ip_src, uh->uh_sport, 0, NULL);
		if (inp != NULL) {
			INP_LOCK(inp);
			if (inp->inp_socket != NULL) {
				(*notify)(inp, inetctlerrmap[cmd]);
			}
			INP_UNLOCK(inp);
		}
		INP_INFO_RUNLOCK(&udbinfo);
	} else
		/* No specific packet: notify every pcb for this peer. */
		in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
}
|
|
|
|
|
1998-05-15 20:11:40 +00:00
|
|
|
static int
|
2000-07-04 11:25:35 +00:00
|
|
|
udp_pcblist(SYSCTL_HANDLER_ARGS)
|
1998-05-15 20:11:40 +00:00
|
|
|
{
|
2005-06-01 11:24:00 +00:00
|
|
|
int error, i, n;
|
1998-05-15 20:11:40 +00:00
|
|
|
struct inpcb *inp, **inp_list;
|
|
|
|
inp_gen_t gencnt;
|
|
|
|
struct xinpgen xig;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The process of preparing the TCB list is too time-consuming and
|
|
|
|
* resource-intensive to repeat twice on every request.
|
|
|
|
*/
|
|
|
|
if (req->oldptr == 0) {
|
|
|
|
n = udbinfo.ipi_count;
|
|
|
|
req->oldidx = 2 * (sizeof xig)
|
|
|
|
+ (n + n/8) * sizeof(struct xinpcb);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (req->newptr != 0)
|
|
|
|
return EPERM;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* OK, now we're committed to doing something.
|
|
|
|
*/
|
2003-02-15 02:37:57 +00:00
|
|
|
INP_INFO_RLOCK(&udbinfo);
|
1998-05-15 20:11:40 +00:00
|
|
|
gencnt = udbinfo.ipi_gencnt;
|
|
|
|
n = udbinfo.ipi_count;
|
2003-02-15 02:37:57 +00:00
|
|
|
INP_INFO_RUNLOCK(&udbinfo);
|
1998-05-15 20:11:40 +00:00
|
|
|
|
2004-02-26 00:27:04 +00:00
|
|
|
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
|
2002-07-28 19:59:31 +00:00
|
|
|
+ n * sizeof(struct xinpcb));
|
2004-02-26 00:27:04 +00:00
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
2002-07-28 19:59:31 +00:00
|
|
|
|
1998-05-15 20:11:40 +00:00
|
|
|
xig.xig_len = sizeof xig;
|
|
|
|
xig.xig_count = n;
|
|
|
|
xig.xig_gen = gencnt;
|
|
|
|
xig.xig_sogen = so_gencnt;
|
|
|
|
error = SYSCTL_OUT(req, &xig, sizeof xig);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2003-02-19 05:47:46 +00:00
|
|
|
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
|
1998-05-15 20:11:40 +00:00
|
|
|
if (inp_list == 0)
|
|
|
|
return ENOMEM;
|
2004-08-16 18:32:07 +00:00
|
|
|
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_RLOCK(&udbinfo);
|
2001-02-04 13:13:25 +00:00
|
|
|
for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
|
|
|
|
inp = LIST_NEXT(inp, inp_list)) {
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
2002-06-21 22:54:16 +00:00
|
|
|
if (inp->inp_gencnt <= gencnt &&
|
|
|
|
cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
|
1998-05-15 20:11:40 +00:00
|
|
|
inp_list[i++] = inp;
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
1998-05-15 20:11:40 +00:00
|
|
|
}
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_RUNLOCK(&udbinfo);
|
1998-05-15 20:11:40 +00:00
|
|
|
n = i;
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
inp = inp_list[i];
|
|
|
|
if (inp->inp_gencnt <= gencnt) {
|
|
|
|
struct xinpcb xi;
|
2005-05-06 02:50:00 +00:00
|
|
|
bzero(&xi, sizeof(xi));
|
1998-05-15 20:11:40 +00:00
|
|
|
xi.xi_len = sizeof xi;
|
|
|
|
/* XXX should avoid extra copy */
|
|
|
|
bcopy(inp, &xi.xi_inp, sizeof *inp);
|
|
|
|
if (inp->inp_socket)
|
|
|
|
sotoxsocket(inp->inp_socket, &xi.xi_socket);
|
2003-02-15 02:37:57 +00:00
|
|
|
xi.xi_inp.inp_gencnt = inp->inp_gencnt;
|
1998-05-15 20:11:40 +00:00
|
|
|
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!error) {
|
|
|
|
/*
|
|
|
|
* Give the user an updated idea of our state.
|
|
|
|
* If the generation differs from what we told
|
|
|
|
* her before, she knows that something happened
|
|
|
|
* while we were processing this request, and it
|
|
|
|
* might be necessary to retry.
|
|
|
|
*/
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_RLOCK(&udbinfo);
|
1998-05-15 20:11:40 +00:00
|
|
|
xig.xig_gen = udbinfo.ipi_gencnt;
|
|
|
|
xig.xig_sogen = so_gencnt;
|
|
|
|
xig.xig_count = udbinfo.ipi_count;
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_RUNLOCK(&udbinfo);
|
1998-05-15 20:11:40 +00:00
|
|
|
error = SYSCTL_OUT(req, &xig, sizeof xig);
|
|
|
|
}
|
|
|
|
free(inp_list, M_TEMP);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Read-only sysctl: dump the active UDP pcb list via udp_pcblist(). */
SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
|
1998-05-15 20:11:40 +00:00
|
|
|
|
1999-07-11 18:32:46 +00:00
|
|
|
/*
 * Sysctl handler returning the credentials (struct xucred) of the owner
 * of the UDP pcb matching a (local, foreign) address pair supplied by
 * userland as two sockaddr_in structures.  Restricted to the superuser
 * (jail-allowed) and subject to cr_canseesocket() visibility checks.
 */
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
{
	struct xucred xuc;
	/* addrs[0] = foreign endpoint, addrs[1] = local endpoint. */
	struct sockaddr_in addrs[2];
	struct inpcb *inp;
	int error;

	error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
	if (error)
		return (error);
	error = SYSCTL_IN(req, addrs, sizeof(addrs));
	if (error)
		return (error);
	INP_INFO_RLOCK(&udbinfo);
	/* Wildcard lookup (last arg 1) so unconnected sockets match too. */
	inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
	    addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
	if (inp == NULL || inp->inp_socket == NULL) {
		error = ENOENT;
		goto out;
	}
	error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
	if (error)
		goto out;
	/* Snapshot the socket credentials while the info lock is held. */
	cru2x(inp->inp_socket->so_cred, &xuc);
out:
	INP_INFO_RUNLOCK(&udbinfo);
	if (error == 0)
		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
	return (error);
}
|
|
|
|
|
2001-06-24 12:18:27 +00:00
|
|
|
/* Sysctl: look up the credentials of a UDP connection's owner. */
SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
    CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
    udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
|
1999-07-11 18:32:46 +00:00
|
|
|
|
1995-11-14 20:34:56 +00:00
|
|
|
static int
|
2002-11-20 19:00:54 +00:00
|
|
|
udp_output(inp, m, addr, control, td)
|
|
|
|
register struct inpcb *inp;
|
|
|
|
struct mbuf *m;
|
|
|
|
struct sockaddr *addr;
|
|
|
|
struct mbuf *control;
|
|
|
|
struct thread *td;
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2002-11-20 19:00:54 +00:00
|
|
|
register struct udpiphdr *ui;
|
|
|
|
register int len = m->m_pkthdr.len;
|
2002-10-21 20:10:05 +00:00
|
|
|
struct in_addr faddr, laddr;
|
2002-10-21 20:40:02 +00:00
|
|
|
struct cmsghdr *cm;
|
|
|
|
struct sockaddr_in *sin, src;
|
2002-10-21 20:10:05 +00:00
|
|
|
int error = 0;
|
2003-08-20 14:46:40 +00:00
|
|
|
int ipflags;
|
2002-10-21 20:10:05 +00:00
|
|
|
u_short fport, lport;
|
2004-08-19 01:13:10 +00:00
|
|
|
int unlock_udbinfo;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-08-19 01:13:10 +00:00
|
|
|
/*
|
|
|
|
* udp_output() may need to temporarily bind or connect the current
|
|
|
|
* inpcb. As such, we don't know up front what inpcb locks we will
|
|
|
|
* need. Do any work to decide what is needed up front before
|
|
|
|
* acquiring locks.
|
|
|
|
*/
|
1996-10-25 17:57:53 +00:00
|
|
|
if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
|
2002-10-21 20:40:02 +00:00
|
|
|
if (control)
|
|
|
|
m_freem(control);
|
2004-08-19 01:13:10 +00:00
|
|
|
m_freem(m);
|
|
|
|
return EMSGSIZE;
|
1996-10-25 17:57:53 +00:00
|
|
|
}
|
|
|
|
|
2002-10-21 20:40:02 +00:00
|
|
|
src.sin_addr.s_addr = INADDR_ANY;
|
|
|
|
if (control != NULL) {
|
|
|
|
/*
|
|
|
|
* XXX: Currently, we assume all the optional information
|
|
|
|
* is stored in a single mbuf.
|
|
|
|
*/
|
|
|
|
if (control->m_next) {
|
|
|
|
m_freem(control);
|
2004-08-19 01:13:10 +00:00
|
|
|
m_freem(m);
|
|
|
|
return EINVAL;
|
2002-10-21 20:40:02 +00:00
|
|
|
}
|
|
|
|
for (; control->m_len > 0;
|
|
|
|
control->m_data += CMSG_ALIGN(cm->cmsg_len),
|
|
|
|
control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
|
|
|
|
cm = mtod(control, struct cmsghdr *);
|
|
|
|
if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
|
|
|
|
cm->cmsg_len > control->m_len) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (cm->cmsg_level != IPPROTO_IP)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (cm->cmsg_type) {
|
|
|
|
case IP_SENDSRCADDR:
|
|
|
|
if (cm->cmsg_len !=
|
|
|
|
CMSG_LEN(sizeof(struct in_addr))) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
bzero(&src, sizeof(src));
|
|
|
|
src.sin_family = AF_INET;
|
|
|
|
src.sin_len = sizeof(src);
|
|
|
|
src.sin_port = inp->inp_lport;
|
|
|
|
src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = ENOPROTOOPT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
m_freem(control);
|
|
|
|
}
|
2004-08-19 01:13:10 +00:00
|
|
|
if (error) {
|
|
|
|
m_freem(m);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (src.sin_addr.s_addr != INADDR_ANY ||
|
|
|
|
addr != NULL) {
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
|
|
|
unlock_udbinfo = 1;
|
|
|
|
} else
|
|
|
|
unlock_udbinfo = 0;
|
|
|
|
INP_LOCK(inp);
|
|
|
|
|
|
|
|
#ifdef MAC
|
|
|
|
mac_create_mbuf_from_inpcb(inp, m);
|
|
|
|
#endif
|
|
|
|
|
2002-10-21 20:10:05 +00:00
|
|
|
laddr = inp->inp_laddr;
|
|
|
|
lport = inp->inp_lport;
|
2002-10-21 20:40:02 +00:00
|
|
|
if (src.sin_addr.s_addr != INADDR_ANY) {
|
|
|
|
if (lport == 0) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
|
2004-03-27 21:05:46 +00:00
|
|
|
&laddr.s_addr, &lport, td->td_ucred);
|
2002-10-21 20:40:02 +00:00
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
if (addr) {
|
This Implements the mumbled about "Jail" feature.
This is a seriously beefed up chroot kind of thing. The process
is jailed along the same lines as a chroot does it, but with
additional tough restrictions imposed on what the superuser can do.
For all I know, it is safe to hand over the root bit inside a
prison to the customer living in that prison, this is what
it was developed for in fact: "real virtual servers".
Each prison has an ip number associated with it, which all IP
communications will be coerced to use and each prison has its own
hostname.
Needless to say, you need more RAM this way, but the advantage is
that each customer can run their own particular version of apache
and not stomp on the toes of their neighbors.
It generally does what one would expect, but setting up a jail
still takes a little knowledge.
A few notes:
I have no scripts for setting up a jail, don't ask me for them.
The IP number should be an alias on one of the interfaces.
mount a /proc in each jail, it will make ps more useable.
/proc/<pid>/status tells the hostname of the prison for
jailed processes.
Quotas are only sensible if you have a mountpoint per prison.
There are no privisions for stopping resource-hogging.
Some "#ifdef INET" and similar may be missing (send patches!)
If somebody wants to take it from here and develop it into
more of a "virtual machine" they should be most welcome!
Tools, comments, patches & documentation most welcome.
Have fun...
Sponsored by: http://www.rndassociates.com/
Run for almost a year by: http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00
|
|
|
sin = (struct sockaddr_in *)addr;
|
2005-03-29 01:10:46 +00:00
|
|
|
if (jailed(td->td_ucred))
|
2002-02-27 18:32:23 +00:00
|
|
|
prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (inp->inp_faddr.s_addr != INADDR_ANY) {
|
|
|
|
error = EISCONN;
|
|
|
|
goto release;
|
|
|
|
}
|
2002-10-21 20:10:05 +00:00
|
|
|
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
|
2004-03-27 21:05:46 +00:00
|
|
|
&faddr.s_addr, &fport, NULL, td->td_ucred);
|
2002-10-21 20:10:05 +00:00
|
|
|
if (error)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto release;
|
2002-10-21 20:10:05 +00:00
|
|
|
|
|
|
|
/* Commit the local port if newly assigned. */
|
|
|
|
if (inp->inp_laddr.s_addr == INADDR_ANY &&
|
|
|
|
inp->inp_lport == 0) {
|
2005-02-22 07:50:02 +00:00
|
|
|
/*
|
|
|
|
* Remember addr if jailed, to prevent rebinding.
|
|
|
|
*/
|
|
|
|
if (jailed(td->td_ucred))
|
|
|
|
inp->inp_laddr = laddr;
|
2002-10-21 20:10:05 +00:00
|
|
|
inp->inp_lport = lport;
|
|
|
|
if (in_pcbinshash(inp) != 0) {
|
|
|
|
inp->inp_lport = 0;
|
|
|
|
error = EAGAIN;
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
inp->inp_flags |= INP_ANONPORT;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
} else {
|
2002-10-21 20:10:05 +00:00
|
|
|
faddr = inp->inp_faddr;
|
|
|
|
fport = inp->inp_fport;
|
|
|
|
if (faddr.s_addr == INADDR_ANY) {
|
1994-05-24 10:09:53 +00:00
|
|
|
error = ENOTCONN;
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
}
|
2004-08-21 16:14:04 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2004-08-21 16:14:04 +00:00
|
|
|
* Calculate data length and get a mbuf for UDP, IP, and possible
|
2004-08-22 01:32:48 +00:00
|
|
|
* link-layer headers. Immediate slide the data pointer back forward
|
|
|
|
* since we won't use that space at this layer.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2004-08-21 16:14:04 +00:00
|
|
|
M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
|
|
|
|
if (m == NULL) {
|
1994-05-24 10:09:53 +00:00
|
|
|
error = ENOBUFS;
|
2004-06-16 08:50:14 +00:00
|
|
|
goto release;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2004-08-21 16:14:04 +00:00
|
|
|
m->m_data += max_linkhdr;
|
|
|
|
m->m_len -= max_linkhdr;
|
2004-08-22 01:32:48 +00:00
|
|
|
m->m_pkthdr.len -= max_linkhdr;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill in mbuf with extended UDP header
|
|
|
|
* and addresses and length put into network format.
|
|
|
|
*/
|
|
|
|
ui = mtod(m, struct udpiphdr *);
|
2000-03-27 19:14:27 +00:00
|
|
|
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
|
1994-05-24 10:09:53 +00:00
|
|
|
ui->ui_pr = IPPROTO_UDP;
|
2002-10-21 20:10:05 +00:00
|
|
|
ui->ui_src = laddr;
|
|
|
|
ui->ui_dst = faddr;
|
|
|
|
ui->ui_sport = lport;
|
|
|
|
ui->ui_dport = fport;
|
2000-03-27 19:14:27 +00:00
|
|
|
ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2005-09-26 20:25:16 +00:00
|
|
|
/*
|
|
|
|
* Set the Don't Fragment bit in the IP header.
|
|
|
|
*/
|
|
|
|
if (inp->inp_flags & INP_DONTFRAG) {
|
|
|
|
struct ip *ip;
|
|
|
|
ip = (struct ip *)&ui->ui_i;
|
|
|
|
ip->ip_off |= IP_DF;
|
|
|
|
}
|
|
|
|
|
2004-09-05 02:34:12 +00:00
|
|
|
ipflags = 0;
|
|
|
|
if (inp->inp_socket->so_options & SO_DONTROUTE)
|
|
|
|
ipflags |= IP_ROUTETOIF;
|
|
|
|
if (inp->inp_socket->so_options & SO_BROADCAST)
|
|
|
|
ipflags |= IP_ALLOWBROADCAST;
|
2005-10-12 18:13:25 +00:00
|
|
|
if (inp->inp_vflag & INP_ONESBCAST)
|
2003-08-20 14:46:40 +00:00
|
|
|
ipflags |= IP_SENDONES;
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2000-03-27 19:14:27 +00:00
|
|
|
* Set up checksum and output datagram.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
if (udpcksum) {
|
2005-10-12 18:13:25 +00:00
|
|
|
if (inp->inp_vflag & INP_ONESBCAST)
|
2003-09-03 02:19:29 +00:00
|
|
|
faddr.s_addr = INADDR_BROADCAST;
|
|
|
|
ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
|
2000-03-27 19:14:27 +00:00
|
|
|
htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
|
|
|
|
m->m_pkthdr.csum_flags = CSUM_UDP;
|
|
|
|
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
|
|
|
|
} else {
|
|
|
|
ui->ui_sum = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
|
1997-04-03 05:14:45 +00:00
|
|
|
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
|
|
|
|
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
|
1994-05-24 10:09:53 +00:00
|
|
|
udpstat.udps_opackets++;
|
1999-12-07 17:39:16 +00:00
|
|
|
|
2004-08-19 01:13:10 +00:00
|
|
|
if (unlock_udbinfo)
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
2003-11-20 20:07:39 +00:00
|
|
|
error = ip_output(m, inp->inp_options, NULL, ipflags,
|
2002-10-16 01:54:46 +00:00
|
|
|
inp->inp_moptions, inp);
|
2004-08-19 01:13:10 +00:00
|
|
|
INP_UNLOCK(inp);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
release:
|
2004-08-19 01:13:10 +00:00
|
|
|
INP_UNLOCK(inp);
|
|
|
|
if (unlock_udbinfo)
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1994-05-24 10:09:53 +00:00
|
|
|
m_freem(m);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1999-11-05 14:41:39 +00:00
|
|
|
/*
 * Default UDP socket-buffer limits, exported as read-write sysctls
 * under net.inet.udp.
 */
u_long	udp_sendspace = 9216;		/* really max datagram size */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
    &udp_sendspace, 0, "Maximum outgoing UDP datagram size");

/*
 * Receive buffer: room for 40 1K datagrams, each carried with a
 * sockaddr of the compiled-in address family.
 */
u_long	udp_recvspace = 40 * (1024 +
#ifdef INET6
				      sizeof(struct sockaddr_in6)
#else
				      sizeof(struct sockaddr_in)
#endif
				      );
SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
    &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2006-04-01 15:15:05 +00:00
|
|
|
static void
|
1997-02-14 18:15:53 +00:00
|
|
|
udp_abort(struct socket *so)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
1997-02-14 18:15:53 +00:00
|
|
|
struct inpcb *inp;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
soisdisconnected(so);
|
|
|
|
in_pcbdetach(inp);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
in_pcbfree(inp);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
static int
|
2001-09-12 08:38:13 +00:00
|
|
|
udp_attach(struct socket *so, int proto, struct thread *td)
|
1997-02-14 18:15:53 +00:00
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
2005-06-01 11:24:00 +00:00
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
1999-12-07 17:39:16 +00:00
|
|
|
error = soreserve(so, udp_sendspace, udp_recvspace);
|
2002-06-10 20:05:46 +00:00
|
|
|
if (error) {
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1999-12-07 17:39:16 +00:00
|
|
|
return error;
|
2002-06-10 20:05:46 +00:00
|
|
|
}
|
2004-03-27 20:41:32 +00:00
|
|
|
error = in_pcballoc(so, &udbinfo, "udpinp");
|
2003-08-19 17:11:46 +00:00
|
|
|
if (error) {
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
return error;
|
2003-08-19 17:11:46 +00:00
|
|
|
}
|
1999-12-07 17:39:16 +00:00
|
|
|
|
|
|
|
inp = (struct inpcb *)so->so_pcb;
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1999-12-07 17:39:16 +00:00
|
|
|
inp->inp_vflag |= INP_IPV4;
|
|
|
|
inp->inp_ip_ttl = ip_defttl;
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
return 0;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
static int
|
2001-09-12 08:38:13 +00:00
|
|
|
udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
|
1997-02-14 18:15:53 +00:00
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
2005-06-01 11:24:00 +00:00
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
2004-03-27 21:05:46 +00:00
|
|
|
error = in_pcbbind(inp, nam, td->td_ucred);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
return error;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
static int
|
2001-09-12 08:38:13 +00:00
|
|
|
udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
|
1997-02-14 18:15:53 +00:00
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
2005-06-01 11:24:00 +00:00
|
|
|
int error;
|
This Implements the mumbled about "Jail" feature.
This is a seriously beefed up chroot kind of thing. The process
is jailed along the same lines as a chroot does it, but with
additional tough restrictions imposed on what the superuser can do.
For all I know, it is safe to hand over the root bit inside a
prison to the customer living in that prison, this is what
it was developed for in fact: "real virtual servers".
Each prison has an ip number associated with it, which all IP
communications will be coerced to use and each prison has its own
hostname.
Needless to say, you need more RAM this way, but the advantage is
that each customer can run their own particular version of apache
and not stomp on the toes of their neighbors.
It generally does what one would expect, but setting up a jail
still takes a little knowledge.
A few notes:
I have no scripts for setting up a jail, don't ask me for them.
The IP number should be an alias on one of the interfaces.
mount a /proc in each jail, it will make ps more useable.
/proc/<pid>/status tells the hostname of the prison for
jailed processes.
Quotas are only sensible if you have a mountpoint per prison.
There are no privisions for stopping resource-hogging.
Some "#ifdef INET" and similar may be missing (send patches!)
If somebody wants to take it from here and develop it into
more of a "virtual machine" they should be most welcome!
Tools, comments, patches & documentation most welcome.
Have fun...
Sponsored by: http://www.rndassociates.com/
Run for almost a year by: http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00
|
|
|
struct sockaddr_in *sin;
|
1997-02-14 18:15:53 +00:00
|
|
|
|
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
|
|
|
if (inp->inp_faddr.s_addr != INADDR_ANY) {
|
|
|
|
INP_UNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
return EISCONN;
|
2002-06-10 20:05:46 +00:00
|
|
|
}
|
2000-09-17 13:34:18 +00:00
|
|
|
sin = (struct sockaddr_in *)nam;
|
2005-03-29 01:10:46 +00:00
|
|
|
if (jailed(td->td_ucred))
|
2002-02-27 18:32:23 +00:00
|
|
|
prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
|
2004-03-27 21:05:46 +00:00
|
|
|
error = in_pcbconnect(inp, nam, td->td_ucred);
|
2002-05-31 11:52:35 +00:00
|
|
|
if (error == 0)
|
1997-02-14 18:15:53 +00:00
|
|
|
soisconnected(so);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
return error;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
Chance protocol switch method pru_detach() so that it returns void
rather than an error. Detaches do not "fail", they other occur or
the protocol flags SS_PROTOREF to take ownership of the socket.
soclose() no longer looks at so_pcb to see if it's NULL, relying
entirely on the protocol to decide whether it's time to free the
socket or not using SS_PROTOREF. so_pcb is now entirely owned and
managed by the protocol code. Likewise, no longer test so_pcb in
other socket functions, such as soreceive(), which have no business
digging into protocol internals.
Protocol detach routines no longer try to free the socket on detach,
this is performed in the socket code if the protocol permits it.
In rts_detach(), no longer test for rp != NULL in detach, and
likewise in other protocols that don't permit a NULL so_pcb, reduce
the incidence of testing for it during detach.
netinet and netinet6 are not fully updated to this change, which
will be in an upcoming commit. In their current state they may leak
memory or panic.
MFC after: 3 months
2006-04-01 15:42:02 +00:00
|
|
|
static void
|
1997-02-14 18:15:53 +00:00
|
|
|
udp_detach(struct socket *so)
|
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
in_pcbdetach(inp);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
in_pcbfree(inp);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
static int
|
|
|
|
udp_disconnect(struct socket *so)
|
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
|
|
|
|
INP_INFO_WLOCK(&udbinfo);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
|
|
|
if (inp->inp_faddr.s_addr == INADDR_ANY) {
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
|
|
|
INP_UNLOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
return ENOTCONN;
|
2002-06-10 20:05:46 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
in_pcbdisconnect(inp);
|
|
|
|
inp->inp_laddr.s_addr = INADDR_ANY;
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
|
|
|
INP_INFO_WUNLOCK(&udbinfo);
|
1997-02-14 18:15:53 +00:00
|
|
|
so->so_state &= ~SS_ISCONNECTED; /* XXX */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
1997-08-16 19:16:27 +00:00
|
|
|
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
|
2001-09-12 08:38:13 +00:00
|
|
|
struct mbuf *control, struct thread *td)
|
1997-02-14 18:15:53 +00:00
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
|
|
|
|
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_send: inp == NULL"));
|
2004-08-19 01:13:10 +00:00
|
|
|
return udp_output(inp, m, addr, control, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1999-11-05 14:41:39 +00:00
|
|
|
int
|
1997-02-14 18:15:53 +00:00
|
|
|
udp_shutdown(struct socket *so)
|
|
|
|
{
|
1994-05-24 10:09:53 +00:00
|
|
|
struct inpcb *inp;
|
1997-02-14 18:15:53 +00:00
|
|
|
|
|
|
|
inp = sotoinpcb(so);
|
Update in_pcb-derived basic socket types following changes to
pru_abort(), pru_detach(), and in_pcbdetach():
- Universally support and enforce the invariant that so_pcb is
never NULL, converting dozens of unnecessary NULL checks into
assertions, and eliminating dozens of unnecessary error handling
cases in protocol code.
- In some cases, eliminate unnecessary pcbinfo locking, as it is no
longer required to ensure so_pcb != NULL. For example, in protocol
shutdown methods, and in raw IP send.
- Abort and detach protocol switch methods no longer return failures,
nor attempt to free sockets, as the socket layer does this.
- Invoke in_pcbfree() after in_pcbdetach() in order to free the
detached in_pcb structure for a socket.
MFC after: 3 months
2006-04-01 16:20:54 +00:00
|
|
|
KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_LOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
socantsendmore(so);
|
2002-06-10 20:05:46 +00:00
|
|
|
INP_UNLOCK(inp);
|
1997-02-14 18:15:53 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-06-10 20:05:46 +00:00
|
|
|
/*
|
2004-08-16 18:32:07 +00:00
|
|
|
* This is the wrapper function for in_setsockaddr. We just pass down
|
|
|
|
* the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
|
2002-06-10 20:05:46 +00:00
|
|
|
* here because in_setsockaddr will call malloc and might block.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
udp_sockaddr(struct socket *so, struct sockaddr **nam)
|
|
|
|
{
|
|
|
|
return (in_setsockaddr(so, nam, &udbinfo));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the wrapper function for in_setpeeraddr. We just pass down
|
|
|
|
* the pcbinfo for in_setpeeraddr to lock.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
udp_peeraddr(struct socket *so, struct sockaddr **nam)
|
|
|
|
{
|
|
|
|
return (in_setpeeraddr(so, nam, &udbinfo));
|
|
|
|
}
|
|
|
|
|
1997-02-14 18:15:53 +00:00
|
|
|
/*
 * UDP protocol-switch user-request vector.  Operations not listed here
 * fall through to the socket-layer defaults.
 */
struct pr_usrreqs udp_usrreqs = {
	.pru_abort =		udp_abort,
	.pru_attach =		udp_attach,
	.pru_bind =		udp_bind,
	.pru_connect =		udp_connect,
	.pru_control =		in_control,
	.pru_detach =		udp_detach,
	.pru_disconnect =	udp_disconnect,
	.pru_peeraddr =		udp_peeraddr,
	.pru_send =		udp_send,
	.pru_shutdown =		udp_shutdown,
	.pru_sockaddr =		udp_sockaddr,
	.pru_sosetlabel =	in_pcbsosetlabel
};
|