diff --git a/contrib/pf/authpf/authpf.8 b/contrib/pf/authpf/authpf.8 index 5d63e83c012f..ee0dcaa423b3 100644 --- a/contrib/pf/authpf/authpf.8 +++ b/contrib/pf/authpf/authpf.8 @@ -1,28 +1,18 @@ -.\" $OpenBSD: authpf.8,v 1.38 2005/01/04 09:57:04 jmc Exp $ +.\" $OpenBSD: authpf.8,v 1.43 2007/02/24 17:21:04 beck Exp $ .\" -.\" Copyright (c) 2002 Bob Beck (beck@openbsd.org>. All rights reserved. +.\" Copyright (c) 1998-2007 Bob Beck (beck@openbsd.org>. All rights reserved. .\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. The name of the author may not be used to endorse or promote products -.\" derived from this software without specific prior written permission. +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. .\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" .Dd January 10, 2002 .Dt AUTHPF 8 @@ -225,8 +215,11 @@ it becomes unresponsive, or if arp or address spoofing is used to hijack the session. Note that TCP keepalives are not sufficient for this, since they are not secure. -Also note that +Also note that the various SSH tunnelling mechanisms, +such as .Ar AllowTcpForwarding +and +.Ar PermitTunnel , should be disabled for .Nm users to prevent them from circumventing restrictions imposed by the @@ -424,8 +417,7 @@ TCP connections. external_if = "xl0" internal_if = "fxp0" -pass in log quick on $internal_if proto tcp from $user_ip to any \e - keep state +pass in log quick on $internal_if proto tcp from $user_ip to any pass in quick on $internal_if from $user_ip to any .Ed .Pp @@ -440,16 +432,15 @@ ipsec_gw="10.2.3.4" # rdr ftp for proxying by ftp-proxy(8) rdr on $internal_if proto tcp from $user_ip to any port 21 \e - -> 127.0.0.1 port 8081 + -> 127.0.0.1 port 8021 # allow out ftp, ssh, www and https only, and allow user to negotiate # ipsec with the ipsec server. pass in log quick on $internal_if proto tcp from $user_ip to any \e - port { 21, 22, 80, 443 } flags S/SA + port { 21, 22, 80, 443 } pass in quick on $internal_if proto tcp from $user_ip to any \e port { 21, 22, 80, 443 } -pass in quick proto udp from $user_ip to $ipsec_gw port = isakmp \e - keep state +pass in quick proto udp from $user_ip to $ipsec_gw port = isakmp pass in quick proto esp from $user_ip to $ipsec_gw .Ed .Pp @@ -464,7 +455,7 @@ int_if = "fxp0" # nat and tag connections... nat on $ext_if from $user_ip to any tag $user_ip -> $ext_addr pass in quick on $int_if from $user_ip to any -pass out log quick on $ext_if tagged $user_ip keep state +pass out log quick on $ext_if tagged $user_ip .Ed .Pp With the above rules added by @@ -490,7 +481,7 @@ lines will give SMTP and IMAP access to logged in users: .Bd -literal table persist pass in on $ext_if proto tcp from \e - to port { smtp imap } keep state + to port { smtp imap } .Ed .Pp It is also possible to use the "authpf_users" @@ -516,6 +507,7 @@ rdr-anchor "authpf/*" from .Sh SEE ALSO .Xr pf 4 , .Xr pf.conf 5 , +.Xr securelevel 7 , .Xr ftp-proxy 8 .Sh HISTORY The diff --git a/contrib/pf/authpf/authpf.c b/contrib/pf/authpf/authpf.c index 1ae6aa452d5e..68adcd258eef 100644 --- a/contrib/pf/authpf/authpf.c +++ b/contrib/pf/authpf/authpf.c @@ -1,28 +1,19 @@ -/* $OpenBSD: authpf.c,v 1.89 2005/02/10 04:24:15 joel Exp $ */ +/* $OpenBSD: authpf.c,v 1.104 2007/02/24 17:35:08 beck Exp $ */ /* - * Copyright (C) 1998 - 2002 Bob Beck (beck@openbsd.org). + * Copyright (C) 1998 - 2007 Bob Beck (beck@openbsd.org). * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include @@ -50,15 +41,13 @@ #include "pathnames.h" -extern int symset(const char *, const char *, int); - static int read_config(FILE *); static void print_message(char *); static int allowed_luser(char *); static int check_luser(char *, char *); static int remove_stale_rulesets(void); static int change_filter(int, const char *, const char *); -static int change_table(int, const char *, const char *); +static int change_table(int, const char *); static void authpf_kill_states(void); int dev; /* pf device */ @@ -67,7 +56,6 @@ char rulesetname[MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 2]; char tablename[PF_TABLE_NAME_SIZE] = "authpf_users"; FILE *pidfp; -char *infile; /* file name printed by yyerror() in parse.y */ char luser[MAXLOGNAME]; /* username */ char ipsrc[256]; /* ip as a string */ char pidfile[MAXPATHLEN]; /* we save pid in this file. */ @@ -92,11 +80,16 @@ main(int argc, char *argv[]) struct in6_addr ina; struct passwd *pw; char *cp; + gid_t gid; uid_t uid; char *shell; login_cap_t *lc; config = fopen(PATH_CONFFILE, "r"); + if (config == NULL) { + syslog(LOG_ERR, "can not open %s (%m)", PATH_CONFFILE); + exit(1); + } if ((cp = getenv("SSH_TTY")) == NULL) { syslog(LOG_ERR, "non-interactive session connection for authpf"); @@ -133,7 +126,6 @@ main(int argc, char *argv[]) uid = getuid(); pw = getpwuid(uid); - endpwent(); if (pw == NULL) { syslog(LOG_ERR, "cannot find user for uid %u", uid); goto die; @@ -246,6 +238,8 @@ main(int argc, char *argv[]) if (++lockcnt > 10) { syslog(LOG_ERR, "cannot kill previous authpf (pid %d)", otherpid); + fclose(pidfp); + pidfp = NULL; goto dogdeath; } sleep(1); @@ -255,12 +249,22 @@ main(int argc, char *argv[]) * it's lock, giving us a chance to get it now */ fclose(pidfp); + pidfp = NULL; } while (1); + + /* whack the group list */ + gid = getegid(); + if (setgroups(1, &gid) == -1) { + syslog(LOG_INFO, "setgroups: %s", strerror(errno)); + do_death(0); + } /* revoke privs */ - seteuid(getuid()); - setuid(getuid()); - + uid = getuid(); + if (setresuid(uid, uid, uid) == -1) { + syslog(LOG_INFO, "setresuid: %s", strerror(errno)); + do_death(0); + } openlog("authpf", LOG_PID | LOG_NDELAY, LOG_DAEMON); if (!check_luser(PATH_BAN_DIR, luser) || !allowed_luser(luser)) { @@ -268,8 +272,8 @@ main(int argc, char *argv[]) do_death(0); } - if (config == NULL || read_config(config)) { - syslog(LOG_INFO, "bad or nonexistent %s", PATH_CONFFILE); + if (read_config(config)) { + syslog(LOG_ERR, "invalid config file %s", PATH_CONFFILE); do_death(0); } @@ -288,7 +292,7 @@ main(int argc, char *argv[]) printf("Unable to modify filters\r\n"); do_death(0); } - if (change_table(1, luser, ipsrc) == -1) { + if (change_table(1, ipsrc) == -1) { printf("Unable to modify table\r\n"); change_filter(0, luser, ipsrc); do_death(0); @@ -299,7 +303,7 @@ main(int argc, char *argv[]) signal(SIGALRM, need_death); signal(SIGPIPE, need_death); signal(SIGHUP, need_death); - signal(SIGSTOP, need_death); + signal(SIGQUIT, need_death); signal(SIGTSTP, need_death); while (1) { printf("\r\nHello %s. ", luser); @@ -547,9 +551,11 @@ check_luser(char *luserdir, char *luser) while (fputs(tmp, stdout) != EOF && !feof(f)) { if (fgets(tmp, sizeof(tmp), f) == NULL) { fflush(stdout); + fclose(f); return (0); } } + fclose(f); } fflush(stdout); return (0); @@ -633,6 +639,7 @@ change_filter(int add, const char *luser, const char *ipsrc) char *fdpath = NULL, *userstr = NULL, *ipstr = NULL; char *rsn = NULL, *fn = NULL; pid_t pid; + gid_t gid; int s; if (luser == NULL || !luser[0] || ipsrc == NULL || !ipsrc[0]) { @@ -672,8 +679,14 @@ change_filter(int add, const char *luser, const char *ipsrc) switch (pid = fork()) { case -1: - err(1, "fork failed"); + syslog(LOG_ERR, "fork failed"); + goto error; case 0: + /* revoke group privs before exec */ + gid = getgid(); + if (setregid(gid, gid) == -1) { + err(1, "setregid"); + } execvp(PATH_PFCTL, pargv); warn("exec of %s failed", PATH_PFCTL); _exit(1); @@ -682,10 +695,8 @@ change_filter(int add, const char *luser, const char *ipsrc) /* parent */ waitpid(pid, &s, 0); if (s != 0) { - if (WIFEXITED(s)) { - syslog(LOG_ERR, "pfctl exited abnormally"); - goto error; - } + syslog(LOG_ERR, "pfctl exited abnormally"); + goto error; } if (add) { @@ -701,16 +712,10 @@ change_filter(int add, const char *luser, const char *ipsrc) syslog(LOG_ERR, "malloc failed"); error: free(fdpath); - fdpath = NULL; free(rsn); - rsn = NULL; free(userstr); - userstr = NULL; free(ipstr); - ipstr = NULL; free(fn); - fn = NULL; - infile = NULL; return (-1); } @@ -718,13 +723,14 @@ change_filter(int add, const char *luser, const char *ipsrc) * Add/remove this IP from the "authpf_users" table. */ static int -change_table(int add, const char *luser, const char *ipsrc) +change_table(int add, const char *ipsrc) { struct pfioc_table io; struct pfr_addr addr; bzero(&io, sizeof(io)); - strlcpy(io.pfrio_table.pfrt_name, tablename, sizeof(io.pfrio_table)); + strlcpy(io.pfrio_table.pfrt_name, tablename, + sizeof(io.pfrio_table.pfrt_name)); io.pfrio_buffer = &addr; io.pfrio_esize = sizeof(addr); io.pfrio_size = 1; @@ -813,13 +819,11 @@ do_death(int active) if (active) { change_filter(0, luser, ipsrc); - change_table(0, luser, ipsrc); + change_table(0, ipsrc); authpf_kill_states(); remove_stale_rulesets(); } - if (pidfp) - ftruncate(fileno(pidfp), 0); - if (pidfile[0]) + if (pidfile[0] && (pidfp != NULL)) if (unlink(pidfile) == -1) syslog(LOG_ERR, "cannot unlink %s (%m)", pidfile); exit(ret); diff --git a/contrib/pf/ftp-proxy/filter.c b/contrib/pf/ftp-proxy/filter.c new file mode 100644 index 000000000000..f86429db51d3 --- /dev/null +++ b/contrib/pf/ftp-proxy/filter.c @@ -0,0 +1,387 @@ +/* $OpenBSD: filter.c,v 1.5 2006/12/01 07:31:21 camield Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "filter.h" + +/* From netinet/in.h, but only _KERNEL_ gets them. */ +#define satosin(sa) ((struct sockaddr_in *)(sa)) +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) + +enum { TRANS_FILTER = 0, TRANS_NAT, TRANS_RDR, TRANS_SIZE }; + +int prepare_rule(u_int32_t, int, struct sockaddr *, struct sockaddr *, + u_int16_t); +int server_lookup4(struct sockaddr_in *, struct sockaddr_in *, + struct sockaddr_in *); +int server_lookup6(struct sockaddr_in6 *, struct sockaddr_in6 *, + struct sockaddr_in6 *); + +static struct pfioc_pooladdr pfp; +static struct pfioc_rule pfr; +static struct pfioc_trans pft; +static struct pfioc_trans_e pfte[TRANS_SIZE]; +static int dev, rule_log; +static char *qname; + +int +add_filter(u_int32_t id, u_int8_t dir, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port) +{ + if (!src || !dst || !d_port) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_FILTER, src, dst, d_port) == -1) + return (-1); + + pfr.rule.direction = dir; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +add_nat(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *nat, u_int16_t nat_range_low, + u_int16_t nat_range_high) +{ + if (!src || !dst || !d_port || !nat || !nat_range_low || + (src->sa_family != nat->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_NAT, src, dst, d_port) == -1) + return (-1); + + if (nat->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(nat)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(nat)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = nat_range_low; + pfr.rule.rpool.proxy_port[1] = nat_range_high; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +add_rdr(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *rdr, u_int16_t rdr_port) +{ + if (!src || !dst || !d_port || !rdr || !rdr_port || + (src->sa_family != rdr->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_RDR, src, dst, d_port) == -1) + return (-1); + + if (rdr->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(rdr)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(rdr)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = rdr_port; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +do_commit(void) +{ + if (ioctl(dev, DIOCXCOMMIT, &pft) == -1) + return (-1); + + return (0); +} + +int +do_rollback(void) +{ + if (ioctl(dev, DIOCXROLLBACK, &pft) == -1) + return (-1); + + return (0); +} + +void +init_filter(char *opt_qname, int opt_verbose) +{ + struct pf_status status; + + qname = opt_qname; + + if (opt_verbose == 1) + rule_log = PF_LOG; + else if (opt_verbose == 2) + rule_log = PF_LOG_ALL; + + dev = open("/dev/pf", O_RDWR); + if (dev == -1) + err(1, "/dev/pf"); + if (ioctl(dev, DIOCGETSTATUS, &status) == -1) + err(1, "DIOCGETSTATUS"); + if (!status.running) + errx(1, "pf is disabled"); +} + +int +prepare_commit(u_int32_t id) +{ + char an[PF_ANCHOR_NAME_SIZE]; + int i; + + memset(&pft, 0, sizeof pft); + pft.size = TRANS_SIZE; + pft.esize = sizeof pfte[0]; + pft.array = pfte; + + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + for (i = 0; i < TRANS_SIZE; i++) { + memset(&pfte[i], 0, sizeof pfte[0]); + strlcpy(pfte[i].anchor, an, PF_ANCHOR_NAME_SIZE); + switch (i) { + case TRANS_FILTER: + pfte[i].rs_num = PF_RULESET_FILTER; + break; + case TRANS_NAT: + pfte[i].rs_num = PF_RULESET_NAT; + break; + case TRANS_RDR: + pfte[i].rs_num = PF_RULESET_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + } + + if (ioctl(dev, DIOCXBEGIN, &pft) == -1) + return (-1); + + return (0); +} + +int +prepare_rule(u_int32_t id, int rs_num, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port) +{ + char an[PF_ANCHOR_NAME_SIZE]; + + if ((src->sa_family != AF_INET && src->sa_family != AF_INET6) || + (src->sa_family != dst->sa_family)) { + errno = EPROTONOSUPPORT; + return (-1); + } + + memset(&pfp, 0, sizeof pfp); + memset(&pfr, 0, sizeof pfr); + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + strlcpy(pfp.anchor, an, PF_ANCHOR_NAME_SIZE); + strlcpy(pfr.anchor, an, PF_ANCHOR_NAME_SIZE); + + switch (rs_num) { + case PF_RULESET_FILTER: + pfr.ticket = pfte[TRANS_FILTER].ticket; + break; + case PF_RULESET_NAT: + pfr.ticket = pfte[TRANS_NAT].ticket; + break; + case PF_RULESET_RDR: + pfr.ticket = pfte[TRANS_RDR].ticket; + break; + default: + errno = EINVAL; + return (-1); + } + if (ioctl(dev, DIOCBEGINADDRS, &pfp) == -1) + return (-1); + pfr.pool_ticket = pfp.ticket; + + /* Generic for all rule types. */ + pfr.rule.af = src->sa_family; + pfr.rule.proto = IPPROTO_TCP; + pfr.rule.src.addr.type = PF_ADDR_ADDRMASK; + pfr.rule.dst.addr.type = PF_ADDR_ADDRMASK; + if (src->sa_family == AF_INET) { + memcpy(&pfr.rule.src.addr.v.a.addr.v4, + &satosin(src)->sin_addr.s_addr, 4); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 4); + memcpy(&pfr.rule.dst.addr.v.a.addr.v4, + &satosin(dst)->sin_addr.s_addr, 4); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfr.rule.src.addr.v.a.addr.v6, + &satosin6(src)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 16); + memcpy(&pfr.rule.dst.addr.v.a.addr.v6, + &satosin6(dst)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 16); + } + pfr.rule.dst.port_op = PF_OP_EQ; + pfr.rule.dst.port[0] = htons(d_port); + + switch (rs_num) { + case PF_RULESET_FILTER: + /* + * pass quick [log] inet[6] proto tcp \ + * from $src to $dst port = $d_port flags S/SA keep state + * (max 1) [queue qname] + */ + pfr.rule.action = PF_PASS; + pfr.rule.quick = 1; + pfr.rule.log = rule_log; + pfr.rule.keep_state = 1; + pfr.rule.flags = TH_SYN; + pfr.rule.flagset = (TH_SYN|TH_ACK); + pfr.rule.max_states = 1; + if (qname != NULL) + strlcpy(pfr.rule.qname, qname, sizeof pfr.rule.qname); + break; + case PF_RULESET_NAT: + /* + * nat inet[6] proto tcp from $src to $dst port $d_port -> $nat + */ + pfr.rule.action = PF_NAT; + break; + case PF_RULESET_RDR: + /* + * rdr inet[6] proto tcp from $src to $dst port $d_port -> $rdr + */ + pfr.rule.action = PF_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + + return (0); +} + +int +server_lookup(struct sockaddr *client, struct sockaddr *proxy, + struct sockaddr *server) +{ + if (client->sa_family == AF_INET) + return (server_lookup4(satosin(client), satosin(proxy), + satosin(server))); + + if (client->sa_family == AF_INET6) + return (server_lookup6(satosin6(client), satosin6(proxy), + satosin6(server))); + + errno = EPROTONOSUPPORT; + return (-1); +} + +int +server_lookup4(struct sockaddr_in *client, struct sockaddr_in *proxy, + struct sockaddr_in *server) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET; + pnl.proto = IPPROTO_TCP; + memcpy(&pnl.saddr.v4, &client->sin_addr.s_addr, sizeof pnl.saddr.v4); + memcpy(&pnl.daddr.v4, &proxy->sin_addr.s_addr, sizeof pnl.daddr.v4); + pnl.sport = client->sin_port; + pnl.dport = proxy->sin_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in)); + server->sin_len = sizeof(struct sockaddr_in); + server->sin_family = AF_INET; + memcpy(&server->sin_addr.s_addr, &pnl.rdaddr.v4, + sizeof server->sin_addr.s_addr); + server->sin_port = pnl.rdport; + + return (0); +} + +int +server_lookup6(struct sockaddr_in6 *client, struct sockaddr_in6 *proxy, + struct sockaddr_in6 *server) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET6; + pnl.proto = IPPROTO_TCP; + memcpy(&pnl.saddr.v6, &client->sin6_addr.s6_addr, sizeof pnl.saddr.v6); + memcpy(&pnl.daddr.v6, &proxy->sin6_addr.s6_addr, sizeof pnl.daddr.v6); + pnl.sport = client->sin6_port; + pnl.dport = proxy->sin6_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in6)); + server->sin6_len = sizeof(struct sockaddr_in6); + server->sin6_family = AF_INET6; + memcpy(&server->sin6_addr.s6_addr, &pnl.rdaddr.v6, + sizeof server->sin6_addr); + server->sin6_port = pnl.rdport; + + return (0); +} diff --git a/contrib/pf/ftp-proxy/filter.h b/contrib/pf/ftp-proxy/filter.h new file mode 100644 index 000000000000..6779c5974467 --- /dev/null +++ b/contrib/pf/ftp-proxy/filter.h @@ -0,0 +1,31 @@ +/* $OpenBSD: filter.h,v 1.3 2005/06/07 14:12:07 camield Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define FTP_PROXY_ANCHOR "ftp-proxy" + +int add_filter(u_int32_t, u_int8_t, struct sockaddr *, struct sockaddr *, + u_int16_t); +int add_nat(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t, u_int16_t); +int add_rdr(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t); +int do_commit(void); +int do_rollback(void); +void init_filter(char *, int); +int prepare_commit(u_int32_t); +int server_lookup(struct sockaddr *, struct sockaddr *, struct sockaddr *); diff --git a/contrib/pf/ftp-proxy/ftp-proxy.8 b/contrib/pf/ftp-proxy/ftp-proxy.8 index e12813600769..44e6e59d22f5 100644 --- a/contrib/pf/ftp-proxy/ftp-proxy.8 +++ b/contrib/pf/ftp-proxy/ftp-proxy.8 @@ -1,293 +1,183 @@ -.\" $OpenBSD: ftp-proxy.8,v 1.42 2004/11/19 00:47:23 jmc Exp $ +.\" $OpenBSD: ftp-proxy.8,v 1.7 2006/12/30 13:01:54 camield Exp $ .\" -.\" Copyright (c) 1996-2001 -.\" Obtuse Systems Corporation, All rights reserved. +.\" Copyright (c) 2004, 2005 Camiel Dobbelaar, .\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. .\" -.\" THIS SOFTWARE IS PROVIDED BY OBTUSE SYSTEMS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL OBTUSE OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd August 17, 2001 +.Dd November 28, 2004 .Dt FTP-PROXY 8 .Os .Sh NAME .Nm ftp-proxy -.Nd Internet File Transfer Protocol proxy server +.Nd Internet File Transfer Protocol proxy daemon .Sh SYNOPSIS .Nm ftp-proxy -.Bk -words -.Op Fl AnrVw +.Op Fl 6Adrv .Op Fl a Ar address -.Op Fl D Ar debuglevel -.Op Fl g Ar group -.Op Fl M Ar maxport -.Op Fl m Ar minport -.Op Fl R Ar address[:port] -.Op Fl S Ar address +.Op Fl b Ar address +.Op Fl D Ar level +.Op Fl m Ar maxsessions +.Op Fl P Ar port +.Op Fl p Ar port +.Op Fl q Ar queue +.Op Fl R Ar address .Op Fl t Ar timeout -.Op Fl u Ar user -.Ek .Sh DESCRIPTION .Nm is a proxy for the Internet File Transfer Protocol. -The proxy uses +FTP control connections should be redirected into the proxy using the .Xr pf 4 -and expects to have the FTP control connection as described in -.Xr services 5 -redirected to it via a +.Ar rdr +command, after which the proxy connects to the server on behalf of +the client. +.Pp +The proxy allows data connections to pass, rewriting and redirecting +them so that the right addresses are used. +All connections from the client to the server have their source +address rewritten so they appear to come from the proxy. +Consequently, all connections from the server to the proxy have +their destination address rewritten, so they are redirected to the +client. +The proxy uses the .Xr pf 4 -.Em rdr -command. -An example of how to do that is further down in this document. +.Ar anchor +facility for this. +.Pp +Assuming the FTP control connection is from $client to $server, the +proxy connected to the server using the $proxy source address, and +$port is negotiated, then +.Nm ftp-proxy +adds the following rules to the various anchors. +(These example rules use inet, but the proxy also supports inet6.) +.Pp +In case of active mode (PORT or EPRT): +.Bd -literal -offset 2n +rdr from $server to $proxy port $port -> $client +pass quick inet proto tcp \e + from $server to $client port $port +.Ed +.Pp +In case of passive mode (PASV or EPSV): +.Bd -literal -offset 2n +nat from $client to $server port $port -> $proxy +pass in quick inet proto tcp \e + from $client to $server port $port +pass out quick inet proto tcp \e + from $proxy to $server port $port +.Ed .Pp The options are as follows: .Bl -tag -width Ds +.It Fl 6 +IPv6 mode. +The proxy will expect and use IPv6 addresses for all communication. +Only the extended FTP modes EPSV and EPRT are allowed with IPv6. +The proxy is in IPv4 mode by default. .It Fl A -Permit only anonymous FTP connections. -The proxy will allow connections to log in to other sites as the user -.Qq ftp -or -.Qq anonymous -only. -Any attempt to log in as another user will be blocked by the proxy. +Only permit anonymous FTP connections. +Either user "ftp" or user "anonymous" is allowed. .It Fl a Ar address -Specify the local IP address to use in -.Xr bind 2 -as the source for connections made by -.Nm ftp-proxy -when connecting to destination FTP servers. -This may be necessary if the interface address of -your default route is not reachable from the destinations -.Nm -is attempting connections to, or this address is different from the one -connections are being NATed to. -In the usual case this means that -.Ar address -should be a publicly visible IP address assigned to one of -the interfaces on the machine running -.Nm -and should be the same address to which you are translating traffic -if you are using the -.Fl n -option. -.It Fl D Ar debuglevel -Specify a debug level, where the proxy emits verbose debug output -into -.Xr syslogd 8 -at level -.Dv LOG_DEBUG . -Meaningful values of debuglevel are 0-3, where 0 is no debug output and -3 is lots of debug output, the default being 0. -.It Fl g Ar group -Specify the named group to drop group privileges to, after doing -.Xr pf 4 -lookups which require root. -By default, -.Nm -uses the default group of the user it drops privilege to. -.It Fl M Ar maxport -Specify the upper end of the port range the proxy will use for the -data connections it establishes. -The default is -.Dv IPPORT_HILASTAUTO -defined in -.Aq Pa netinet/in.h -as 65535. -.It Fl m Ar minport -Specify the lower end of the port range the proxy will use for all -data connections it establishes. -The default is -.Dv IPPORT_HIFIRSTAUTO -defined in -.Aq Pa netinet/in.h -as 49152. -.It Fl n -Activate network address translation -.Pq NAT -mode. -In this mode, the proxy will not attempt to proxy passive mode -.Pq PASV or EPSV -data connections. -In order for this to work, the machine running the proxy will need to -be forwarding packets and doing network address translation to allow -the outbound passive connections from the client to reach the server. -See -.Xr pf.conf 5 -for more details on NAT. -The proxy only ignores passive mode data connections when using this flag; -it will still proxy PORT and EPRT mode data connections. -Without this flag, -.Nm -does not require any IP forwarding or NAT beyond the -.Em rdr -necessary to capture the FTP control connection. -.It Fl R Ar address:[port] -Reverse proxy mode for FTP servers running behind a NAT gateway. -In this mode, no redirection is needed. -The proxy is run from -.Xr inetd 8 -on the port that external clients connect to (usually 21). -Control connections and passive data connections are forwarded -to the server. +The proxy will use this as the source address for the control +connection to a server. +.It Fl b Ar address +Address where the proxy will listen for redirected control connections. +The default is 127.0.0.1, or ::1 in IPv6 mode. +.It Fl D Ar level +Debug level, ranging from 0 to 7. +Higher is more verbose. +The default is 5. +(These levels correspond to the +.Xr syslog 3 +levels.) +.It Fl d +Do not daemonize. +The process will stay in the foreground, logging to standard error. +.It Fl m Ar maxsessions +Maximum number of concurrent FTP sessions. +When the proxy reaches this limit, new connections are denied. +The default is 100 sessions. +The limit can be lowered to a minimum of 1, or raised to a maximum of 500. +.It Fl P Ar port +Fixed server port. +Only used in combination with +.Fl R . +The default is port 21. +.It Fl p Ar port +Port where the proxy will listen for redirected connections. +The default is port 8021. +.It Fl q Ar queue +Create rules with queue +.Ar queue +appended, so that data connections can be queued. +.It Fl R Ar address +Fixed server address, also known as reverse mode. +The proxy will always connect to the same server, regardless of +where the client wanted to connect to (before it was redirected). +Use this option to proxy for a server behind NAT, or to forward all +connections to another proxy. .It Fl r -Use reverse host -.Pq reverse DNS -lookups for logging and libwrap use. -By default, -the proxy does not look up hostnames for libwrap or logging purposes. -.It Fl S Ar address -Source address to use for data connections made by the proxy. -Useful when there are multiple addresses (aliases) available -to the proxy. -Clients may expect data connections to have the same source -address as the control connections, and reject or drop other -connections. +Rewrite sourceport to 20 in active mode to suit ancient clients that insist +on this RFC property. .It Fl t Ar timeout -Specifies a timeout, in seconds. -The proxy will exit and close open connections if it sees no data -for the duration of the timeout. -The default is 0, which means the proxy will not time out. -.It Fl u Ar user -Specify the named user to drop privilege to, after doing -.Xr pf 4 -lookups which require root privilege. -By default, -.Nm -drops privilege to the user -.Em proxy . -.Pp -Running as root means that the source of data connections the proxy makes -for PORT and EPRT will be the RFC mandated port 20. -When running as a non-root user, the source of the data connections from -.Nm -will be chosen randomly from the range -.Ar minport -to -.Ar maxport -as described above. -.It Fl V -Be verbose. -With this option the proxy logs the control commands -sent by clients and the replies sent by the servers to -.Xr syslogd 8 . -.It Fl w -Use the tcp wrapper access control library -.Xr hosts_access 3 , -allowing connections to be allowed or denied based on the tcp wrapper's -.Xr hosts.allow 5 -and -.Xr hosts.deny 5 -files. -The proxy does libwrap operations after determining the destination -of the captured control connection, so that tcp wrapper rules may -be written based on the destination as well as the source of FTP connections. +Number of seconds that the control connection can be idle, before the +proxy will disconnect. +The maximum is 86400 seconds, which is also the default. +Do not set this too low, because the control connection is usually +idle when large data transfers are taking place. +.It Fl v +Set the 'log' flag on pf rules committed by +.Nm . +Use twice to set the 'log-all' flag. +The pf rules do not log by default. .El -.Pp -.Nm ftp-proxy -is run from -.Xr inetd 8 -and requires that FTP connections are redirected to it using a -.Em rdr -rule. -A typical way to do this would be to use a +.Sh CONFIGURATION +To make use of the proxy, .Xr pf.conf 5 -rule such as +needs the following rules. +All anchors are mandatory. +Adjust the rules as needed. +.Pp +In the NAT section: .Bd -literal -offset 2n -int_if = \&"xl0\&" -rdr pass on $int_if proto tcp from any to any port 21 -> 127.0.0.1 port 8021 +nat-anchor "ftp-proxy/*" +rdr-anchor "ftp-proxy/*" +rdr pass on $int_if proto tcp from $lan to any port 21 -> \e + 127.0.0.1 port 8021 .Ed .Pp -.Xr inetd 8 -must then be configured to run -.Nm -on the port from above using +In the rule section: .Bd -literal -offset 2n -127.0.0.1:8021 stream tcp nowait root /usr/libexec/ftp-proxy ftp-proxy +anchor "ftp-proxy/*" +pass out proto tcp from $proxy to any port 21 .Ed -.Pp -in -.Xr inetd.conf 5 . -.Pp -.Nm -accepts the redirected control connections and forwards them -to the server. -The proxy replaces the address and port number that the client -sends through the control connection to the server with its own -address and proxy port, where it listens for the data connection. -When the server opens the data connection back to this port, the -proxy forwards it to the client. -The -.Xr pf.conf 5 -rules need to let pass connections to these proxy ports -(see options -.Fl u , m , -and -.Fl M -above) in on the external interface. -The following example allows only ports 49152 to 65535 to pass in -statefully: -.Bd -literal -offset indent -block in on $ext_if proto tcp all -pass in on $ext_if inet proto tcp from any to $ext_if \e - port > 49151 keep state -.Ed -.Pp -Alternatively, rules can make use of the fact that by default, -.Nm -runs as user -.Qq proxy -to allow the backchannel connections, as in the following example: -.Bd -literal -offset indent -block in on $ext_if proto tcp all -pass in on $ext_if inet proto tcp from any to $ext_if \e - user proxy keep state -.Ed -.Pp -These examples do not cover the connections from the proxy to the -foreign FTP server. -If one does not pass outgoing connections by default additional rules -are needed. .Sh SEE ALSO .Xr ftp 1 , .Xr pf 4 , -.Xr hosts.allow 5 , -.Xr hosts.deny 5 , -.Xr inetd.conf 5 , -.Xr pf.conf 5 , -.Xr inetd 8 , -.Xr pfctl 8 , -.Xr syslogd 8 -.Sh BUGS -Extended Passive mode -.Pq EPSV -is not supported by the proxy and will not work unless the proxy is run -in network address translation mode. -When not in network address translation mode, the proxy returns an error -to the client, hopefully forcing the client to revert to passive mode -.Pq PASV -which is supported. -EPSV will work in network address translation mode, assuming a .Xr pf.conf 5 -setup which allows the EPSV connections through to their destinations. +.Sh CAVEATS +.Xr pf 4 +does not allow the ruleset to be modified if the system is running at a +.Xr securelevel 7 +higher than 1. +At that level +.Nm ftp-proxy +cannot add rules to the anchors and FTP data connections may get blocked. .Pp -IPv6 is not yet supported. +Negotiated data connection ports below 1024 are not allowed. +.Pp +The negotiated IP address for active modes is ignored for security +reasons. +This makes third party file transfers impossible. +.Pp +.Nm ftp-proxy +chroots to "/var/empty" and changes to user "proxy" to drop privileges. diff --git a/contrib/pf/ftp-proxy/ftp-proxy.c b/contrib/pf/ftp-proxy/ftp-proxy.c index dd0c654ee753..99e417472fb2 100644 --- a/contrib/pf/ftp-proxy/ftp-proxy.c +++ b/contrib/pf/ftp-proxy/ftp-proxy.c @@ -1,84 +1,36 @@ -/* $OpenBSD: ftp-proxy.c,v 1.41 2005/03/05 23:11:19 cloder Exp $ */ +/* $OpenBSD: ftp-proxy.c,v 1.13 2006/12/30 13:24:00 camield Exp $ */ /* - * Copyright (c) 1996-2001 - * Obtuse Systems Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Camiel Dobbelaar, * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Obtuse Systems nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY OBTUSE SYSTEMS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL OBTUSE SYSTEMS CORPORATION OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* - * ftp proxy, Originally based on juniper_ftp_proxy from the Obtuse - * Systems juniper firewall, written by Dan Boulet - * and Bob Beck - * - * This version basically passes everything through unchanged except - * for the PORT and the * "227 Entering Passive Mode" reply. - * - * A PORT command is handled by noting the IP address and port number - * specified and then configuring a listen port on some very high port - * number and telling the server about it using a PORT message. - * We then watch for an in-bound connection on the port from the server - * and connect to the client's port when it happens. - * - * A "227 Entering Passive Mode" reply is handled by noting the IP address - * and port number specified and then configuring a listen port on some - * very high port number and telling the client about it using a - * "227 Entering Passive Mode" reply. - * We then watch for an in-bound connection on the port from the client - * and connect to the server's port when it happens. - * - * supports tcp wrapper lookups/access control with the -w flag using - * the real destination address - the tcp wrapper stuff is done after - * the real destination address is retrieved from pf - * - */ - -/* - * TODO: - * Plenty, this is very basic, with the idea to get it in clean first. - * - * - IPv6 and EPASV support - * - Content filter support - * - filename filter support - * - per-user rules perhaps. - */ - -#include +#include +#include #include +#include #include #include +#include #include - #include -#include +#include #include -#include +#include +#include #include #include #include @@ -86,1288 +38,1056 @@ #include #include #include -#include #include #include +#include -#include "util.h" +#include "filter.h" -#ifdef LIBWRAP -#include -int allow_severity = LOG_INFO; -int deny_severity = LOG_NOTICE; -#endif /* LIBWRAP */ +#define CONNECT_TIMEOUT 30 +#define MIN_PORT 1024 +#define MAX_LINE 500 +#define MAX_LOGLINE 300 +#define NTOP_BUFS 3 +#define TCP_BACKLOG 10 -int min_port = IPPORT_HIFIRSTAUTO; -int max_port = IPPORT_HILASTAUTO; +#define CHROOT_DIR "/var/empty" +#define NOPRIV_USER "proxy" -#define STARTBUFSIZE 1024 /* Must be at least 3 */ +/* pfctl standard NAT range. */ +#define PF_NAT_PROXY_PORT_LOW 50001 +#define PF_NAT_PROXY_PORT_HIGH 65535 -/* - * Variables used to support PORT mode connections. - * - * This gets a bit complicated. - * - * If PORT mode is on then client_listen_sa describes the socket that - * the real client is listening on and server_listen_sa describes the - * socket that we are listening on (waiting for the real server to connect - * with us). - * - * If PASV mode is on then client_listen_sa describes the socket that - * we are listening on (waiting for the real client to connect to us on) - * and server_listen_sa describes the socket that the real server is - * listening on. - * - * If the socket we are listening on gets a connection then we connect - * to the other side's socket. Similarly, if a connected socket is - * shutdown then we shutdown the other side's socket. - */ +#define sstosa(ss) ((struct sockaddr *)(ss)) -double xfer_start_time; +enum { CMD_NONE = 0, CMD_PORT, CMD_EPRT, CMD_PASV, CMD_EPSV }; -struct sockaddr_in real_server_sa; -struct sockaddr_in client_listen_sa; -struct sockaddr_in server_listen_sa; -struct sockaddr_in proxy_sa; -struct in_addr src_addr; +struct session { + u_int32_t id; + struct sockaddr_storage client_ss; + struct sockaddr_storage proxy_ss; + struct sockaddr_storage server_ss; + struct sockaddr_storage orig_server_ss; + struct bufferevent *client_bufev; + struct bufferevent *server_bufev; + int client_fd; + int server_fd; + char cbuf[MAX_LINE]; + size_t cbuf_valid; + char sbuf[MAX_LINE]; + size_t sbuf_valid; + int cmd; + u_int16_t port; + u_int16_t proxy_port; + LIST_ENTRY(session) entry; +}; -int client_listen_socket = -1; /* Only used in PASV mode */ -int client_data_socket = -1; /* Connected socket to real client */ -int server_listen_socket = -1; /* Only used in PORT mode */ -int server_data_socket = -1; /* Connected socket to real server */ -int client_data_bytes, server_data_bytes; +LIST_HEAD(, session) sessions = LIST_HEAD_INITIALIZER(sessions); -int AnonFtpOnly; -int Verbose; -int NatMode; -int ReverseMode; +void client_error(struct bufferevent *, short, void *); +int client_parse(struct session *s); +int client_parse_anon(struct session *s); +int client_parse_cmd(struct session *s); +void client_read(struct bufferevent *, void *); +int drop_privs(void); +void end_session(struct session *); +int exit_daemon(void); +int getline(char *, size_t *); +void handle_connection(const int, short, void *); +void handle_signal(int, short, void *); +struct session * init_session(void); +void logmsg(int, const char *, ...); +u_int16_t parse_port(int); +u_int16_t pick_proxy_port(void); +void proxy_reply(int, struct sockaddr *, u_int16_t); +void server_error(struct bufferevent *, short, void *); +int server_parse(struct session *s); +void server_read(struct bufferevent *, void *); +const char *sock_ntop(struct sockaddr *); +void usage(void); -char ClientName[NI_MAXHOST]; -char RealServerName[NI_MAXHOST]; -char OurName[NI_MAXHOST]; +char linebuf[MAX_LINE + 1]; +size_t linelen; -const char *User = "proxy"; -const char *Group; +char ntop_buf[NTOP_BUFS][INET6_ADDRSTRLEN]; -extern int Debug_Level; -extern int Use_Rdns; -extern in_addr_t Bind_Addr; +struct sockaddr_storage fixed_server_ss, fixed_proxy_ss; +char *fixed_server, *fixed_server_port, *fixed_proxy, *listen_ip, *listen_port, + *qname; +int anonymous_only, daemonize, id_count, ipv6_mode, loglevel, max_sessions, + rfc_mode, session_count, timeout, verbose; extern char *__progname; -typedef enum { - UNKNOWN_MODE, - PORT_MODE, - PASV_MODE, - EPRT_MODE, - EPSV_MODE -} connection_mode_t; - -connection_mode_t connection_mode; - -extern void debuglog(int debug_level, const char *fmt, ...); -double wallclock_time(void); -void show_xfer_stats(void); -void log_control_command (char *cmd, int client); -int new_dataconn(int server); -void do_client_cmd(struct csiob *client, struct csiob *server); -void do_server_reply(struct csiob *server, struct csiob *client); -static void -usage(void) +void +client_error(struct bufferevent *bufev, short what, void *arg) { - syslog(LOG_NOTICE, - "usage: %s [-AnrVw] [-a address] [-D debuglevel] [-g group]" - " [-M maxport] [-m minport] [-R address[:port]] [-S address]" - " [-t timeout] [-u user]", __progname); - exit(EX_USAGE); + struct session *s = arg; + + if (what & EVBUFFER_EOF) + logmsg(LOG_INFO, "#%d client close", s->id); + else if (what == (EVBUFFER_ERROR | EVBUFFER_READ)) + logmsg(LOG_ERR, "#%d client reset connection", s->id); + else if (what & EVBUFFER_TIMEOUT) + logmsg(LOG_ERR, "#%d client timeout", s->id); + else if (what & EVBUFFER_WRITE) + logmsg(LOG_ERR, "#%d client write error: %d", s->id, what); + else + logmsg(LOG_ERR, "#%d abnormal client error: %d", s->id, what); + + end_session(s); } -static void -close_client_data(void) +int +client_parse(struct session *s) { - if (client_data_socket >= 0) { - shutdown(client_data_socket, 2); - close(client_data_socket); - client_data_socket = -1; + /* Reset any previous command. */ + s->cmd = CMD_NONE; + s->port = 0; + + /* Commands we are looking for are at least 4 chars long. */ + if (linelen < 4) + return (1); + + if (linebuf[0] == 'P' || linebuf[0] == 'p' || + linebuf[0] == 'E' || linebuf[0] == 'e') + return (client_parse_cmd(s)); + + if (anonymous_only && (linebuf[0] == 'U' || linebuf[0] == 'u')) + return (client_parse_anon(s)); + + return (1); +} + +int +client_parse_anon(struct session *s) +{ + if (strcasecmp("USER ftp\r\n", linebuf) != 0 && + strcasecmp("USER anonymous\r\n", linebuf) != 0) { + snprintf(linebuf, sizeof linebuf, + "500 Only anonymous FTP allowed\r\n"); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); + + /* Talk back to the client ourself. */ + linelen = strlen(linebuf); + bufferevent_write(s->client_bufev, linebuf, linelen); + + /* Clear buffer so it's not sent to the server. */ + linebuf[0] = '\0'; + linelen = 0; } + + return (1); } -static void -close_server_data(void) +int +client_parse_cmd(struct session *s) { - if (server_data_socket >= 0) { - shutdown(server_data_socket, 2); - close(server_data_socket); - server_data_socket = -1; + if (strncasecmp("PASV", linebuf, 4) == 0) + s->cmd = CMD_PASV; + else if (strncasecmp("PORT ", linebuf, 5) == 0) + s->cmd = CMD_PORT; + else if (strncasecmp("EPSV", linebuf, 4) == 0) + s->cmd = CMD_EPSV; + else if (strncasecmp("EPRT ", linebuf, 5) == 0) + s->cmd = CMD_EPRT; + else + return (1); + + if (ipv6_mode && (s->cmd == CMD_PASV || s->cmd == CMD_PORT)) { + logmsg(LOG_CRIT, "PASV and PORT not allowed with IPv6"); + return (0); } + + if (s->cmd == CMD_PORT || s->cmd == CMD_EPRT) { + s->port = parse_port(s->cmd); + if (s->port < MIN_PORT) { + logmsg(LOG_CRIT, "#%d bad port in '%s'", s->id, + linebuf); + return (0); + } + s->proxy_port = pick_proxy_port(); + proxy_reply(s->cmd, sstosa(&s->proxy_ss), s->proxy_port); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); + } + + return (1); } -static void +void +client_read(struct bufferevent *bufev, void *arg) +{ + struct session *s = arg; + size_t buf_avail, read; + int n; + + do { + buf_avail = sizeof s->cbuf - s->cbuf_valid; + read = bufferevent_read(bufev, s->cbuf + s->cbuf_valid, + buf_avail); + s->cbuf_valid += read; + + while ((n = getline(s->cbuf, &s->cbuf_valid)) > 0) { + logmsg(LOG_DEBUG, "#%d client: %s", s->id, linebuf); + if (!client_parse(s)) { + end_session(s); + return; + } + bufferevent_write(s->server_bufev, linebuf, linelen); + } + + if (n == -1) { + logmsg(LOG_ERR, "#%d client command too long or not" + " clean", s->id); + end_session(s); + return; + } + } while (read == buf_avail); +} + +int drop_privs(void) { struct passwd *pw; - struct group *gr; - uid_t uid = 0; - gid_t gid = 0; - if (User != NULL) { - pw = getpwnam(User); - if (pw == NULL) { - syslog(LOG_ERR, "cannot find user %s", User); - exit(EX_USAGE); - } - uid = pw->pw_uid; - gid = pw->pw_gid; - } + pw = getpwnam(NOPRIV_USER); + if (pw == NULL) + return (0); - if (Group != NULL) { - gr = getgrnam(Group); - if (gr == NULL) { - syslog(LOG_ERR, "cannot find group %s", Group); - exit(EX_USAGE); - } - gid = gr->gr_gid; - } + tzset(); + if (chroot(CHROOT_DIR) != 0 || chdir("/") != 0 || + setgroups(1, &pw->pw_gid) != 0 || + setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0 || + setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0) + return (0); - if (gid != 0 && (setegid(gid) == -1 || setgid(gid) == -1)) { - syslog(LOG_ERR, "cannot drop group privs (%m)"); - exit(EX_CONFIG); - } - - if (uid != 0 && (seteuid(uid) == -1 || setuid(uid) == -1)) { - syslog(LOG_ERR, "cannot drop root privs (%m)"); - exit(EX_CONFIG); - } -} - -#ifdef LIBWRAP -/* - * Check a connection against the tcpwrapper, log if we're going to - * reject it, returns: 0 -> reject, 1 -> accept. We add in hostnames - * if we are set to do reverse DNS, otherwise no. - */ -static int -check_host(struct sockaddr_in *client_sin, struct sockaddr_in *server_sin) -{ - char cname[NI_MAXHOST]; - char sname[NI_MAXHOST]; - struct request_info request; - int i; - - request_init(&request, RQ_DAEMON, __progname, RQ_CLIENT_SIN, - client_sin, RQ_SERVER_SIN, server_sin, RQ_CLIENT_ADDR, - inet_ntoa(client_sin->sin_addr), 0); - - if (Use_Rdns) { - /* - * We already looked these up, but we have to do it again - * for tcp wrapper, to ensure that we get the DNS name, since - * the tcp wrapper cares about these things, and we don't - * want to pass in a printed address as a name. - */ - i = getnameinfo((struct sockaddr *) &client_sin->sin_addr, - sizeof(&client_sin->sin_addr), cname, sizeof(cname), - NULL, 0, NI_NAMEREQD); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) - strlcpy(cname, STRING_UNKNOWN, sizeof(cname)); - - i = getnameinfo((struct sockaddr *)&server_sin->sin_addr, - sizeof(&server_sin->sin_addr), sname, sizeof(sname), - NULL, 0, NI_NAMEREQD); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) - strlcpy(sname, STRING_UNKNOWN, sizeof(sname)); - } else { - /* - * ensure the TCP wrapper doesn't start doing - * reverse DNS lookups if we aren't supposed to. - */ - strlcpy(cname, STRING_UNKNOWN, sizeof(cname)); - strlcpy(sname, STRING_UNKNOWN, sizeof(sname)); - } - - request_set(&request, RQ_SERVER_ADDR, inet_ntoa(server_sin->sin_addr), - 0); - request_set(&request, RQ_CLIENT_NAME, cname, RQ_SERVER_NAME, sname, 0); - - if (!hosts_access(&request)) { - syslog(LOG_NOTICE, "tcpwrappers rejected: %s -> %s", - ClientName, RealServerName); - return(0); - } - return(1); -} -#endif /* LIBWRAP */ - -double -wallclock_time(void) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - return(tv.tv_sec + tv.tv_usec / 1e6); -} - -/* - * Show the stats for this data transfer - */ -void -show_xfer_stats(void) -{ - char tbuf[1000]; - double delta; - size_t len; - int i = -1; - - if (!Verbose) - return; - - delta = wallclock_time() - xfer_start_time; - - if (delta < 0.001) - delta = 0.001; - - if (client_data_bytes == 0 && server_data_bytes == 0) { - syslog(LOG_INFO, - "data transfer complete (no bytes transferred)"); - return; - } - - len = sizeof(tbuf); - - if (delta >= 60) { - int idelta; - - idelta = delta + 0.5; - if (idelta >= 60*60) { - i = snprintf(tbuf, len, - "data transfer complete (%dh %dm %ds", - idelta / (60*60), (idelta % (60*60)) / 60, - idelta % 60); - if (i == -1 || i >= len) - goto logit; - len -= i; - } else { - i = snprintf(tbuf, len, - "data transfer complete (%dm %ds", idelta / 60, - idelta % 60); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - } else { - i = snprintf(tbuf, len, "data transfer complete (%.1fs", - delta); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - - if (client_data_bytes > 0) { - i = snprintf(&tbuf[strlen(tbuf)], len, - ", %d bytes to server) (%.1fKB/s", client_data_bytes, - (client_data_bytes / delta) / (double)1024); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - if (server_data_bytes > 0) { - i = snprintf(&tbuf[strlen(tbuf)], len, - ", %d bytes to client) (%.1fKB/s", server_data_bytes, - (server_data_bytes / delta) / (double)1024); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - strlcat(tbuf, ")", sizeof(tbuf)); - logit: - if (i != -1) - syslog(LOG_INFO, "%s", tbuf); + return (1); } void -log_control_command (char *cmd, int client) +end_session(struct session *s) { - /* log an ftp control command or reply */ - const char *logstring; - int level = LOG_DEBUG; + int err; - if (!Verbose) - return; + logmsg(LOG_INFO, "#%d ending session", s->id); - /* don't log passwords */ - if (strncasecmp(cmd, "pass ", 5) == 0) - logstring = "PASS XXXX"; - else - logstring = cmd; - if (client) { - /* log interesting stuff at LOG_INFO, rest at LOG_DEBUG */ - if ((strncasecmp(cmd, "user ", 5) == 0) || - (strncasecmp(cmd, "retr ", 5) == 0) || - (strncasecmp(cmd, "cwd ", 4) == 0) || - (strncasecmp(cmd, "stor " ,5) == 0)) - level = LOG_INFO; + if (s->client_fd != -1) + close(s->client_fd); + if (s->server_fd != -1) + close(s->server_fd); + + if (s->client_bufev) + bufferevent_free(s->client_bufev); + if (s->server_bufev) + bufferevent_free(s->server_bufev); + + /* Remove rulesets by commiting empty ones. */ + err = 0; + if (prepare_commit(s->id) == -1) + err = errno; + else if (do_commit() == -1) { + err = errno; + do_rollback(); } - syslog(level, "%s %s", client ? "client:" : " server:", - logstring); + if (err) + logmsg(LOG_ERR, "#%d pf rule removal failed: %s", s->id, + strerror(err)); + + LIST_REMOVE(s, entry); + free(s); + session_count--; } -/* - * set ourselves up for a new data connection. Direction is toward client if - * "server" is 0, towards server otherwise. - */ int -new_dataconn(int server) +exit_daemon(void) { - /* - * Close existing data conn. - */ + struct session *s, *next; - if (client_listen_socket != -1) { - close(client_listen_socket); - client_listen_socket = -1; + for (s = LIST_FIRST(&sessions); s != LIST_END(&sessions); s = next) { + next = LIST_NEXT(s, entry); + end_session(s); } - close_client_data(); - if (server_listen_socket != -1) { - close(server_listen_socket); - server_listen_socket = -1; - } - close_server_data(); + if (daemonize) + closelog(); - if (server) { - bzero(&server_listen_sa, sizeof(server_listen_sa)); - server_listen_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &server_listen_sa); + exit(0); - if (server_listen_socket == -1) { - syslog(LOG_INFO, "server socket bind() failed (%m)"); - exit(EX_OSERR); - } - if (listen(server_listen_socket, 5) != 0) { - syslog(LOG_INFO, "server socket listen() failed (%m)"); - exit(EX_OSERR); - } - } else { - bzero(&client_listen_sa, sizeof(client_listen_sa)); - client_listen_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &client_listen_sa); - - if (client_listen_socket == -1) { - syslog(LOG_NOTICE, - "cannot get client listen socket (%m)"); - exit(EX_OSERR); - } - if (listen(client_listen_socket, 5) != 0) { - syslog(LOG_NOTICE, - "cannot listen on client socket (%m)"); - exit(EX_OSERR); - } - } - return(0); + /* NOTREACHED */ + return (-1); } -static void -connect_pasv_backchannel(void) +int +getline(char *buf, size_t *valid) { - struct sockaddr_in listen_sa; - socklen_t salen; + size_t i; - /* - * We are about to accept a connection from the client. - * This is a PASV data connection. - */ - debuglog(2, "client listen socket ready"); + if (*valid > MAX_LINE) + return (-1); - close_server_data(); - close_client_data(); - - salen = sizeof(listen_sa); - client_data_socket = accept(client_listen_socket, - (struct sockaddr *)&listen_sa, &salen); - - if (client_data_socket < 0) { - syslog(LOG_NOTICE, "accept() failed (%m)"); - exit(EX_OSERR); - } - close(client_listen_socket); - client_listen_socket = -1; - memset(&listen_sa, 0, sizeof(listen_sa)); - - server_data_socket = get_backchannel_socket(SOCK_STREAM, min_port, - max_port, -1, 1, &listen_sa); - if (server_data_socket < 0) { - syslog(LOG_NOTICE, "get_backchannel_socket() failed (%m)"); - exit(EX_OSERR); - } - if (connect(server_data_socket, (struct sockaddr *) &server_listen_sa, - sizeof(server_listen_sa)) != 0) { - syslog(LOG_NOTICE, "connect() failed (%m)"); - exit(EX_NOHOST); - } - client_data_bytes = 0; - server_data_bytes = 0; - xfer_start_time = wallclock_time(); -} - -static void -connect_port_backchannel(void) -{ - struct sockaddr_in listen_sa; - socklen_t salen; - - /* - * We are about to accept a connection from the server. - * This is a PORT or EPRT data connection. - */ - debuglog(2, "server listen socket ready"); - - close_server_data(); - close_client_data(); - - salen = sizeof(listen_sa); - server_data_socket = accept(server_listen_socket, - (struct sockaddr *)&listen_sa, &salen); - if (server_data_socket < 0) { - syslog(LOG_NOTICE, "accept() failed (%m)"); - exit(EX_OSERR); - } - close(server_listen_socket); - server_listen_socket = -1; - - if (getuid() != 0) { - /* - * We're not running as root, so we get a backchannel - * socket bound in our designated range, instead of - * getting one bound to port 20 - This is deliberately - * not RFC compliant. - */ - bcopy(&src_addr, &listen_sa.sin_addr, sizeof(struct in_addr)); - client_data_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &listen_sa); - if (client_data_socket < 0) { - syslog(LOG_NOTICE, "get_backchannel_socket() failed (%m)"); - exit(EX_OSERR); - } - - } else { - - /* - * We're root, get our backchannel socket bound to port - * 20 here, so we're fully RFC compliant. - */ - client_data_socket = socket(AF_INET, SOCK_STREAM, 0); - - salen = 1; - listen_sa.sin_family = AF_INET; - bcopy(&src_addr, &listen_sa.sin_addr, sizeof(struct in_addr)); - listen_sa.sin_port = htons(20); - - if (setsockopt(client_data_socket, SOL_SOCKET, SO_REUSEADDR, - &salen, sizeof(salen)) == -1) { - syslog(LOG_NOTICE, "setsockopt() failed (%m)"); - exit(EX_OSERR); - } - - if (bind(client_data_socket, (struct sockaddr *)&listen_sa, - sizeof(listen_sa)) == - 1) { - syslog(LOG_NOTICE, "data channel bind() failed (%m)"); - exit(EX_OSERR); - } - } - - if (connect(client_data_socket, (struct sockaddr *) &client_listen_sa, - sizeof(client_listen_sa)) != 0) { - syslog(LOG_INFO, "cannot connect data channel (%m)"); - exit(EX_NOHOST); - } - - client_data_bytes = 0; - server_data_bytes = 0; - xfer_start_time = wallclock_time(); -} - -void -do_client_cmd(struct csiob *client, struct csiob *server) -{ - int i, j, rv; - char tbuf[100]; - char *sendbuf = NULL; - - log_control_command((char *)client->line_buffer, 1); - - /* client->line_buffer is an ftp control command. - * There is no reason for these to be very long. - * In the interest of limiting buffer overrun attempts, - * we catch them here. - */ - if (strlen((char *)client->line_buffer) > 512) { - syslog(LOG_NOTICE, "excessively long control command"); - exit(EX_DATAERR); - } - - /* - * Check the client user provided if needed - */ - if (AnonFtpOnly && strncasecmp((char *)client->line_buffer, "user ", - strlen("user ")) == 0) { - char *cp; - - cp = (char *) client->line_buffer + strlen("user "); - if ((strcasecmp(cp, "ftp\r\n") != 0) && - (strcasecmp(cp, "anonymous\r\n") != 0)) { - /* - * this isn't anonymous - give the client an - * error before they send a password - */ - snprintf(tbuf, sizeof(tbuf), - "500 Only anonymous FTP is allowed\r\n"); - j = 0; - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && - errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - sendbuf = NULL; - } else - sendbuf = (char *)client->line_buffer; - } else if ((strncasecmp((char *)client->line_buffer, "eprt ", - strlen("eprt ")) == 0)) { - - /* Watch out for EPRT commands */ - char *line = NULL, *q, *p, *result[3], delim; - struct addrinfo hints, *res = NULL; - unsigned long proto; - - j = 0; - line = strdup((char *)client->line_buffer+strlen("eprt ")); - if (line == NULL) { - syslog(LOG_ERR, "insufficient memory"); - exit(EX_UNAVAILABLE); - } - p = line; - delim = p[0]; - p++; - - memset(result,0, sizeof(result)); - for (i = 0; i < 3; i++) { - q = strchr(p, delim); - if (!q || *q != delim) - goto parsefail; - *q++ = '\0'; - result[i] = p; - p = q; - } - - proto = strtoul(result[0], &p, 10); - if (!*result[0] || *p) - goto protounsupp; - - memset(&hints, 0, sizeof(hints)); - if (proto != 1) /* 1 == AF_INET - all we support for now */ - goto protounsupp; - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_STREAM; - hints.ai_flags = AI_NUMERICHOST; /*no DNS*/ - if (getaddrinfo(result[1], result[2], &hints, &res)) - goto parsefail; - if (res->ai_next) - goto parsefail; - if (sizeof(client_listen_sa) < res->ai_addrlen) - goto parsefail; - memcpy(&client_listen_sa, res->ai_addr, res->ai_addrlen); - - debuglog(1, "client wants us to use %s:%u", - inet_ntoa(client_listen_sa.sin_addr), - htons(client_listen_sa.sin_port)); - - /* - * Configure our own listen socket and tell the server about it - */ - new_dataconn(1); - connection_mode = EPRT_MODE; - - debuglog(1, "we want server to use %s:%u", - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), "EPRT |%d|%s|%u|\r\n", 1, - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - debuglog(1, "to server (modified): %s", tbuf); - sendbuf = tbuf; - goto out; -parsefail: - snprintf(tbuf, sizeof(tbuf), - "500 Invalid argument; rejected\r\n"); - sendbuf = NULL; - goto out; -protounsupp: - /* we only support AF_INET for now */ - if (proto == 2) - snprintf(tbuf, sizeof(tbuf), - "522 Protocol not supported, use (1)\r\n"); - else - snprintf(tbuf, sizeof(tbuf), - "501 Protocol not supported\r\n"); - sendbuf = NULL; -out: - if (line) - free(line); - if (res) - freeaddrinfo(res); - if (sendbuf == NULL) { - debuglog(1, "to client (modified): %s", tbuf); - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && - errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - } - } else if (!NatMode && (strncasecmp((char *)client->line_buffer, - "epsv", strlen("epsv")) == 0)) { - - /* - * If we aren't in NAT mode, deal with EPSV. - * EPSV is a problem - Unlike PASV, the reply from the - * server contains *only* a port, we can't modify the reply - * to the client and get the client to connect to us without - * resorting to using a dynamic rdr rule we have to add in - * for the reply to this connection, and take away afterwards. - * so this will wait until we have the right solution for rule - * additions/deletions in pf. - * - * in the meantime we just tell the client we don't do it, - * and most clients should fall back to using PASV. - */ - - snprintf(tbuf, sizeof(tbuf), - "500 EPSV command not understood\r\n"); - debuglog(1, "to client (modified): %s", tbuf); - j = 0; - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - sendbuf = NULL; - } else if (strncasecmp((char *)client->line_buffer, "port ", - strlen("port ")) == 0) { - unsigned int values[6]; - char *tailptr; - - debuglog(1, "Got a PORT command"); - - tailptr = (char *)&client->line_buffer[strlen("port ")]; - values[0] = 0; - - i = sscanf(tailptr, "%u,%u,%u,%u,%u,%u", &values[0], - &values[1], &values[2], &values[3], &values[4], - &values[5]); - if (i != 6) { - syslog(LOG_INFO, "malformed PORT command (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - - for (i = 0; i<6; i++) { - if (values[i] > 255) { - syslog(LOG_INFO, - "malformed PORT command (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - } - - client_listen_sa.sin_family = AF_INET; - client_listen_sa.sin_addr.s_addr = htonl((values[0] << 24) | - (values[1] << 16) | (values[2] << 8) | - (values[3] << 0)); - - client_listen_sa.sin_port = htons((values[4] << 8) | - values[5]); - debuglog(1, "client wants us to use %u.%u.%u.%u:%u", - values[0], values[1], values[2], values[3], - (values[4] << 8) | values[5]); - - /* - * Configure our own listen socket and tell the server about it - */ - new_dataconn(1); - connection_mode = PORT_MODE; - - debuglog(1, "we want server to use %s:%u", - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), "PORT %u,%u,%u,%u,%u,%u\r\n", - ((u_char *)&server->sa.sin_addr.s_addr)[0], - ((u_char *)&server->sa.sin_addr.s_addr)[1], - ((u_char *)&server->sa.sin_addr.s_addr)[2], - ((u_char *)&server->sa.sin_addr.s_addr)[3], - ((u_char *)&server_listen_sa.sin_port)[0], - ((u_char *)&server_listen_sa.sin_port)[1]); - - debuglog(1, "to server (modified): %s", tbuf); - - sendbuf = tbuf; - } else - sendbuf = (char *)client->line_buffer; - - /* - *send our (possibly modified) control command in sendbuf - * on it's way to the server - */ - if (sendbuf != NULL) { - j = 0; - i = strlen(sendbuf); - do { - rv = send(server->fd, sendbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - } -} - -void -do_server_reply(struct csiob *server, struct csiob *client) -{ - int code, i, j, rv; - struct in_addr *iap; - static int continuing = 0; - char tbuf[100], *sendbuf, *p; - - log_control_command((char *)server->line_buffer, 0); - - if (strlen((char *)server->line_buffer) > 512) { - /* - * someone's playing games. Have a cow in the syslogs and - * exit - we don't pass this on for fear of hurting - * our other end, which might be poorly implemented. - */ - syslog(LOG_NOTICE, "long FTP control reply"); - exit(EX_DATAERR); - } - - /* - * Watch out for "227 Entering Passive Mode ..." replies - */ - code = strtol((char *)server->line_buffer, &p, 10); - if (isspace(server->line_buffer[0])) - code = 0; - if (!*(server->line_buffer) || (*p != ' ' && *p != '-')) { - if (continuing) - goto sendit; - syslog(LOG_INFO, "malformed control reply"); - exit(EX_DATAERR); - } - if (code <= 0 || code > 999) { - if (continuing) - goto sendit; - syslog(LOG_INFO, "invalid server reply code %d", code); - exit(EX_DATAERR); - } - if (*p == '-') - continuing = 1; - else - continuing = 0; - if (code == 227 && !NatMode) { - unsigned int values[6]; - char *tailptr; - - debuglog(1, "Got a PASV reply"); - debuglog(1, "{%s}", (char *)server->line_buffer); - - tailptr = (char *)strchr((char *)server->line_buffer, '('); - if (tailptr == NULL) { - tailptr = strrchr((char *)server->line_buffer, ' '); - if (tailptr == NULL) { - syslog(LOG_NOTICE, "malformed 227 reply"); - exit(EX_DATAERR); - } - } - tailptr++; /* skip past space or ( */ - - values[0] = 0; - - i = sscanf(tailptr, "%u,%u,%u,%u,%u,%u", &values[0], - &values[1], &values[2], &values[3], &values[4], - &values[5]); - if (i != 6) { - syslog(LOG_INFO, "malformed PASV reply (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - for (i = 0; i<6; i++) - if (values[i] > 255) { - syslog(LOG_INFO, "malformed PASV reply(%s)", - client->line_buffer); - exit(EX_DATAERR); - } - - server_listen_sa.sin_family = AF_INET; - server_listen_sa.sin_addr.s_addr = htonl((values[0] << 24) | - (values[1] << 16) | (values[2] << 8) | (values[3] << 0)); - server_listen_sa.sin_port = htons((values[4] << 8) | - values[5]); - - debuglog(1, "server wants us to use %s:%u", - inet_ntoa(server_listen_sa.sin_addr), (values[4] << 8) | - values[5]); - - new_dataconn(0); - connection_mode = PASV_MODE; - if (ReverseMode) - iap = &(proxy_sa.sin_addr); - else - iap = &(server->sa.sin_addr); - - debuglog(1, "we want client to use %s:%u", inet_ntoa(*iap), - htons(client_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), - "227 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n", - ((u_char *)iap)[0], ((u_char *)iap)[1], - ((u_char *)iap)[2], ((u_char *)iap)[3], - ((u_char *)&client_listen_sa.sin_port)[0], - ((u_char *)&client_listen_sa.sin_port)[1]); - debuglog(1, "to client (modified): %s", tbuf); - sendbuf = tbuf; - } else { - sendit: - sendbuf = (char *)server->line_buffer; - } - - /* - * send our (possibly modified) control command in sendbuf - * on it's way to the client - */ - j = 0; - i = strlen(sendbuf); - do { - rv = send(client->fd, sendbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) + /* Copy to linebuf while searching for a newline. */ + for (i = 0; i < *valid; i++) { + linebuf[i] = buf[i]; + if (buf[i] == '\0') + return (-1); + if (buf[i] == '\n') break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); + } + if (i == *valid) { + /* No newline found. */ + linebuf[0] = '\0'; + linelen = 0; + if (i < MAX_LINE) + return (0); + return (-1); + } + + linelen = i + 1; + linebuf[linelen] = '\0'; + *valid -= linelen; + + /* Move leftovers to the start. */ + if (*valid != 0) + bcopy(buf + linelen, buf, *valid); + + return ((int)linelen); +} + +void +handle_connection(const int listen_fd, short event, void *ev) +{ + struct sockaddr_storage tmp_ss; + struct sockaddr *client_sa, *server_sa, *fixed_server_sa; + struct sockaddr *client_to_proxy_sa, *proxy_to_server_sa; + struct session *s; + socklen_t len; + int client_fd, fc, on; + + /* + * We _must_ accept the connection, otherwise libevent will keep + * coming back, and we will chew up all CPU. + */ + client_sa = sstosa(&tmp_ss); + len = sizeof(struct sockaddr_storage); + if ((client_fd = accept(listen_fd, client_sa, &len)) < 0) { + logmsg(LOG_CRIT, "accept failed: %s", strerror(errno)); + return; + } + + /* Refuse connection if the maximum is reached. */ + if (session_count >= max_sessions) { + logmsg(LOG_ERR, "client limit (%d) reached, refusing " + "connection from %s", max_sessions, sock_ntop(client_sa)); + close(client_fd); + return; + } + + /* Allocate session and copy back the info from the accept(). */ + s = init_session(); + if (s == NULL) { + logmsg(LOG_CRIT, "init_session failed"); + close(client_fd); + return; + } + s->client_fd = client_fd; + memcpy(sstosa(&s->client_ss), client_sa, client_sa->sa_len); + + /* Cast it once, and be done with it. */ + client_sa = sstosa(&s->client_ss); + server_sa = sstosa(&s->server_ss); + client_to_proxy_sa = sstosa(&tmp_ss); + proxy_to_server_sa = sstosa(&s->proxy_ss); + fixed_server_sa = sstosa(&fixed_server_ss); + + /* Log id/client early to ease debugging. */ + logmsg(LOG_DEBUG, "#%d accepted connection from %s", s->id, + sock_ntop(client_sa)); + + /* + * Find out the real server and port that the client wanted. + */ + len = sizeof(struct sockaddr_storage); + if ((getsockname(s->client_fd, client_to_proxy_sa, &len)) < 0) { + logmsg(LOG_CRIT, "#%d getsockname failed: %s", s->id, + strerror(errno)); + goto fail; + } + if (server_lookup(client_sa, client_to_proxy_sa, server_sa) != 0) { + logmsg(LOG_CRIT, "#%d server lookup failed (no rdr?)", s->id); + goto fail; + } + if (fixed_server) { + memcpy(sstosa(&s->orig_server_ss), server_sa, + server_sa->sa_len); + memcpy(server_sa, fixed_server_sa, fixed_server_sa->sa_len); + } + + /* XXX: check we are not connecting to ourself. */ + + /* + * Setup socket and connect to server. + */ + if ((s->server_fd = socket(server_sa->sa_family, SOCK_STREAM, + IPPROTO_TCP)) < 0) { + logmsg(LOG_CRIT, "#%d server socket failed: %s", s->id, + strerror(errno)); + goto fail; + } + if (fixed_proxy && bind(s->server_fd, sstosa(&fixed_proxy_ss), + fixed_proxy_ss.ss_len) != 0) { + logmsg(LOG_CRIT, "#%d cannot bind fixed proxy address: %s", + s->id, strerror(errno)); + goto fail; + } + + /* Use non-blocking connect(), see CONNECT_TIMEOUT below. */ + if ((fc = fcntl(s->server_fd, F_GETFL)) == -1 || + fcntl(s->server_fd, F_SETFL, fc | O_NONBLOCK) == -1) { + logmsg(LOG_CRIT, "#%d cannot mark socket non-blocking: %s", + s->id, strerror(errno)); + goto fail; + } + if (connect(s->server_fd, server_sa, server_sa->sa_len) < 0 && + errno != EINPROGRESS) { + logmsg(LOG_CRIT, "#%d proxy cannot connect to server %s: %s", + s->id, sock_ntop(server_sa), strerror(errno)); + goto fail; + } + + len = sizeof(struct sockaddr_storage); + if ((getsockname(s->server_fd, proxy_to_server_sa, &len)) < 0) { + logmsg(LOG_CRIT, "#%d getsockname failed: %s", s->id, + strerror(errno)); + goto fail; + } + + logmsg(LOG_INFO, "#%d FTP session %d/%d started: client %s to server " + "%s via proxy %s ", s->id, session_count, max_sessions, + sock_ntop(client_sa), sock_ntop(server_sa), + sock_ntop(proxy_to_server_sa)); + + /* Keepalive is nice, but don't care if it fails. */ + on = 1; + setsockopt(s->client_fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, + sizeof on); + setsockopt(s->server_fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, + sizeof on); + + /* + * Setup buffered events. + */ + s->client_bufev = bufferevent_new(s->client_fd, &client_read, NULL, + &client_error, s); + if (s->client_bufev == NULL) { + logmsg(LOG_CRIT, "#%d bufferevent_new client failed", s->id); + goto fail; + } + bufferevent_settimeout(s->client_bufev, timeout, 0); + bufferevent_enable(s->client_bufev, EV_READ | EV_TIMEOUT); + + s->server_bufev = bufferevent_new(s->server_fd, &server_read, NULL, + &server_error, s); + if (s->server_bufev == NULL) { + logmsg(LOG_CRIT, "#%d bufferevent_new server failed", s->id); + goto fail; + } + bufferevent_settimeout(s->server_bufev, CONNECT_TIMEOUT, 0); + bufferevent_enable(s->server_bufev, EV_READ | EV_TIMEOUT); + + return; + + fail: + end_session(s); +} + +void +handle_signal(int sig, short event, void *arg) +{ + /* + * Signal handler rules don't apply, libevent decouples for us. + */ + + logmsg(LOG_ERR, "%s exiting on signal %d", __progname, sig); + + exit_daemon(); +} + + +struct session * +init_session(void) +{ + struct session *s; + + s = calloc(1, sizeof(struct session)); + if (s == NULL) + return (NULL); + + s->id = id_count++; + s->client_fd = -1; + s->server_fd = -1; + s->cbuf[0] = '\0'; + s->cbuf_valid = 0; + s->sbuf[0] = '\0'; + s->sbuf_valid = 0; + s->client_bufev = NULL; + s->server_bufev = NULL; + s->cmd = CMD_NONE; + s->port = 0; + + LIST_INSERT_HEAD(&sessions, s, entry); + session_count++; + + return (s); +} + +void +logmsg(int pri, const char *message, ...) +{ + va_list ap; + + if (pri > loglevel) + return; + + va_start(ap, message); + + if (daemonize) + /* syslog does its own vissing. */ + vsyslog(pri, message, ap); + else { + char buf[MAX_LOGLINE]; + char visbuf[2 * MAX_LOGLINE]; + + /* We don't care about truncation. */ + vsnprintf(buf, sizeof buf, message, ap); + strnvis(visbuf, buf, sizeof visbuf, VIS_CSTYLE | VIS_NL); + fprintf(stderr, "%s\n", visbuf); + } + + va_end(ap); } int main(int argc, char *argv[]) { - struct csiob client_iob, server_iob; - struct sigaction new_sa, old_sa; - int sval, ch, flags, i; - socklen_t salen; - int one = 1; - long timeout_seconds = 0; - struct timeval tv; -#ifdef LIBWRAP - int use_tcpwrapper = 0; -#endif /* LIBWRAP */ + struct rlimit rlp; + struct addrinfo hints, *res; + struct event ev, ev_sighup, ev_sigint, ev_sigterm; + int ch, error, listenfd, on; + const char *errstr; - while ((ch = getopt(argc, argv, "a:D:g:m:M:R:S:t:u:AnVwr")) != -1) { - char *p; + /* Defaults. */ + anonymous_only = 0; + daemonize = 1; + fixed_proxy = NULL; + fixed_server = NULL; + fixed_server_port = "21"; + ipv6_mode = 0; + listen_ip = NULL; + listen_port = "8021"; + loglevel = LOG_NOTICE; + max_sessions = 100; + qname = NULL; + rfc_mode = 0; + timeout = 24 * 3600; + verbose = 0; + + /* Other initialization. */ + id_count = 1; + session_count = 0; + + while ((ch = getopt(argc, argv, "6Aa:b:D:dm:P:p:q:R:rt:v")) != -1) { switch (ch) { - case 'a': - if (!*optarg) - usage(); - if ((Bind_Addr = inet_addr(optarg)) == INADDR_NONE) { - syslog(LOG_NOTICE, - "%s: invalid address", optarg); - usage(); - } + case '6': + ipv6_mode = 1; break; case 'A': - AnonFtpOnly = 1; /* restrict to anon usernames only */ + anonymous_only = 1; + break; + case 'a': + fixed_proxy = optarg; + break; + case 'b': + listen_ip = optarg; break; case 'D': - Debug_Level = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); + loglevel = strtonum(optarg, LOG_EMERG, LOG_DEBUG, + &errstr); + if (errstr) + errx(1, "loglevel %s", errstr); break; - case 'g': - Group = optarg; + case 'd': + daemonize = 0; break; case 'm': - min_port = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); - if (min_port < 0 || min_port > USHRT_MAX) - usage(); + max_sessions = strtonum(optarg, 1, 500, &errstr); + if (errstr) + errx(1, "max sessions %s", errstr); break; - case 'M': - max_port = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); - if (max_port < 0 || max_port > USHRT_MAX) - usage(); + case 'P': + fixed_server_port = optarg; break; - case 'n': - NatMode = 1; /* pass all passives, we're using NAT */ + case 'p': + listen_port = optarg; + break; + case 'q': + if (strlen(optarg) >= PF_QNAME_SIZE) + errx(1, "queuename too long"); + qname = optarg; + break; + case 'R': + fixed_server = optarg; break; case 'r': - Use_Rdns = 1; /* look up hostnames */ - break; - case 'R': { - char *s, *t; - - if (!*optarg) - usage(); - if ((s = strdup(optarg)) == NULL) { - syslog (LOG_NOTICE, - "Insufficient memory (malloc failed)"); - exit(EX_UNAVAILABLE); - } - memset(&real_server_sa, 0, sizeof(real_server_sa)); - real_server_sa.sin_len = sizeof(struct sockaddr_in); - real_server_sa.sin_family = AF_INET; - t = strchr(s, ':'); - if (t == NULL) - real_server_sa.sin_port = htons(21); - else { - long port = strtol(t + 1, &p, 10); - - if (*p || port <= 0 || port > 65535) - usage(); - real_server_sa.sin_port = htons(port); - *t = 0; - } - real_server_sa.sin_addr.s_addr = inet_addr(s); - if (real_server_sa.sin_addr.s_addr == INADDR_NONE) - usage(); - free(s); - ReverseMode = 1; - break; - } - case 'S': - if (!inet_aton(optarg, &src_addr)) - usage(); + rfc_mode = 1; break; case 't': - timeout_seconds = strtol(optarg, &p, 10); - if (!*optarg || *p) + timeout = strtonum(optarg, 0, 86400, &errstr); + if (errstr) + errx(1, "timeout %s", errstr); + break; + case 'v': + verbose++; + if (verbose > 2) usage(); break; - case 'u': - User = optarg; - break; - case 'V': - Verbose = 1; - break; -#ifdef LIBWRAP - case 'w': - use_tcpwrapper = 1; /* do the libwrap thing */ - break; -#endif /* LIBWRAP */ default: usage(); - /* NOTREACHED */ - } - } - argc -= optind; - argv += optind; - - if (max_port < min_port) - usage(); - - openlog(__progname, LOG_NDELAY|LOG_PID, LOG_DAEMON); - - setlinebuf(stdout); - setlinebuf(stderr); - - memset(&client_iob, 0, sizeof(client_iob)); - memset(&server_iob, 0, sizeof(server_iob)); - - if (get_proxy_env(0, &real_server_sa, &client_iob.sa, - &proxy_sa) == -1) - exit(EX_PROTOCOL); - - /* - * We may now drop root privs, as we have done our ioctl for - * pf. If we do drop root, we can't make backchannel connections - * for PORT and EPRT come from port 20, which is not strictly - * RFC compliant. This shouldn't cause problems for all but - * the stupidest ftp clients and the stupidest packet filters. - */ - drop_privs(); - - /* - * We check_host after get_proxy_env so that checks are done - * against the original destination endpoint, not the endpoint - * of our side of the rdr. This allows the use of tcpwrapper - * rules to restrict destinations as well as sources of connections - * for ftp. - */ - if (Use_Rdns) - flags = 0; - else - flags = NI_NUMERICHOST | NI_NUMERICSERV; - - i = getnameinfo((struct sockaddr *)&client_iob.sa, - sizeof(client_iob.sa), ClientName, sizeof(ClientName), NULL, 0, - flags); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (client)"); - exit(EX_OSERR); - } - - i = getnameinfo((struct sockaddr *)&real_server_sa, - sizeof(real_server_sa), RealServerName, sizeof(RealServerName), - NULL, 0, flags); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (server)"); - exit(EX_OSERR); - } - -#ifdef LIBWRAP - if (use_tcpwrapper && !check_host(&client_iob.sa, &real_server_sa)) - exit(EX_NOPERM); -#endif - - client_iob.fd = 0; - - syslog(LOG_INFO, "accepted connection from %s:%u to %s:%u", ClientName, - ntohs(client_iob.sa.sin_port), RealServerName, - ntohs(real_server_sa.sin_port)); - - server_iob.fd = get_backchannel_socket(SOCK_STREAM, min_port, max_port, - -1, 1, &server_iob.sa); - - if (connect(server_iob.fd, (struct sockaddr *)&real_server_sa, - sizeof(real_server_sa)) != 0) { - syslog(LOG_INFO, "cannot connect to %s:%u (%m)", RealServerName, - ntohs(real_server_sa.sin_port)); - exit(EX_NOHOST); - } - - /* - * Now that we are connected to the real server, get the name - * of our end of the server socket so we know our IP address - * from the real server's perspective. - */ - salen = sizeof(server_iob.sa); - getsockname(server_iob.fd, (struct sockaddr *)&server_iob.sa, &salen); - - i = getnameinfo((struct sockaddr *)&server_iob.sa, - sizeof(server_iob.sa), OurName, sizeof(OurName), NULL, 0, flags); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (local)"); - exit(EX_OSERR); - } - - debuglog(1, "local socket is %s:%u", OurName, - ntohs(server_iob.sa.sin_port)); - - /* ignore SIGPIPE */ - bzero(&new_sa, sizeof(new_sa)); - new_sa.sa_handler = SIG_IGN; - (void)sigemptyset(&new_sa.sa_mask); - new_sa.sa_flags = SA_RESTART; - if (sigaction(SIGPIPE, &new_sa, &old_sa) != 0) { - syslog(LOG_ERR, "sigaction() failed (%m)"); - exit(EX_OSERR); - } - - if (setsockopt(client_iob.fd, SOL_SOCKET, SO_OOBINLINE, (char *)&one, - sizeof(one)) == -1) { - syslog(LOG_NOTICE, "cannot set SO_OOBINLINE (%m)"); - exit(EX_OSERR); - } - - client_iob.line_buffer_size = STARTBUFSIZE; - client_iob.line_buffer = malloc(client_iob.line_buffer_size); - client_iob.io_buffer_size = STARTBUFSIZE; - client_iob.io_buffer = malloc(client_iob.io_buffer_size); - client_iob.next_byte = 0; - client_iob.io_buffer_len = 0; - client_iob.alive = 1; - client_iob.who = "client"; - client_iob.send_oob_flags = 0; - client_iob.real_sa = client_iob.sa; - - server_iob.line_buffer_size = STARTBUFSIZE; - server_iob.line_buffer = malloc(server_iob.line_buffer_size); - server_iob.io_buffer_size = STARTBUFSIZE; - server_iob.io_buffer = malloc(server_iob.io_buffer_size); - server_iob.next_byte = 0; - server_iob.io_buffer_len = 0; - server_iob.alive = 1; - server_iob.who = "server"; - server_iob.send_oob_flags = MSG_OOB; - server_iob.real_sa = real_server_sa; - - if (client_iob.line_buffer == NULL || client_iob.io_buffer == NULL || - server_iob.line_buffer == NULL || server_iob.io_buffer == NULL) { - syslog (LOG_NOTICE, "insufficient memory"); - exit(EX_UNAVAILABLE); - } - - while (client_iob.alive || server_iob.alive) { - int maxfd = 0; - fd_set *fdsp; - - if (client_iob.fd > maxfd) - maxfd = client_iob.fd; - if (client_listen_socket > maxfd) - maxfd = client_listen_socket; - if (client_data_socket > maxfd) - maxfd = client_data_socket; - if (server_iob.fd > maxfd) - maxfd = server_iob.fd; - if (server_listen_socket > maxfd) - maxfd = server_listen_socket; - if (server_data_socket > maxfd) - maxfd = server_data_socket; - - debuglog(3, "client is %s; server is %s", - client_iob.alive ? "alive" : "dead", - server_iob.alive ? "alive" : "dead"); - - fdsp = (fd_set *)calloc(howmany(maxfd + 1, NFDBITS), - sizeof(fd_mask)); - if (fdsp == NULL) { - syslog(LOG_NOTICE, "insufficient memory"); - exit(EX_UNAVAILABLE); - } - - if (client_iob.alive && telnet_getline(&client_iob, - &server_iob)) { - debuglog(3, "client line buffer is \"%s\"", - (char *)client_iob.line_buffer); - if (client_iob.line_buffer[0] != '\0') - do_client_cmd(&client_iob, &server_iob); - } else if (server_iob.alive && telnet_getline(&server_iob, - &client_iob)) { - debuglog(3, "server line buffer is \"%s\"", - (char *)server_iob.line_buffer); - if (server_iob.line_buffer[0] != '\0') - do_server_reply(&server_iob, &client_iob); - } else { - if (client_iob.alive) { - FD_SET(client_iob.fd, fdsp); - if (client_listen_socket >= 0) - FD_SET(client_listen_socket, fdsp); - if (client_data_socket >= 0) - FD_SET(client_data_socket, fdsp); - } - if (server_iob.alive) { - FD_SET(server_iob.fd, fdsp); - if (server_listen_socket >= 0) - FD_SET(server_listen_socket, fdsp); - if (server_data_socket >= 0) - FD_SET(server_data_socket, fdsp); - } - tv.tv_sec = timeout_seconds; - tv.tv_usec = 0; - - doselect: - sval = select(maxfd + 1, fdsp, NULL, NULL, - (tv.tv_sec == 0) ? NULL : &tv); - if (sval == 0) { - /* - * This proxy has timed out. Expire it - * quietly with an obituary in the syslogs - * for any passing mourners. - */ - syslog(LOG_INFO, - "timeout: no data for %ld seconds", - timeout_seconds); - exit(EX_OK); - } - if (sval == -1) { - if (errno == EINTR || errno == EAGAIN) - goto doselect; - syslog(LOG_NOTICE, - "select() failed (%m)"); - exit(EX_OSERR); - } - if (client_data_socket >= 0 && - FD_ISSET(client_data_socket, fdsp)) { - int rval; - - debuglog(3, "transfer: client to server"); - rval = xfer_data("client to server", - client_data_socket, - server_data_socket, - client_iob.sa.sin_addr, - real_server_sa.sin_addr); - if (rval <= 0) { - close_client_data(); - close_server_data(); - show_xfer_stats(); - } else - client_data_bytes += rval; - } - if (server_data_socket >= 0 && - FD_ISSET(server_data_socket, fdsp)) { - int rval; - - debuglog(3, "transfer: server to client"); - rval = xfer_data("server to client", - server_data_socket, - client_data_socket, - real_server_sa.sin_addr, - client_iob.sa.sin_addr); - if (rval <= 0) { - close_client_data(); - close_server_data(); - show_xfer_stats(); - } else - server_data_bytes += rval; - } - if (server_listen_socket >= 0 && - FD_ISSET(server_listen_socket, fdsp)) { - connect_port_backchannel(); - } - if (client_listen_socket >= 0 && - FD_ISSET(client_listen_socket, fdsp)) { - connect_pasv_backchannel(); - } - if (client_iob.alive && - FD_ISSET(client_iob.fd, fdsp)) { - client_iob.data_available = 1; - } - if (server_iob.alive && - FD_ISSET(server_iob.fd, fdsp)) { - server_iob.data_available = 1; - } - } - free(fdsp); - if (client_iob.got_eof) { - shutdown(server_iob.fd, 1); - shutdown(client_iob.fd, 0); - client_iob.got_eof = 0; - client_iob.alive = 0; - } - if (server_iob.got_eof) { - shutdown(client_iob.fd, 1); - shutdown(server_iob.fd, 0); - server_iob.got_eof = 0; - server_iob.alive = 0; } } - if (Verbose) - syslog(LOG_INFO, "session ended"); + if (listen_ip == NULL) + listen_ip = ipv6_mode ? "::1" : "127.0.0.1"; - exit(EX_OK); + /* Check for root to save the user from cryptic failure messages. */ + if (getuid() != 0) + errx(1, "needs to start as root"); + + /* Raise max. open files limit to satisfy max. sessions. */ + rlp.rlim_cur = rlp.rlim_max = (2 * max_sessions) + 10; + if (setrlimit(RLIMIT_NOFILE, &rlp) == -1) + err(1, "setrlimit"); + + if (fixed_proxy) { + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_NUMERICHOST; + hints.ai_family = ipv6_mode ? AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(fixed_proxy, NULL, &hints, &res); + if (error) + errx(1, "getaddrinfo fixed proxy address failed: %s", + gai_strerror(error)); + memcpy(&fixed_proxy_ss, res->ai_addr, res->ai_addrlen); + logmsg(LOG_INFO, "using %s to connect to servers", + sock_ntop(sstosa(&fixed_proxy_ss))); + freeaddrinfo(res); + } + + if (fixed_server) { + memset(&hints, 0, sizeof hints); + hints.ai_family = ipv6_mode ? AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(fixed_server, fixed_server_port, &hints, + &res); + if (error) + errx(1, "getaddrinfo fixed server address failed: %s", + gai_strerror(error)); + memcpy(&fixed_server_ss, res->ai_addr, res->ai_addrlen); + logmsg(LOG_INFO, "using fixed server %s", + sock_ntop(sstosa(&fixed_server_ss))); + freeaddrinfo(res); + } + + /* Setup listener. */ + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_NUMERICHOST | AI_PASSIVE; + hints.ai_family = ipv6_mode ? AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(listen_ip, listen_port, &hints, &res); + if (error) + errx(1, "getaddrinfo listen address failed: %s", + gai_strerror(error)); + if ((listenfd = socket(res->ai_family, SOCK_STREAM, IPPROTO_TCP)) == -1) + errx(1, "socket failed"); + on = 1; + if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, + sizeof on) != 0) + err(1, "setsockopt failed"); + if (bind(listenfd, (struct sockaddr *)res->ai_addr, + (socklen_t)res->ai_addrlen) != 0) + err(1, "bind failed"); + if (listen(listenfd, TCP_BACKLOG) != 0) + err(1, "listen failed"); + freeaddrinfo(res); + + /* Initialize pf. */ + init_filter(qname, verbose); + + if (daemonize) { + if (daemon(0, 0) == -1) + err(1, "cannot daemonize"); + openlog(__progname, LOG_PID | LOG_NDELAY, LOG_DAEMON); + } + + /* Use logmsg for output from here on. */ + + if (!drop_privs()) { + logmsg(LOG_ERR, "cannot drop privileges: %s", strerror(errno)); + exit(1); + } + + event_init(); + + /* Setup signal handler. */ + signal(SIGPIPE, SIG_IGN); + signal_set(&ev_sighup, SIGHUP, handle_signal, NULL); + signal_set(&ev_sigint, SIGINT, handle_signal, NULL); + signal_set(&ev_sigterm, SIGTERM, handle_signal, NULL); + signal_add(&ev_sighup, NULL); + signal_add(&ev_sigint, NULL); + signal_add(&ev_sigterm, NULL); + + event_set(&ev, listenfd, EV_READ | EV_PERSIST, handle_connection, &ev); + event_add(&ev, NULL); + + logmsg(LOG_NOTICE, "listening on %s port %s", listen_ip, listen_port); + + /* Vroom, vroom. */ + event_dispatch(); + + logmsg(LOG_ERR, "event_dispatch error: %s", strerror(errno)); + exit_daemon(); + + /* NOTREACHED */ + return (1); +} + +u_int16_t +parse_port(int mode) +{ + unsigned int port, v[6]; + int n; + char *p; + + /* Find the last space or left-parenthesis. */ + for (p = linebuf + linelen; p > linebuf; p--) + if (*p == ' ' || *p == '(') + break; + if (p == linebuf) + return (0); + + switch (mode) { + case CMD_PORT: + n = sscanf(p, " %u,%u,%u,%u,%u,%u", &v[0], &v[1], &v[2], + &v[3], &v[4], &v[5]); + if (n == 6 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && v[4] < 256 && v[5] < 256) + return ((v[4] << 8) | v[5]); + break; + case CMD_PASV: + n = sscanf(p, "(%u,%u,%u,%u,%u,%u)", &v[0], &v[1], &v[2], + &v[3], &v[4], &v[5]); + if (n == 6 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && v[4] < 256 && v[5] < 256) + return ((v[4] << 8) | v[5]); + break; + case CMD_EPSV: + n = sscanf(p, "(|||%u|)", &port); + if (n == 1 && port < 65536) + return (port); + break; + case CMD_EPRT: + n = sscanf(p, " |1|%u.%u.%u.%u|%u|", &v[0], &v[1], &v[2], + &v[3], &port); + if (n == 5 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && port < 65536) + return (port); + n = sscanf(p, " |2|%*[a-fA-F0-9:]|%u|", &port); + if (n == 1 && port < 65536) + return (port); + break; + default: + return (0); + } + + return (0); +} + +u_int16_t +pick_proxy_port(void) +{ + /* Random should be good enough for avoiding port collisions. */ + return (IPPORT_HIFIRSTAUTO + (arc4random() % + (IPPORT_HILASTAUTO - IPPORT_HIFIRSTAUTO))); +} + +void +proxy_reply(int cmd, struct sockaddr *sa, u_int16_t port) +{ + int i, r; + + switch (cmd) { + case CMD_PORT: + r = snprintf(linebuf, sizeof linebuf, + "PORT %s,%u,%u\r\n", sock_ntop(sa), port / 256, + port % 256); + break; + case CMD_PASV: + r = snprintf(linebuf, sizeof linebuf, + "227 Entering Passive Mode (%s,%u,%u)\r\n", sock_ntop(sa), + port / 256, port % 256); + break; + case CMD_EPRT: + if (sa->sa_family == AF_INET) + r = snprintf(linebuf, sizeof linebuf, + "EPRT |1|%s|%u|\r\n", sock_ntop(sa), port); + else if (sa->sa_family == AF_INET6) + r = snprintf(linebuf, sizeof linebuf, + "EPRT |2|%s|%u|\r\n", sock_ntop(sa), port); + break; + case CMD_EPSV: + r = snprintf(linebuf, sizeof linebuf, + "229 Entering Extended Passive Mode (|||%u|)\r\n", port); + break; + } + + if (r < 0 || r >= sizeof linebuf) { + logmsg(LOG_ERR, "proxy_reply failed: %d", r); + linebuf[0] = '\0'; + linelen = 0; + return; + } + linelen = (size_t)r; + + if (cmd == CMD_PORT || cmd == CMD_PASV) { + /* Replace dots in IP address with commas. */ + for (i = 0; i < linelen; i++) + if (linebuf[i] == '.') + linebuf[i] = ','; + } +} + +void +server_error(struct bufferevent *bufev, short what, void *arg) +{ + struct session *s = arg; + + if (what & EVBUFFER_EOF) + logmsg(LOG_INFO, "#%d server close", s->id); + else if (what == (EVBUFFER_ERROR | EVBUFFER_READ)) + logmsg(LOG_ERR, "#%d server refused connection", s->id); + else if (what & EVBUFFER_WRITE) + logmsg(LOG_ERR, "#%d server write error: %d", s->id, what); + else if (what & EVBUFFER_TIMEOUT) + logmsg(LOG_NOTICE, "#%d server timeout", s->id); + else + logmsg(LOG_ERR, "#%d abnormal server error: %d", s->id, what); + + end_session(s); +} + +int +server_parse(struct session *s) +{ + struct sockaddr *client_sa, *orig_sa, *proxy_sa, *server_sa; + int prepared = 0; + + if (s->cmd == CMD_NONE || linelen < 4 || linebuf[0] != '2') + goto out; + + /* + * The pf rules below do quite some NAT rewriting, to keep up + * appearances. Points to keep in mind: + * 1) The client must think it's talking to the real server, + * for both control and data connections. Transparently. + * 2) The server must think that the proxy is the client. + * 3) Source and destination ports are rewritten to minimize + * port collisions, to aid security (some systems pick weak + * ports) or to satisfy RFC requirements (source port 20). + */ + + /* Cast this once, to make code below it more readable. */ + client_sa = sstosa(&s->client_ss); + server_sa = sstosa(&s->server_ss); + proxy_sa = sstosa(&s->proxy_ss); + if (fixed_server) + /* Fixed server: data connections must appear to come + from / go to the original server, not the fixed one. */ + orig_sa = sstosa(&s->orig_server_ss); + else + /* Server not fixed: orig_server == server. */ + orig_sa = sstosa(&s->server_ss); + + /* Passive modes. */ + if ((s->cmd == CMD_PASV && strncmp("227 ", linebuf, 4) == 0) || + (s->cmd == CMD_EPSV && strncmp("229 ", linebuf, 4) == 0)) { + s->port = parse_port(s->cmd); + if (s->port < MIN_PORT) { + logmsg(LOG_CRIT, "#%d bad port in '%s'", s->id, + linebuf); + return (0); + } + s->proxy_port = pick_proxy_port(); + logmsg(LOG_INFO, "#%d passive: client to server port %d" + " via port %d", s->id, s->port, s->proxy_port); + + if (prepare_commit(s->id) == -1) + goto fail; + prepared = 1; + + proxy_reply(s->cmd, orig_sa, s->proxy_port); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); + + /* rdr from $client to $orig_server port $proxy_port -> $server + port $port */ + if (add_rdr(s->id, client_sa, orig_sa, s->proxy_port, + server_sa, s->port) == -1) + goto fail; + + /* nat from $client to $server port $port -> $proxy */ + if (add_nat(s->id, client_sa, server_sa, s->port, proxy_sa, + PF_NAT_PROXY_PORT_LOW, PF_NAT_PROXY_PORT_HIGH) == -1) + goto fail; + + /* pass in from $client to $server port $port */ + if (add_filter(s->id, PF_IN, client_sa, server_sa, + s->port) == -1) + goto fail; + + /* pass out from $proxy to $server port $port */ + if (add_filter(s->id, PF_OUT, proxy_sa, server_sa, + s->port) == -1) + goto fail; + } + + /* Active modes. */ + if ((s->cmd == CMD_PORT || s->cmd == CMD_EPRT) && + strncmp("200 ", linebuf, 4) == 0) { + logmsg(LOG_INFO, "#%d active: server to client port %d" + " via port %d", s->id, s->port, s->proxy_port); + + if (prepare_commit(s->id) == -1) + goto fail; + prepared = 1; + + /* rdr from $server to $proxy port $proxy_port -> $client port + $port */ + if (add_rdr(s->id, server_sa, proxy_sa, s->proxy_port, + client_sa, s->port) == -1) + goto fail; + + /* nat from $server to $client port $port -> $orig_server port + $natport */ + if (rfc_mode && s->cmd == CMD_PORT) { + /* Rewrite sourceport to RFC mandated 20. */ + if (add_nat(s->id, server_sa, client_sa, s->port, + orig_sa, 20, 20) == -1) + goto fail; + } else { + /* Let pf pick a source port from the standard range. */ + if (add_nat(s->id, server_sa, client_sa, s->port, + orig_sa, PF_NAT_PROXY_PORT_LOW, + PF_NAT_PROXY_PORT_HIGH) == -1) + goto fail; + } + + /* pass in from $server to $client port $port */ + if (add_filter(s->id, PF_IN, server_sa, client_sa, s->port) == + -1) + goto fail; + + /* pass out from $orig_server to $client port $port */ + if (add_filter(s->id, PF_OUT, orig_sa, client_sa, s->port) == + -1) + goto fail; + } + + /* Commit rules if they were prepared. */ + if (prepared && (do_commit() == -1)) { + if (errno != EBUSY) + goto fail; + /* One more try if busy. */ + usleep(5000); + if (do_commit() == -1) + goto fail; + } + + out: + s->cmd = CMD_NONE; + s->port = 0; + + return (1); + + fail: + logmsg(LOG_CRIT, "#%d pf operation failed: %s", s->id, strerror(errno)); + if (prepared) + do_rollback(); + return (0); +} + +void +server_read(struct bufferevent *bufev, void *arg) +{ + struct session *s = arg; + size_t buf_avail, read; + int n; + + bufferevent_settimeout(bufev, timeout, 0); + + do { + buf_avail = sizeof s->sbuf - s->sbuf_valid; + read = bufferevent_read(bufev, s->sbuf + s->sbuf_valid, + buf_avail); + s->sbuf_valid += read; + + while ((n = getline(s->sbuf, &s->sbuf_valid)) > 0) { + logmsg(LOG_DEBUG, "#%d server: %s", s->id, linebuf); + if (!server_parse(s)) { + end_session(s); + return; + } + bufferevent_write(s->client_bufev, linebuf, linelen); + } + + if (n == -1) { + logmsg(LOG_ERR, "#%d server reply too long or not" + " clean", s->id); + end_session(s); + return; + } + } while (read == buf_avail); +} + +const char * +sock_ntop(struct sockaddr *sa) +{ + static int n = 0; + + /* Cycle to next buffer. */ + n = (n + 1) % NTOP_BUFS; + ntop_buf[n][0] = '\0'; + + if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + return (inet_ntop(AF_INET, &sin->sin_addr, ntop_buf[n], + sizeof ntop_buf[0])); + } + + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + return (inet_ntop(AF_INET6, &sin6->sin6_addr, ntop_buf[n], + sizeof ntop_buf[0])); + } + + return (NULL); +} + +void +usage(void) +{ + fprintf(stderr, "usage: %s [-6Adrv] [-a address] [-b address]" + " [-D level] [-m maxsessions]\n [-P port]" + " [-p port] [-q queue] [-R address] [-t timeout]\n", __progname); + exit(1); } diff --git a/contrib/pf/libevent/buffer.c b/contrib/pf/libevent/buffer.c new file mode 100644 index 000000000000..77efd0cfa3e2 --- /dev/null +++ b/contrib/pf/libevent/buffer.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2002, 2003 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_VASPRINTF +/* If we have vasprintf, we need to define this before we include stdio.h. */ +#define _GNU_SOURCE +#endif + +#include + +#ifdef HAVE_SYS_TIME_H +#include +#endif + +#ifdef HAVE_SYS_IOCTL_H +#include +#endif + +#include +#include +#include +#include +#ifdef HAVE_STDARG_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "event.h" + +struct evbuffer * +evbuffer_new(void) +{ + struct evbuffer *buffer; + + buffer = calloc(1, sizeof(struct evbuffer)); + + return (buffer); +} + +void +evbuffer_free(struct evbuffer *buffer) +{ + if (buffer->orig_buffer != NULL) + free(buffer->orig_buffer); + free(buffer); +} + +/* + * This is a destructive add. The data from one buffer moves into + * the other buffer. + */ + +#define SWAP(x,y) do { \ + (x)->buffer = (y)->buffer; \ + (x)->orig_buffer = (y)->orig_buffer; \ + (x)->misalign = (y)->misalign; \ + (x)->totallen = (y)->totallen; \ + (x)->off = (y)->off; \ +} while (0) + +int +evbuffer_add_buffer(struct evbuffer *outbuf, struct evbuffer *inbuf) +{ + int res; + + /* Short cut for better performance */ + if (outbuf->off == 0) { + struct evbuffer tmp; + size_t oldoff = inbuf->off; + + /* Swap them directly */ + SWAP(&tmp, outbuf); + SWAP(outbuf, inbuf); + SWAP(inbuf, &tmp); + + /* + * Optimization comes with a price; we need to notify the + * buffer if necessary of the changes. oldoff is the amount + * of data that we tranfered from inbuf to outbuf + */ + if (inbuf->off != oldoff && inbuf->cb != NULL) + (*inbuf->cb)(inbuf, oldoff, inbuf->off, inbuf->cbarg); + if (oldoff && outbuf->cb != NULL) + (*outbuf->cb)(outbuf, 0, oldoff, outbuf->cbarg); + + return (0); + } + + res = evbuffer_add(outbuf, inbuf->buffer, inbuf->off); + if (res == 0) { + /* We drain the input buffer on success */ + evbuffer_drain(inbuf, inbuf->off); + } + + return (res); +} + +int +evbuffer_add_vprintf(struct evbuffer *buf, const char *fmt, va_list ap) +{ + char *buffer; + size_t space; + size_t oldoff = buf->off; + int sz; + va_list aq; + + for (;;) { + buffer = (char *)buf->buffer + buf->off; + space = buf->totallen - buf->misalign - buf->off; + +#ifndef va_copy +#define va_copy(dst, src) memcpy(&(dst), &(src), sizeof(va_list)) +#endif + va_copy(aq, ap); + +#ifdef WIN32 + sz = vsnprintf(buffer, space - 1, fmt, aq); + buffer[space - 1] = '\0'; +#else + sz = vsnprintf(buffer, space, fmt, aq); +#endif + + va_end(aq); + + if (sz == -1) + return (-1); + if (sz < space) { + buf->off += sz; + if (buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + return (sz); + } + if (evbuffer_expand(buf, sz + 1) == -1) + return (-1); + + } + /* NOTREACHED */ +} + +int +evbuffer_add_printf(struct evbuffer *buf, const char *fmt, ...) +{ + int res = -1; + va_list ap; + + va_start(ap, fmt); + res = evbuffer_add_vprintf(buf, fmt, ap); + va_end(ap); + + return (res); +} + +/* Reads data from an event buffer and drains the bytes read */ + +int +evbuffer_remove(struct evbuffer *buf, void *data, size_t datlen) +{ + size_t nread = datlen; + if (nread >= buf->off) + nread = buf->off; + + memcpy(data, buf->buffer, nread); + evbuffer_drain(buf, nread); + + return (nread); +} + +/* + * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'. + * The returned buffer needs to be freed by the called. + */ + +char * +evbuffer_readline(struct evbuffer *buffer) +{ + u_char *data = EVBUFFER_DATA(buffer); + size_t len = EVBUFFER_LENGTH(buffer); + char *line; + unsigned int i; + + for (i = 0; i < len; i++) { + if (data[i] == '\r' || data[i] == '\n') + break; + } + + if (i == len) + return (NULL); + + if ((line = malloc(i + 1)) == NULL) { + fprintf(stderr, "%s: out of memory\n", __func__); + evbuffer_drain(buffer, i); + return (NULL); + } + + memcpy(line, data, i); + line[i] = '\0'; + + /* + * Some protocols terminate a line with '\r\n', so check for + * that, too. + */ + if ( i < len - 1 ) { + char fch = data[i], sch = data[i+1]; + + /* Drain one more character if needed */ + if ( (sch == '\r' || sch == '\n') && sch != fch ) + i += 1; + } + + evbuffer_drain(buffer, i + 1); + + return (line); +} + +/* Adds data to an event buffer */ + +static inline void +evbuffer_align(struct evbuffer *buf) +{ + memmove(buf->orig_buffer, buf->buffer, buf->off); + buf->buffer = buf->orig_buffer; + buf->misalign = 0; +} + +/* Expands the available space in the event buffer to at least datlen */ + +int +evbuffer_expand(struct evbuffer *buf, size_t datlen) +{ + size_t need = buf->misalign + buf->off + datlen; + + /* If we can fit all the data, then we don't have to do anything */ + if (buf->totallen >= need) + return (0); + + /* + * If the misalignment fulfills our data needs, we just force an + * alignment to happen. Afterwards, we have enough space. + */ + if (buf->misalign >= datlen) { + evbuffer_align(buf); + } else { + void *newbuf; + size_t length = buf->totallen; + + if (length < 256) + length = 256; + while (length < need) + length <<= 1; + + if (buf->orig_buffer != buf->buffer) + evbuffer_align(buf); + if ((newbuf = realloc(buf->buffer, length)) == NULL) + return (-1); + + buf->orig_buffer = buf->buffer = newbuf; + buf->totallen = length; + } + + return (0); +} + +int +evbuffer_add(struct evbuffer *buf, const void *data, size_t datlen) +{ + size_t need = buf->misalign + buf->off + datlen; + size_t oldoff = buf->off; + + if (buf->totallen < need) { + if (evbuffer_expand(buf, datlen) == -1) + return (-1); + } + + memcpy(buf->buffer + buf->off, data, datlen); + buf->off += datlen; + + if (datlen && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + + return (0); +} + +void +evbuffer_drain(struct evbuffer *buf, size_t len) +{ + size_t oldoff = buf->off; + + if (len >= buf->off) { + buf->off = 0; + buf->buffer = buf->orig_buffer; + buf->misalign = 0; + goto done; + } + + buf->buffer += len; + buf->misalign += len; + + buf->off -= len; + + done: + /* Tell someone about changes in this buffer */ + if (buf->off != oldoff && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + +} + +/* + * Reads data from a file descriptor into a buffer. + */ + +#define EVBUFFER_MAX_READ 4096 + +int +evbuffer_read(struct evbuffer *buf, int fd, int howmuch) +{ + u_char *p; + size_t oldoff = buf->off; + int n = EVBUFFER_MAX_READ; +#ifdef WIN32 + DWORD dwBytesRead; +#endif + +#ifdef FIONREAD + if (ioctl(fd, FIONREAD, &n) == -1 || n == 0) { + n = EVBUFFER_MAX_READ; + } else if (n > EVBUFFER_MAX_READ && n > howmuch) { + /* + * It's possible that a lot of data is available for + * reading. We do not want to exhaust resources + * before the reader has a chance to do something + * about it. If the reader does not tell us how much + * data we should read, we artifically limit it. + */ + if (n > buf->totallen << 2) + n = buf->totallen << 2; + if (n < EVBUFFER_MAX_READ) + n = EVBUFFER_MAX_READ; + } +#endif + if (howmuch < 0 || howmuch > n) + howmuch = n; + + /* If we don't have FIONREAD, we might waste some space here */ + if (evbuffer_expand(buf, howmuch) == -1) + return (-1); + + /* We can append new data at this point */ + p = buf->buffer + buf->off; + +#ifndef WIN32 + n = read(fd, p, howmuch); + if (n == -1) + return (-1); + if (n == 0) + return (0); +#else + n = ReadFile((HANDLE)fd, p, howmuch, &dwBytesRead, NULL); + if (n == 0) + return (-1); + if (dwBytesRead == 0) + return (0); + n = dwBytesRead; +#endif + + buf->off += n; + + /* Tell someone about changes in this buffer */ + if (buf->off != oldoff && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + + return (n); +} + +int +evbuffer_write(struct evbuffer *buffer, int fd) +{ + int n; +#ifdef WIN32 + DWORD dwBytesWritten; +#endif + +#ifndef WIN32 + n = write(fd, buffer->buffer, buffer->off); + if (n == -1) + return (-1); + if (n == 0) + return (0); +#else + n = WriteFile((HANDLE)fd, buffer->buffer, buffer->off, &dwBytesWritten, NULL); + if (n == 0) + return (-1); + if (dwBytesWritten == 0) + return (0); + n = dwBytesWritten; +#endif + evbuffer_drain(buffer, n); + + return (n); +} + +u_char * +evbuffer_find(struct evbuffer *buffer, const u_char *what, size_t len) +{ + size_t remain = buffer->off; + u_char *search = buffer->buffer; + u_char *p; + + while ((p = memchr(search, *what, remain)) != NULL) { + remain = buffer->off - (size_t)(search - buffer->buffer); + if (remain < len) + break; + if (memcmp(p, what, len) == 0) + return (p); + search = p + 1; + } + + return (NULL); +} + +void evbuffer_setcb(struct evbuffer *buffer, + void (*cb)(struct evbuffer *, size_t, size_t, void *), + void *cbarg) +{ + buffer->cb = cb; + buffer->cbarg = cbarg; +} diff --git a/contrib/pf/libevent/evbuffer.c b/contrib/pf/libevent/evbuffer.c new file mode 100644 index 000000000000..52712bce5856 --- /dev/null +++ b/contrib/pf/libevent/evbuffer.c @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2002-2004 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_SYS_TIME_H +#include +#endif + +#include +#include +#include +#include +#ifdef HAVE_STDARG_H +#include +#endif + +#include "event.h" + +/* prototypes */ + +void bufferevent_setwatermark(struct bufferevent *, short, size_t, size_t); +void bufferevent_read_pressure_cb(struct evbuffer *, size_t, size_t, void *); + +static int +bufferevent_add(struct event *ev, int timeout) +{ + struct timeval tv, *ptv = NULL; + + if (timeout) { + timerclear(&tv); + tv.tv_sec = timeout; + ptv = &tv; + } + + return (event_add(ev, ptv)); +} + +/* + * This callback is executed when the size of the input buffer changes. + * We use it to apply back pressure on the reading side. + */ + +void +bufferevent_read_pressure_cb(struct evbuffer *buf, size_t old, size_t now, + void *arg) { + struct bufferevent *bufev = arg; + /* + * If we are below the watermark then reschedule reading if it's + * still enabled. + */ + if (bufev->wm_read.high == 0 || now < bufev->wm_read.high) { + evbuffer_setcb(buf, NULL, NULL); + + if (bufev->enabled & EV_READ) + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + } +} + +static void +bufferevent_readcb(int fd, short event, void *arg) +{ + struct bufferevent *bufev = arg; + int res = 0; + short what = EVBUFFER_READ; + size_t len; + int howmuch = -1; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto error; + } + + /* + * If we have a high watermark configured then we don't want to + * read more data than would make us reach the watermark. + */ + if (bufev->wm_read.high != 0) + howmuch = bufev->wm_read.high; + + res = evbuffer_read(bufev->input, fd, howmuch); + if (res == -1) { + if (errno == EAGAIN || errno == EINTR) + goto reschedule; + /* error case */ + what |= EVBUFFER_ERROR; + } else if (res == 0) { + /* eof case */ + what |= EVBUFFER_EOF; + } + + if (res <= 0) + goto error; + + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + + /* See if this callbacks meets the water marks */ + len = EVBUFFER_LENGTH(bufev->input); + if (bufev->wm_read.low != 0 && len < bufev->wm_read.low) + return; + if (bufev->wm_read.high != 0 && len > bufev->wm_read.high) { + struct evbuffer *buf = bufev->input; + event_del(&bufev->ev_read); + + /* Now schedule a callback for us */ + evbuffer_setcb(buf, bufferevent_read_pressure_cb, bufev); + return; + } + + /* Invoke the user callback - must always be called last */ + if (bufev->readcb != NULL) + (*bufev->readcb)(bufev, bufev->cbarg); + return; + + reschedule: + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + return; + + error: + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +static void +bufferevent_writecb(int fd, short event, void *arg) +{ + struct bufferevent *bufev = arg; + int res = 0; + short what = EVBUFFER_WRITE; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto error; + } + + if (EVBUFFER_LENGTH(bufev->output)) { + res = evbuffer_write(bufev->output, fd); + if (res == -1) { +#ifndef WIN32 +/*todo. evbuffer uses WriteFile when WIN32 is set. WIN32 system calls do not + *set errno. thus this error checking is not portable*/ + if (errno == EAGAIN || + errno == EINTR || + errno == EINPROGRESS) + goto reschedule; + /* error case */ + what |= EVBUFFER_ERROR; + +#else + goto reschedule; +#endif + + } else if (res == 0) { + /* eof case */ + what |= EVBUFFER_EOF; + } + if (res <= 0) + goto error; + } + + if (EVBUFFER_LENGTH(bufev->output) != 0) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + + /* + * Invoke the user callback if our buffer is drained or below the + * low watermark. + */ + if (bufev->writecb != NULL && + EVBUFFER_LENGTH(bufev->output) <= bufev->wm_write.low) + (*bufev->writecb)(bufev, bufev->cbarg); + + return; + + reschedule: + if (EVBUFFER_LENGTH(bufev->output) != 0) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + return; + + error: + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +/* + * Create a new buffered event object. + * + * The read callback is invoked whenever we read new data. + * The write callback is invoked whenever the output buffer is drained. + * The error callback is invoked on a write/read error or on EOF. + * + * Both read and write callbacks maybe NULL. The error callback is not + * allowed to be NULL and have to be provided always. + */ + +struct bufferevent * +bufferevent_new(int fd, evbuffercb readcb, evbuffercb writecb, + everrorcb errorcb, void *cbarg) +{ + struct bufferevent *bufev; + + if ((bufev = calloc(1, sizeof(struct bufferevent))) == NULL) + return (NULL); + + if ((bufev->input = evbuffer_new()) == NULL) { + free(bufev); + return (NULL); + } + + if ((bufev->output = evbuffer_new()) == NULL) { + evbuffer_free(bufev->input); + free(bufev); + return (NULL); + } + + event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev); + event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev); + + bufev->readcb = readcb; + bufev->writecb = writecb; + bufev->errorcb = errorcb; + + bufev->cbarg = cbarg; + + /* + * Set to EV_WRITE so that using bufferevent_write is going to + * trigger a callback. Reading needs to be explicitly enabled + * because otherwise no data will be available. + */ + bufev->enabled = EV_WRITE; + + return (bufev); +} + +int +bufferevent_priority_set(struct bufferevent *bufev, int priority) +{ + if (event_priority_set(&bufev->ev_read, priority) == -1) + return (-1); + if (event_priority_set(&bufev->ev_write, priority) == -1) + return (-1); + + return (0); +} + +/* Closing the file descriptor is the responsibility of the caller */ + +void +bufferevent_free(struct bufferevent *bufev) +{ + event_del(&bufev->ev_read); + event_del(&bufev->ev_write); + + evbuffer_free(bufev->input); + evbuffer_free(bufev->output); + + free(bufev); +} + +/* + * Returns 0 on success; + * -1 on failure. + */ + +int +bufferevent_write(struct bufferevent *bufev, void *data, size_t size) +{ + int res; + + res = evbuffer_add(bufev->output, data, size); + + if (res == -1) + return (res); + + /* If everything is okay, we need to schedule a write */ + if (size > 0 && (bufev->enabled & EV_WRITE)) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + + return (res); +} + +int +bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf) +{ + int res; + + res = bufferevent_write(bufev, buf->buffer, buf->off); + if (res != -1) + evbuffer_drain(buf, buf->off); + + return (res); +} + +size_t +bufferevent_read(struct bufferevent *bufev, void *data, size_t size) +{ + struct evbuffer *buf = bufev->input; + + if (buf->off < size) + size = buf->off; + + /* Copy the available data to the user buffer */ + memcpy(data, buf->buffer, size); + + if (size) + evbuffer_drain(buf, size); + + return (size); +} + +int +bufferevent_enable(struct bufferevent *bufev, short event) +{ + if (event & EV_READ) { + if (bufferevent_add(&bufev->ev_read, bufev->timeout_read) == -1) + return (-1); + } + if (event & EV_WRITE) { + if (bufferevent_add(&bufev->ev_write, bufev->timeout_write) == -1) + return (-1); + } + + bufev->enabled |= event; + return (0); +} + +int +bufferevent_disable(struct bufferevent *bufev, short event) +{ + if (event & EV_READ) { + if (event_del(&bufev->ev_read) == -1) + return (-1); + } + if (event & EV_WRITE) { + if (event_del(&bufev->ev_write) == -1) + return (-1); + } + + bufev->enabled &= ~event; + return (0); +} + +/* + * Sets the read and write timeout for a buffered event. + */ + +void +bufferevent_settimeout(struct bufferevent *bufev, + int timeout_read, int timeout_write) { + bufev->timeout_read = timeout_read; + bufev->timeout_write = timeout_write; +} + +/* + * Sets the water marks + */ + +void +bufferevent_setwatermark(struct bufferevent *bufev, short events, + size_t lowmark, size_t highmark) +{ + if (events & EV_READ) { + bufev->wm_read.low = lowmark; + bufev->wm_read.high = highmark; + } + + if (events & EV_WRITE) { + bufev->wm_write.low = lowmark; + bufev->wm_write.high = highmark; + } + + /* If the watermarks changed then see if we should call read again */ + bufferevent_read_pressure_cb(bufev->input, + 0, EVBUFFER_LENGTH(bufev->input), bufev); +} + +int +bufferevent_base_set(struct event_base *base, struct bufferevent *bufev) +{ + int res; + + res = event_base_set(base, &bufev->ev_read); + if (res == -1) + return (res); + + res = event_base_set(base, &bufev->ev_write); + return (res); +} diff --git a/contrib/pf/libevent/event-internal.h b/contrib/pf/libevent/event-internal.h new file mode 100644 index 000000000000..becb6691d52a --- /dev/null +++ b/contrib/pf/libevent/event-internal.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000-2004 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVENT_INTERNAL_H_ +#define _EVENT_INTERNAL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct event_base { + const struct eventop *evsel; + void *evbase; + int event_count; /* counts number of total events */ + int event_count_active; /* counts number of active events */ + + int event_gotterm; /* Set to terminate loop */ + + /* active event management */ + struct event_list **activequeues; + int nactivequeues; + + struct event_list eventqueue; + struct timeval event_tv; + + RB_HEAD(event_tree, event) timetree; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _EVENT_INTERNAL_H_ */ diff --git a/contrib/pf/libevent/event.c b/contrib/pf/libevent/event.c new file mode 100644 index 000000000000..f6d2b1cc42ca --- /dev/null +++ b/contrib/pf/libevent/event.c @@ -0,0 +1,878 @@ +/* + * Copyright (c) 2000-2004 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#undef WIN32_LEAN_AND_MEAN +#include "misc.h" +#endif +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#ifndef WIN32 +#include +#endif +#include +#include +#include +#include + +#include "event.h" +#include "event-internal.h" +#include "log.h" + +#ifdef HAVE_EVENT_PORTS +extern const struct eventop evportops; +#endif +#ifdef HAVE_SELECT +extern const struct eventop selectops; +#endif +#ifdef HAVE_POLL +extern const struct eventop pollops; +#endif +#ifdef HAVE_RTSIG +extern const struct eventop rtsigops; +#endif +#ifdef HAVE_EPOLL +extern const struct eventop epollops; +#endif +#ifdef HAVE_WORKING_KQUEUE +extern const struct eventop kqops; +#endif +#ifdef HAVE_DEVPOLL +extern const struct eventop devpollops; +#endif +#ifdef WIN32 +extern const struct eventop win32ops; +#endif + +/* In order of preference */ +const struct eventop *eventops[] = { +#ifdef HAVE_EVENT_PORTS + &evportops, +#endif +#ifdef HAVE_WORKING_KQUEUE + &kqops, +#endif +#ifdef HAVE_EPOLL + &epollops, +#endif +#ifdef HAVE_DEVPOLL + &devpollops, +#endif +#ifdef HAVE_RTSIG + &rtsigops, +#endif +#ifdef HAVE_POLL + &pollops, +#endif +#ifdef HAVE_SELECT + &selectops, +#endif +#ifdef WIN32 + &win32ops, +#endif + NULL +}; + +/* Global state */ +struct event_list signalqueue; + +struct event_base *current_base = NULL; + +/* Handle signals - This is a deprecated interface */ +int (*event_sigcb)(void); /* Signal callback when gotsig is set */ +volatile sig_atomic_t event_gotsig; /* Set in signal handler */ + +/* Prototypes */ +static void event_queue_insert(struct event_base *, struct event *, int); +static void event_queue_remove(struct event_base *, struct event *, int); +static int event_haveevents(struct event_base *); + +static void event_process_active(struct event_base *); + +static int timeout_next(struct event_base *, struct timeval *); +static void timeout_process(struct event_base *); +static void timeout_correct(struct event_base *, struct timeval *); + +static int +compare(struct event *a, struct event *b) +{ + if (timercmp(&a->ev_timeout, &b->ev_timeout, <)) + return (-1); + else if (timercmp(&a->ev_timeout, &b->ev_timeout, >)) + return (1); + if (a < b) + return (-1); + else if (a > b) + return (1); + return (0); +} + +static int +gettime(struct timeval *tp) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + +#ifdef HAVE_CLOCK_MONOTONIC + if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) +#else + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) +#endif + return (-1); + tp->tv_sec = ts.tv_sec; + tp->tv_usec = ts.tv_nsec / 1000; +#else + gettimeofday(tp, NULL); +#endif + + return (0); +} + +RB_PROTOTYPE(event_tree, event, ev_timeout_node, compare); + +RB_GENERATE(event_tree, event, ev_timeout_node, compare); + + +void * +event_init(void) +{ + int i; + + if ((current_base = calloc(1, sizeof(struct event_base))) == NULL) + event_err(1, "%s: calloc"); + + event_sigcb = NULL; + event_gotsig = 0; + gettime(¤t_base->event_tv); + + RB_INIT(¤t_base->timetree); + TAILQ_INIT(¤t_base->eventqueue); + TAILQ_INIT(&signalqueue); + + current_base->evbase = NULL; + for (i = 0; eventops[i] && !current_base->evbase; i++) { + current_base->evsel = eventops[i]; + + current_base->evbase = current_base->evsel->init(); + } + + if (current_base->evbase == NULL) + event_errx(1, "%s: no event mechanism available", __func__); + + if (getenv("EVENT_SHOW_METHOD")) + event_msgx("libevent using: %s\n", + current_base->evsel->name); + + /* allocate a single active event queue */ + event_base_priority_init(current_base, 1); + + return (current_base); +} + +void +event_base_free(struct event_base *base) +{ + int i; + + if (base == NULL && current_base) + base = current_base; + if (base == current_base) + current_base = NULL; + + assert(base); + assert(TAILQ_EMPTY(&base->eventqueue)); + for (i=0; i < base->nactivequeues; ++i) + assert(TAILQ_EMPTY(base->activequeues[i])); + + assert(RB_EMPTY(&base->timetree)); + + for (i = 0; i < base->nactivequeues; ++i) + free(base->activequeues[i]); + free(base->activequeues); + + if (base->evsel->dealloc != NULL) + base->evsel->dealloc(base->evbase); + + free(base); +} + +int +event_priority_init(int npriorities) +{ + return event_base_priority_init(current_base, npriorities); +} + +int +event_base_priority_init(struct event_base *base, int npriorities) +{ + int i; + + if (base->event_count_active) + return (-1); + + if (base->nactivequeues && npriorities != base->nactivequeues) { + for (i = 0; i < base->nactivequeues; ++i) { + free(base->activequeues[i]); + } + free(base->activequeues); + } + + /* Allocate our priority queues */ + base->nactivequeues = npriorities; + base->activequeues = (struct event_list **)calloc(base->nactivequeues, + npriorities * sizeof(struct event_list *)); + if (base->activequeues == NULL) + event_err(1, "%s: calloc", __func__); + + for (i = 0; i < base->nactivequeues; ++i) { + base->activequeues[i] = malloc(sizeof(struct event_list)); + if (base->activequeues[i] == NULL) + event_err(1, "%s: malloc", __func__); + TAILQ_INIT(base->activequeues[i]); + } + + return (0); +} + +int +event_haveevents(struct event_base *base) +{ + return (base->event_count > 0); +} + +/* + * Active events are stored in priority queues. Lower priorities are always + * process before higher priorities. Low priority events can starve high + * priority ones. + */ + +static void +event_process_active(struct event_base *base) +{ + struct event *ev; + struct event_list *activeq = NULL; + int i; + short ncalls; + + if (!base->event_count_active) + return; + + for (i = 0; i < base->nactivequeues; ++i) { + if (TAILQ_FIRST(base->activequeues[i]) != NULL) { + activeq = base->activequeues[i]; + break; + } + } + + assert(activeq != NULL); + + for (ev = TAILQ_FIRST(activeq); ev; ev = TAILQ_FIRST(activeq)) { + event_queue_remove(base, ev, EVLIST_ACTIVE); + + /* Allows deletes to work */ + ncalls = ev->ev_ncalls; + ev->ev_pncalls = &ncalls; + while (ncalls) { + ncalls--; + ev->ev_ncalls = ncalls; + (*ev->ev_callback)((int)ev->ev_fd, ev->ev_res, ev->ev_arg); + if (event_gotsig) + return; + } + } +} + +/* + * Wait continously for events. We exit only if no events are left. + */ + +int +event_dispatch(void) +{ + return (event_loop(0)); +} + +int +event_base_dispatch(struct event_base *event_base) +{ + return (event_base_loop(event_base, 0)); +} + +static void +event_loopexit_cb(int fd, short what, void *arg) +{ + struct event_base *base = arg; + base->event_gotterm = 1; +} + +/* not thread safe */ + +int +event_loopexit(struct timeval *tv) +{ + return (event_once(-1, EV_TIMEOUT, event_loopexit_cb, + current_base, tv)); +} + +int +event_base_loopexit(struct event_base *event_base, struct timeval *tv) +{ + return (event_once(-1, EV_TIMEOUT, event_loopexit_cb, + event_base, tv)); +} + +/* not thread safe */ + +int +event_loop(int flags) +{ + return event_base_loop(current_base, flags); +} + +int +event_base_loop(struct event_base *base, int flags) +{ + const struct eventop *evsel = base->evsel; + void *evbase = base->evbase; + struct timeval tv; + int res, done; + + done = 0; + while (!done) { + /* Calculate the initial events that we are waiting for */ + if (evsel->recalc(base, evbase, 0) == -1) + return (-1); + + /* Terminate the loop if we have been asked to */ + if (base->event_gotterm) { + base->event_gotterm = 0; + break; + } + + /* You cannot use this interface for multi-threaded apps */ + while (event_gotsig) { + event_gotsig = 0; + if (event_sigcb) { + res = (*event_sigcb)(); + if (res == -1) { + errno = EINTR; + return (-1); + } + } + } + + /* Check if time is running backwards */ + gettime(&tv); + if (timercmp(&tv, &base->event_tv, <)) { + struct timeval off; + event_debug(("%s: time is running backwards, corrected", + __func__)); + timersub(&base->event_tv, &tv, &off); + timeout_correct(base, &off); + } + base->event_tv = tv; + + if (!base->event_count_active && !(flags & EVLOOP_NONBLOCK)) + timeout_next(base, &tv); + else + timerclear(&tv); + + /* If we have no events, we just exit */ + if (!event_haveevents(base)) { + event_debug(("%s: no events registered.", __func__)); + return (1); + } + + res = evsel->dispatch(base, evbase, &tv); + + if (res == -1) + return (-1); + + timeout_process(base); + + if (base->event_count_active) { + event_process_active(base); + if (!base->event_count_active && (flags & EVLOOP_ONCE)) + done = 1; + } else if (flags & EVLOOP_NONBLOCK) + done = 1; + } + + event_debug(("%s: asked to terminate loop.", __func__)); + return (0); +} + +/* Sets up an event for processing once */ + +struct event_once { + struct event ev; + + void (*cb)(int, short, void *); + void *arg; +}; + +/* One-time callback, it deletes itself */ + +static void +event_once_cb(int fd, short events, void *arg) +{ + struct event_once *eonce = arg; + + (*eonce->cb)(fd, events, eonce->arg); + free(eonce); +} + +/* Schedules an event once */ + +int +event_once(int fd, short events, + void (*callback)(int, short, void *), void *arg, struct timeval *tv) +{ + struct event_once *eonce; + struct timeval etv; + int res; + + /* We cannot support signals that just fire once */ + if (events & EV_SIGNAL) + return (-1); + + if ((eonce = calloc(1, sizeof(struct event_once))) == NULL) + return (-1); + + eonce->cb = callback; + eonce->arg = arg; + + if (events == EV_TIMEOUT) { + if (tv == NULL) { + timerclear(&etv); + tv = &etv; + } + + evtimer_set(&eonce->ev, event_once_cb, eonce); + } else if (events & (EV_READ|EV_WRITE)) { + events &= EV_READ|EV_WRITE; + + event_set(&eonce->ev, fd, events, event_once_cb, eonce); + } else { + /* Bad event combination */ + free(eonce); + return (-1); + } + + res = event_add(&eonce->ev, tv); + if (res != 0) { + free(eonce); + return (res); + } + + return (0); +} + +void +event_set(struct event *ev, int fd, short events, + void (*callback)(int, short, void *), void *arg) +{ + /* Take the current base - caller needs to set the real base later */ + ev->ev_base = current_base; + + ev->ev_callback = callback; + ev->ev_arg = arg; + ev->ev_fd = fd; + ev->ev_events = events; + ev->ev_flags = EVLIST_INIT; + ev->ev_ncalls = 0; + ev->ev_pncalls = NULL; + + /* by default, we put new events into the middle priority */ + ev->ev_pri = current_base->nactivequeues/2; +} + +int +event_base_set(struct event_base *base, struct event *ev) +{ + /* Only innocent events may be assigned to a different base */ + if (ev->ev_flags != EVLIST_INIT) + return (-1); + + ev->ev_base = base; + ev->ev_pri = base->nactivequeues/2; + + return (0); +} + +/* + * Set's the priority of an event - if an event is already scheduled + * changing the priority is going to fail. + */ + +int +event_priority_set(struct event *ev, int pri) +{ + if (ev->ev_flags & EVLIST_ACTIVE) + return (-1); + if (pri < 0 || pri >= ev->ev_base->nactivequeues) + return (-1); + + ev->ev_pri = pri; + + return (0); +} + +/* + * Checks if a specific event is pending or scheduled. + */ + +int +event_pending(struct event *ev, short event, struct timeval *tv) +{ + struct timeval now, res; + int flags = 0; + + if (ev->ev_flags & EVLIST_INSERTED) + flags |= (ev->ev_events & (EV_READ|EV_WRITE)); + if (ev->ev_flags & EVLIST_ACTIVE) + flags |= ev->ev_res; + if (ev->ev_flags & EVLIST_TIMEOUT) + flags |= EV_TIMEOUT; + if (ev->ev_flags & EVLIST_SIGNAL) + flags |= EV_SIGNAL; + + event &= (EV_TIMEOUT|EV_READ|EV_WRITE|EV_SIGNAL); + + /* See if there is a timeout that we should report */ + if (tv != NULL && (flags & event & EV_TIMEOUT)) { + gettime(&now); + timersub(&ev->ev_timeout, &now, &res); + /* correctly remap to real time */ + gettimeofday(&now, NULL); + timeradd(&now, &res, tv); + } + + return (flags & event); +} + +int +event_add(struct event *ev, struct timeval *tv) +{ + struct event_base *base = ev->ev_base; + const struct eventop *evsel = base->evsel; + void *evbase = base->evbase; + + event_debug(( + "event_add: event: %p, %s%s%scall %p", + ev, + ev->ev_events & EV_READ ? "EV_READ " : " ", + ev->ev_events & EV_WRITE ? "EV_WRITE " : " ", + tv ? "EV_TIMEOUT " : " ", + ev->ev_callback)); + + assert(!(ev->ev_flags & ~EVLIST_ALL)); + + if (tv != NULL) { + struct timeval now; + + if (ev->ev_flags & EVLIST_TIMEOUT) + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + /* Check if it is active due to a timeout. Rescheduling + * this timeout before the callback can be executed + * removes it from the active list. */ + if ((ev->ev_flags & EVLIST_ACTIVE) && + (ev->ev_res & EV_TIMEOUT)) { + /* See if we are just active executing this + * event in a loop + */ + if (ev->ev_ncalls && ev->ev_pncalls) { + /* Abort loop */ + *ev->ev_pncalls = 0; + } + + event_queue_remove(base, ev, EVLIST_ACTIVE); + } + + gettime(&now); + timeradd(&now, tv, &ev->ev_timeout); + + event_debug(( + "event_add: timeout in %d seconds, call %p", + tv->tv_sec, ev->ev_callback)); + + event_queue_insert(base, ev, EVLIST_TIMEOUT); + } + + if ((ev->ev_events & (EV_READ|EV_WRITE)) && + !(ev->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE))) { + event_queue_insert(base, ev, EVLIST_INSERTED); + + return (evsel->add(evbase, ev)); + } else if ((ev->ev_events & EV_SIGNAL) && + !(ev->ev_flags & EVLIST_SIGNAL)) { + event_queue_insert(base, ev, EVLIST_SIGNAL); + + return (evsel->add(evbase, ev)); + } + + return (0); +} + +int +event_del(struct event *ev) +{ + struct event_base *base; + const struct eventop *evsel; + void *evbase; + + event_debug(("event_del: %p, callback %p", + ev, ev->ev_callback)); + + /* An event without a base has not been added */ + if (ev->ev_base == NULL) + return (-1); + + base = ev->ev_base; + evsel = base->evsel; + evbase = base->evbase; + + assert(!(ev->ev_flags & ~EVLIST_ALL)); + + /* See if we are just active executing this event in a loop */ + if (ev->ev_ncalls && ev->ev_pncalls) { + /* Abort loop */ + *ev->ev_pncalls = 0; + } + + if (ev->ev_flags & EVLIST_TIMEOUT) + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + if (ev->ev_flags & EVLIST_ACTIVE) + event_queue_remove(base, ev, EVLIST_ACTIVE); + + if (ev->ev_flags & EVLIST_INSERTED) { + event_queue_remove(base, ev, EVLIST_INSERTED); + return (evsel->del(evbase, ev)); + } else if (ev->ev_flags & EVLIST_SIGNAL) { + event_queue_remove(base, ev, EVLIST_SIGNAL); + return (evsel->del(evbase, ev)); + } + + return (0); +} + +void +event_active(struct event *ev, int res, short ncalls) +{ + /* We get different kinds of events, add them together */ + if (ev->ev_flags & EVLIST_ACTIVE) { + ev->ev_res |= res; + return; + } + + ev->ev_res = res; + ev->ev_ncalls = ncalls; + ev->ev_pncalls = NULL; + event_queue_insert(ev->ev_base, ev, EVLIST_ACTIVE); +} + +int +timeout_next(struct event_base *base, struct timeval *tv) +{ + struct timeval dflt = TIMEOUT_DEFAULT; + + struct timeval now; + struct event *ev; + + if ((ev = RB_MIN(event_tree, &base->timetree)) == NULL) { + *tv = dflt; + return (0); + } + + if (gettime(&now) == -1) + return (-1); + + if (timercmp(&ev->ev_timeout, &now, <=)) { + timerclear(tv); + return (0); + } + + timersub(&ev->ev_timeout, &now, tv); + + assert(tv->tv_sec >= 0); + assert(tv->tv_usec >= 0); + + event_debug(("timeout_next: in %d seconds", tv->tv_sec)); + return (0); +} + +static void +timeout_correct(struct event_base *base, struct timeval *off) +{ + struct event *ev; + + /* + * We can modify the key element of the node without destroying + * the key, beause we apply it to all in the right order. + */ + RB_FOREACH(ev, event_tree, &base->timetree) + timersub(&ev->ev_timeout, off, &ev->ev_timeout); +} + +void +timeout_process(struct event_base *base) +{ + struct timeval now; + struct event *ev, *next; + + gettime(&now); + + for (ev = RB_MIN(event_tree, &base->timetree); ev; ev = next) { + if (timercmp(&ev->ev_timeout, &now, >)) + break; + next = RB_NEXT(event_tree, &base->timetree, ev); + + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + /* delete this event from the I/O queues */ + event_del(ev); + + event_debug(("timeout_process: call %p", + ev->ev_callback)); + event_active(ev, EV_TIMEOUT, 1); + } +} + +void +event_queue_remove(struct event_base *base, struct event *ev, int queue) +{ + int docount = 1; + + if (!(ev->ev_flags & queue)) + event_errx(1, "%s: %p(fd %d) not on queue %x", __func__, + ev, ev->ev_fd, queue); + + if (ev->ev_flags & EVLIST_INTERNAL) + docount = 0; + + if (docount) + base->event_count--; + + ev->ev_flags &= ~queue; + switch (queue) { + case EVLIST_ACTIVE: + if (docount) + base->event_count_active--; + TAILQ_REMOVE(base->activequeues[ev->ev_pri], + ev, ev_active_next); + break; + case EVLIST_SIGNAL: + TAILQ_REMOVE(&signalqueue, ev, ev_signal_next); + break; + case EVLIST_TIMEOUT: + RB_REMOVE(event_tree, &base->timetree, ev); + break; + case EVLIST_INSERTED: + TAILQ_REMOVE(&base->eventqueue, ev, ev_next); + break; + default: + event_errx(1, "%s: unknown queue %x", __func__, queue); + } +} + +void +event_queue_insert(struct event_base *base, struct event *ev, int queue) +{ + int docount = 1; + + if (ev->ev_flags & queue) { + /* Double insertion is possible for active events */ + if (queue & EVLIST_ACTIVE) + return; + + event_errx(1, "%s: %p(fd %d) already on queue %x", __func__, + ev, ev->ev_fd, queue); + } + + if (ev->ev_flags & EVLIST_INTERNAL) + docount = 0; + + if (docount) + base->event_count++; + + ev->ev_flags |= queue; + switch (queue) { + case EVLIST_ACTIVE: + if (docount) + base->event_count_active++; + TAILQ_INSERT_TAIL(base->activequeues[ev->ev_pri], + ev,ev_active_next); + break; + case EVLIST_SIGNAL: + TAILQ_INSERT_TAIL(&signalqueue, ev, ev_signal_next); + break; + case EVLIST_TIMEOUT: { + struct event *tmp = RB_INSERT(event_tree, &base->timetree, ev); + assert(tmp == NULL); + break; + } + case EVLIST_INSERTED: + TAILQ_INSERT_TAIL(&base->eventqueue, ev, ev_next); + break; + default: + event_errx(1, "%s: unknown queue %x", __func__, queue); + } +} + +/* Functions for debugging */ + +const char * +event_get_version(void) +{ + return (VERSION); +} + +/* + * No thread-safe interface needed - the information should be the same + * for all threads. + */ + +const char * +event_get_method(void) +{ + return (current_base->evsel->name); +} diff --git a/contrib/pf/libevent/event.h b/contrib/pf/libevent/event.h new file mode 100644 index 000000000000..3f2032dd068e --- /dev/null +++ b/contrib/pf/libevent/event.h @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2000-2004 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVENT_H_ +#define _EVENT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#undef WIN32_LEAN_AND_MEAN +typedef unsigned char u_char; +typedef unsigned short u_short; +#endif + +#define EVLIST_TIMEOUT 0x01 +#define EVLIST_INSERTED 0x02 +#define EVLIST_SIGNAL 0x04 +#define EVLIST_ACTIVE 0x08 +#define EVLIST_INTERNAL 0x10 +#define EVLIST_INIT 0x80 + +/* EVLIST_X_ Private space: 0x1000-0xf000 */ +#define EVLIST_ALL (0xf000 | 0x9f) + +#define EV_TIMEOUT 0x01 +#define EV_READ 0x02 +#define EV_WRITE 0x04 +#define EV_SIGNAL 0x08 +#define EV_PERSIST 0x10 /* Persistant event */ + +/* Fix so that ppl dont have to run with */ +#ifndef TAILQ_ENTRY +#define _EVENT_DEFINED_TQENTRY +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} +#endif /* !TAILQ_ENTRY */ +#ifndef RB_ENTRY +#define _EVENT_DEFINED_RBENTRY +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_left; /* left element */ \ + struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ +} +#endif /* !RB_ENTRY */ + +struct event_base; +struct event { + TAILQ_ENTRY (event) ev_next; + TAILQ_ENTRY (event) ev_active_next; + TAILQ_ENTRY (event) ev_signal_next; + RB_ENTRY (event) ev_timeout_node; + + struct event_base *ev_base; + int ev_fd; + short ev_events; + short ev_ncalls; + short *ev_pncalls; /* Allows deletes in callback */ + + struct timeval ev_timeout; + + int ev_pri; /* smaller numbers are higher priority */ + + void (*ev_callback)(int, short, void *arg); + void *ev_arg; + + int ev_res; /* result passed to event callback */ + int ev_flags; +}; + +#define EVENT_SIGNAL(ev) (int)(ev)->ev_fd +#define EVENT_FD(ev) (int)(ev)->ev_fd + +/* + * Key-Value pairs. Can be used for HTTP headers but also for + * query argument parsing. + */ +struct evkeyval { + TAILQ_ENTRY(evkeyval) next; + + char *key; + char *value; +}; + +#ifdef _EVENT_DEFINED_TQENTRY +#undef TAILQ_ENTRY +struct event_list; +struct evkeyvalq; +#undef _EVENT_DEFINED_TQENTRY +#else +TAILQ_HEAD (event_list, event); +TAILQ_HEAD (evkeyvalq, evkeyval); +#endif /* _EVENT_DEFINED_TQENTRY */ +#ifdef _EVENT_DEFINED_RBENTRY +#undef RB_ENTRY +#undef _EVENT_DEFINED_RBENTRY +#endif /* _EVENT_DEFINED_RBENTRY */ + +struct eventop { + char *name; + void *(*init)(void); + int (*add)(void *, struct event *); + int (*del)(void *, struct event *); + int (*recalc)(struct event_base *, void *, int); + int (*dispatch)(struct event_base *, void *, struct timeval *); + void (*dealloc)(void *); +}; + +#define TIMEOUT_DEFAULT {5, 0} + +void *event_init(void); +int event_dispatch(void); +int event_base_dispatch(struct event_base *); +void event_base_free(struct event_base *); + +#define _EVENT_LOG_DEBUG 0 +#define _EVENT_LOG_MSG 1 +#define _EVENT_LOG_WARN 2 +#define _EVENT_LOG_ERR 3 +typedef void (*event_log_cb)(int severity, const char *msg); +void event_set_log_callback(event_log_cb cb); + +/* Associate a different event base with an event */ +int event_base_set(struct event_base *, struct event *); + +#define EVLOOP_ONCE 0x01 +#define EVLOOP_NONBLOCK 0x02 +int event_loop(int); +int event_base_loop(struct event_base *, int); +int event_loopexit(struct timeval *); /* Causes the loop to exit */ +int event_base_loopexit(struct event_base *, struct timeval *); + +#define evtimer_add(ev, tv) event_add(ev, tv) +#define evtimer_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg) +#define evtimer_del(ev) event_del(ev) +#define evtimer_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv) +#define evtimer_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +#define timeout_add(ev, tv) event_add(ev, tv) +#define timeout_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg) +#define timeout_del(ev) event_del(ev) +#define timeout_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv) +#define timeout_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +#define signal_add(ev, tv) event_add(ev, tv) +#define signal_set(ev, x, cb, arg) \ + event_set(ev, x, EV_SIGNAL|EV_PERSIST, cb, arg) +#define signal_del(ev) event_del(ev) +#define signal_pending(ev, tv) event_pending(ev, EV_SIGNAL, tv) +#define signal_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +void event_set(struct event *, int, short, void (*)(int, short, void *), void *); +int event_once(int, short, void (*)(int, short, void *), void *, struct timeval *); + +int event_add(struct event *, struct timeval *); +int event_del(struct event *); +void event_active(struct event *, int, short); + +int event_pending(struct event *, short, struct timeval *); + +#ifdef WIN32 +#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT && (ev)->ev_fd != (int)INVALID_HANDLE_VALUE) +#else +#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) +#endif + +/* Some simple debugging functions */ +const char *event_get_version(void); +const char *event_get_method(void); + +/* These functions deal with event priorities */ + +int event_priority_init(int); +int event_base_priority_init(struct event_base *, int); +int event_priority_set(struct event *, int); + +/* These functions deal with buffering input and output */ + +struct evbuffer { + u_char *buffer; + u_char *orig_buffer; + + size_t misalign; + size_t totallen; + size_t off; + + void (*cb)(struct evbuffer *, size_t, size_t, void *); + void *cbarg; +}; + +/* Just for error reporting - use other constants otherwise */ +#define EVBUFFER_READ 0x01 +#define EVBUFFER_WRITE 0x02 +#define EVBUFFER_EOF 0x10 +#define EVBUFFER_ERROR 0x20 +#define EVBUFFER_TIMEOUT 0x40 + +struct bufferevent; +typedef void (*evbuffercb)(struct bufferevent *, void *); +typedef void (*everrorcb)(struct bufferevent *, short what, void *); + +struct event_watermark { + size_t low; + size_t high; +}; + +struct bufferevent { + struct event ev_read; + struct event ev_write; + + struct evbuffer *input; + struct evbuffer *output; + + struct event_watermark wm_read; + struct event_watermark wm_write; + + evbuffercb readcb; + evbuffercb writecb; + everrorcb errorcb; + void *cbarg; + + int timeout_read; /* in seconds */ + int timeout_write; /* in seconds */ + + short enabled; /* events that are currently enabled */ +}; + +struct bufferevent *bufferevent_new(int fd, + evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg); +int bufferevent_base_set(struct event_base *base, struct bufferevent *bufev); +int bufferevent_priority_set(struct bufferevent *bufev, int pri); +void bufferevent_free(struct bufferevent *bufev); +int bufferevent_write(struct bufferevent *bufev, void *data, size_t size); +int bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf); +size_t bufferevent_read(struct bufferevent *bufev, void *data, size_t size); +int bufferevent_enable(struct bufferevent *bufev, short event); +int bufferevent_disable(struct bufferevent *bufev, short event); +void bufferevent_settimeout(struct bufferevent *bufev, + int timeout_read, int timeout_write); + +#define EVBUFFER_LENGTH(x) (x)->off +#define EVBUFFER_DATA(x) (x)->buffer +#define EVBUFFER_INPUT(x) (x)->input +#define EVBUFFER_OUTPUT(x) (x)->output + +struct evbuffer *evbuffer_new(void); +void evbuffer_free(struct evbuffer *); +int evbuffer_expand(struct evbuffer *, size_t); +int evbuffer_add(struct evbuffer *, const void *, size_t); +int evbuffer_remove(struct evbuffer *, void *, size_t); +char *evbuffer_readline(struct evbuffer *); +int evbuffer_add_buffer(struct evbuffer *, struct evbuffer *); +int evbuffer_add_printf(struct evbuffer *, const char *fmt, ...); +int evbuffer_add_vprintf(struct evbuffer *, const char *fmt, va_list ap); +void evbuffer_drain(struct evbuffer *, size_t); +int evbuffer_write(struct evbuffer *, int); +int evbuffer_read(struct evbuffer *, int, int); +u_char *evbuffer_find(struct evbuffer *, const u_char *, size_t); +void evbuffer_setcb(struct evbuffer *, void (*)(struct evbuffer *, size_t, size_t, void *), void *); + +/* + * Marshaling tagged data - We assume that all tags are inserted in their + * numeric order - so that unknown tags will always be higher than the + * known ones - and we can just ignore the end of an event buffer. + */ + +void evtag_init(void); + +void evtag_marshal(struct evbuffer *evbuf, u_int8_t tag, const void *data, + u_int32_t len); + +void encode_int(struct evbuffer *evbuf, u_int32_t number); + +void evtag_marshal_int(struct evbuffer *evbuf, u_int8_t tag, + u_int32_t integer); + +void evtag_marshal_string(struct evbuffer *buf, u_int8_t tag, + const char *string); + +void evtag_marshal_timeval(struct evbuffer *evbuf, u_int8_t tag, + struct timeval *tv); + +void evtag_test(void); + +int evtag_unmarshal(struct evbuffer *src, u_int8_t *ptag, + struct evbuffer *dst); +int evtag_peek(struct evbuffer *evbuf, u_int8_t *ptag); +int evtag_peek_length(struct evbuffer *evbuf, u_int32_t *plength); +int evtag_payload_length(struct evbuffer *evbuf, u_int32_t *plength); +int evtag_consume(struct evbuffer *evbuf); + +int evtag_unmarshal_int(struct evbuffer *evbuf, u_int8_t need_tag, + u_int32_t *pinteger); + +int evtag_unmarshal_fixed(struct evbuffer *src, u_int8_t need_tag, void *data, + size_t len); + +int evtag_unmarshal_string(struct evbuffer *evbuf, u_int8_t need_tag, + char **pstring); + +int evtag_unmarshal_timeval(struct evbuffer *evbuf, u_int8_t need_tag, + struct timeval *ptv); + +#ifdef __cplusplus +} +#endif + +#endif /* _EVENT_H_ */ diff --git a/contrib/pf/libevent/evsignal.h b/contrib/pf/libevent/evsignal.h new file mode 100644 index 000000000000..5b92bd6e55fb --- /dev/null +++ b/contrib/pf/libevent/evsignal.h @@ -0,0 +1,35 @@ +/* + * Copyright 2000-2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVSIGNAL_H_ +#define _EVSIGNAL_H_ + +void evsignal_init(void); +void evsignal_process(void); +int evsignal_add(struct event *); +int evsignal_del(struct event *); + +#endif /* _EVSIGNAL_H_ */ diff --git a/contrib/pf/libevent/kqueue.c b/contrib/pf/libevent/kqueue.c new file mode 100644 index 000000000000..08369c6dd51d --- /dev/null +++ b/contrib/pf/libevent/kqueue.c @@ -0,0 +1,413 @@ +/* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */ + +/* + * Copyright 2000-2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_INTTYPES_H +#include +#endif + +#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) +#define INTPTR(x) (intptr_t)x +#else +#define INTPTR(x) x +#endif + +#include "event.h" +#include "log.h" + +#define EVLIST_X_KQINKERNEL 0x1000 + +#define NEVENT 64 + +struct kqop { + struct kevent *changes; + int nchanges; + struct kevent *events; + int nevents; + int kq; +}; + +void *kq_init (void); +int kq_add (void *, struct event *); +int kq_del (void *, struct event *); +int kq_recalc (struct event_base *, void *, int); +int kq_dispatch (struct event_base *, void *, struct timeval *); +int kq_insert (struct kqop *, struct kevent *); +void kq_dealloc (void *); + +const struct eventop kqops = { + "kqueue", + kq_init, + kq_add, + kq_del, + kq_recalc, + kq_dispatch, + kq_dealloc +}; + +void * +kq_init(void) +{ + int kq; + struct kqop *kqueueop; + + /* Disable kqueue when this environment variable is set */ + if (getenv("EVENT_NOKQUEUE")) + return (NULL); + + if (!(kqueueop = calloc(1, sizeof(struct kqop)))) + return (NULL); + + /* Initalize the kernel queue */ + + if ((kq = kqueue()) == -1) { + event_warn("kqueue"); + free (kqueueop); + return (NULL); + } + + kqueueop->kq = kq; + + /* Initalize fields */ + kqueueop->changes = malloc(NEVENT * sizeof(struct kevent)); + if (kqueueop->changes == NULL) { + free (kqueueop); + return (NULL); + } + kqueueop->events = malloc(NEVENT * sizeof(struct kevent)); + if (kqueueop->events == NULL) { + free (kqueueop->changes); + free (kqueueop); + return (NULL); + } + kqueueop->nevents = NEVENT; + + /* Check for Mac OS X kqueue bug. */ + kqueueop->changes[0].ident = -1; + kqueueop->changes[0].filter = EVFILT_READ; + kqueueop->changes[0].flags = EV_ADD; + /* + * If kqueue works, then kevent will succeed, and it will + * stick an error in events[0]. If kqueue is broken, then + * kevent will fail. + */ + if (kevent(kq, + kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || + kqueueop->events[0].ident != -1 || + kqueueop->events[0].flags != EV_ERROR) { + event_warn("%s: detected broken kqueue; not using.", __func__); + free(kqueueop->changes); + free(kqueueop->events); + free(kqueueop); + close(kq); + return (NULL); + } + + return (kqueueop); +} + +int +kq_recalc(struct event_base *base, void *arg, int max) +{ + return (0); +} + +int +kq_insert(struct kqop *kqop, struct kevent *kev) +{ + int nevents = kqop->nevents; + + if (kqop->nchanges == nevents) { + struct kevent *newchange; + struct kevent *newresult; + + nevents *= 2; + + newchange = realloc(kqop->changes, + nevents * sizeof(struct kevent)); + if (newchange == NULL) { + event_warn("%s: malloc", __func__); + return (-1); + } + kqop->changes = newchange; + + newresult = realloc(kqop->events, + nevents * sizeof(struct kevent)); + + /* + * If we fail, we don't have to worry about freeing, + * the next realloc will pick it up. + */ + if (newresult == NULL) { + event_warn("%s: malloc", __func__); + return (-1); + } + kqop->events = newresult; + + kqop->nevents = nevents; + } + + memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); + + event_debug(("%s: fd %d %s%s", + __func__, kev->ident, + kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", + kev->flags == EV_DELETE ? " (del)" : "")); + + return (0); +} + +static void +kq_sighandler(int sig) +{ + /* Do nothing here */ +} + +int +kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) +{ + struct kqop *kqop = arg; + struct kevent *changes = kqop->changes; + struct kevent *events = kqop->events; + struct event *ev; + struct timespec ts; + int i, res; + + TIMEVAL_TO_TIMESPEC(tv, &ts); + + res = kevent(kqop->kq, changes, kqop->nchanges, + events, kqop->nevents, &ts); + kqop->nchanges = 0; + if (res == -1) { + if (errno != EINTR) { + event_warn("kevent"); + return (-1); + } + + return (0); + } + + event_debug(("%s: kevent reports %d", __func__, res)); + + for (i = 0; i < res; i++) { + int which = 0; + + if (events[i].flags & EV_ERROR) { + /* + * Error messages that can happen, when a delete fails. + * EBADF happens when the file discriptor has been + * closed, + * ENOENT when the file discriptor was closed and + * then reopened. + * EINVAL for some reasons not understood; EINVAL + * should not be returned ever; but FreeBSD does :-\ + * An error is also indicated when a callback deletes + * an event we are still processing. In that case + * the data field is set to ENOENT. + */ + if (events[i].data == EBADF || + events[i].data == EINVAL || + events[i].data == ENOENT) + continue; + errno = events[i].data; + return (-1); + } + + ev = (struct event *)events[i].udata; + + if (events[i].filter == EVFILT_READ) { + which |= EV_READ; + } else if (events[i].filter == EVFILT_WRITE) { + which |= EV_WRITE; + } else if (events[i].filter == EVFILT_SIGNAL) { + which |= EV_SIGNAL; + } + + if (!which) + continue; + + if (!(ev->ev_events & EV_PERSIST)) + event_del(ev); + + event_active(ev, which, + ev->ev_events & EV_SIGNAL ? events[i].data : 1); + } + + return (0); +} + + +int +kq_add(void *arg, struct event *ev) +{ + struct kqop *kqop = arg; + struct kevent kev; + + if (ev->ev_events & EV_SIGNAL) { + int nsignal = EVENT_SIGNAL(ev); + + memset(&kev, 0, sizeof(kev)); + kev.ident = nsignal; + kev.filter = EVFILT_SIGNAL; + kev.flags = EV_ADD; + if (!(ev->ev_events & EV_PERSIST)) + kev.flags |= EV_ONESHOT; + kev.udata = INTPTR(ev); + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + if (signal(nsignal, kq_sighandler) == SIG_ERR) + return (-1); + + ev->ev_flags |= EVLIST_X_KQINKERNEL; + return (0); + } + + if (ev->ev_events & EV_READ) { + memset(&kev, 0, sizeof(kev)); + kev.ident = ev->ev_fd; + kev.filter = EVFILT_READ; +#ifdef NOTE_EOF + /* Make it behave like select() and poll() */ + kev.fflags = NOTE_EOF; +#endif + kev.flags = EV_ADD; + if (!(ev->ev_events & EV_PERSIST)) + kev.flags |= EV_ONESHOT; + kev.udata = INTPTR(ev); + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + ev->ev_flags |= EVLIST_X_KQINKERNEL; + } + + if (ev->ev_events & EV_WRITE) { + memset(&kev, 0, sizeof(kev)); + kev.ident = ev->ev_fd; + kev.filter = EVFILT_WRITE; + kev.flags = EV_ADD; + if (!(ev->ev_events & EV_PERSIST)) + kev.flags |= EV_ONESHOT; + kev.udata = INTPTR(ev); + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + ev->ev_flags |= EVLIST_X_KQINKERNEL; + } + + return (0); +} + +int +kq_del(void *arg, struct event *ev) +{ + struct kqop *kqop = arg; + struct kevent kev; + + if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) + return (0); + + if (ev->ev_events & EV_SIGNAL) { + int nsignal = EVENT_SIGNAL(ev); + + memset(&kev, 0, sizeof(kev)); + kev.ident = nsignal; + kev.filter = EVFILT_SIGNAL; + kev.flags = EV_DELETE; + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + if (signal(nsignal, SIG_DFL) == SIG_ERR) + return (-1); + + ev->ev_flags &= ~EVLIST_X_KQINKERNEL; + return (0); + } + + if (ev->ev_events & EV_READ) { + memset(&kev, 0, sizeof(kev)); + kev.ident = ev->ev_fd; + kev.filter = EVFILT_READ; + kev.flags = EV_DELETE; + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + ev->ev_flags &= ~EVLIST_X_KQINKERNEL; + } + + if (ev->ev_events & EV_WRITE) { + memset(&kev, 0, sizeof(kev)); + kev.ident = ev->ev_fd; + kev.filter = EVFILT_WRITE; + kev.flags = EV_DELETE; + + if (kq_insert(kqop, &kev) == -1) + return (-1); + + ev->ev_flags &= ~EVLIST_X_KQINKERNEL; + } + + return (0); +} + +void +kq_dealloc(void *arg) +{ + struct kqop *kqop = arg; + + if (kqop->changes) + free(kqop->changes); + if (kqop->events) + free(kqop->events); + if (kqop->kq) + close(kqop->kq); + memset(kqop, 0, sizeof(struct kqop)); + free(kqop); +} diff --git a/contrib/pf/libevent/log.c b/contrib/pf/libevent/log.c new file mode 100644 index 000000000000..c9275e363fc5 --- /dev/null +++ b/contrib/pf/libevent/log.c @@ -0,0 +1,219 @@ +/* $OpenBSD: err.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */ + +/* + * log.c + * + * Based on err.c, which was adapted from OpenBSD libc *err* *warn* code. + * + * Copyright (c) 2005 Nick Mathewson + * + * Copyright (c) 2000 Dug Song + * + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#undef WIN32_LEAN_AND_MEAN +#include "misc.h" +#endif +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include "event.h" + +#include "log.h" + +static void _warn_helper(int severity, int log_errno, const char *fmt, + va_list ap); +static void event_log(int severity, const char *msg); + +static int +event_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + int r; + if (size == 0) + return -1; +#ifdef WIN32 + r = _vsnprintf(str, size, format, args); +#else + r = vsnprintf(str, size, format, args); +#endif + str[size-1] = '\0'; + if (r < 0 || ((size_t)r) >= size) { + /* different platforms behave differently on overflow; + * handle both kinds. */ + return -1; + } + return r; +} + +static int +event_snprintf(char *str, size_t size, const char *format, ...) +{ + va_list ap; + int r; + va_start(ap, format); + r = event_vsnprintf(str, size, format, ap); + va_end(ap); + return r; +} + +void +event_err(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_ERR, errno, fmt, ap); + va_end(ap); + exit(eval); +} + +void +event_warn(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_WARN, errno, fmt, ap); + va_end(ap); +} + +void +event_errx(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_ERR, -1, fmt, ap); + va_end(ap); + exit(eval); +} + +void +event_warnx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_WARN, -1, fmt, ap); + va_end(ap); +} + +void +event_msgx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_MSG, -1, fmt, ap); + va_end(ap); +} + +void +_event_debugx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _warn_helper(_EVENT_LOG_DEBUG, -1, fmt, ap); + va_end(ap); +} + +static void +_warn_helper(int severity, int log_errno, const char *fmt, va_list ap) +{ + char buf[1024]; + size_t len; + + if (fmt != NULL) + event_vsnprintf(buf, sizeof(buf), fmt, ap); + else + buf[0] = '\0'; + + if (log_errno >= 0) { + len = strlen(buf); + if (len < sizeof(buf) - 3) { + event_snprintf(buf + len, sizeof(buf) - len, ": %s", + strerror(log_errno)); + } + } + + event_log(severity, buf); +} + +static event_log_cb log_fn = NULL; + +void +event_set_log_callback(event_log_cb cb) +{ + log_fn = cb; +} + +static void +event_log(int severity, const char *msg) +{ + if (log_fn) + log_fn(severity, msg); + else { + const char *severity_str; + switch (severity) { + case _EVENT_LOG_DEBUG: + severity_str = "debug"; + break; + case _EVENT_LOG_MSG: + severity_str = "msg"; + break; + case _EVENT_LOG_WARN: + severity_str = "warn"; + break; + case _EVENT_LOG_ERR: + severity_str = "err"; + break; + default: + severity_str = "???"; + break; + } + (void)fprintf(stderr, "[%s] %s\n", severity_str, msg); + } +} diff --git a/contrib/pf/libevent/log.h b/contrib/pf/libevent/log.h new file mode 100644 index 000000000000..1f843cf984e0 --- /dev/null +++ b/contrib/pf/libevent/log.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000-2004 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _LOG_H_ +#define _LOG_H_ + +void event_err(int eval, const char *fmt, ...); +void event_warn(const char *fmt, ...); +void event_errx(int eval, const char *fmt, ...); +void event_warnx(const char *fmt, ...); +void event_msgx(const char *fmt, ...); +void _event_debugx(const char *fmt, ...); + +#ifdef USE_DEBUG +#define event_debug(x) _event_debugx x +#else +#define event_debug(x) do {;} while (0) +#endif + +#endif diff --git a/contrib/pf/libevent/poll.c b/contrib/pf/libevent/poll.c new file mode 100644 index 000000000000..14ca8453739b --- /dev/null +++ b/contrib/pf/libevent/poll.c @@ -0,0 +1,388 @@ +/* $OpenBSD: poll.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */ + +/* + * Copyright 2000-2003 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CHECK_INVARIANTS +#include +#endif + +#include "event.h" +#include "event-internal.h" +#include "evsignal.h" +#include "log.h" + +extern volatile sig_atomic_t evsignal_caught; + +struct pollop { + int event_count; /* Highest number alloc */ + int nfds; /* Size of event_* */ + int fd_count; /* Size of idxplus1_by_fd */ + struct pollfd *event_set; + struct event **event_r_back; + struct event **event_w_back; + int *idxplus1_by_fd; /* Index into event_set by fd; we add 1 so + * that 0 (which is easy to memset) can mean + * "no entry." */ +}; + +void *poll_init (void); +int poll_add (void *, struct event *); +int poll_del (void *, struct event *); +int poll_recalc (struct event_base *, void *, int); +int poll_dispatch (struct event_base *, void *, struct timeval *); +void poll_dealloc (void *); + +const struct eventop pollops = { + "poll", + poll_init, + poll_add, + poll_del, + poll_recalc, + poll_dispatch, + poll_dealloc +}; + +void * +poll_init(void) +{ + struct pollop *pollop; + + /* Disable poll when this environment variable is set */ + if (getenv("EVENT_NOPOLL")) + return (NULL); + + if (!(pollop = calloc(1, sizeof(struct pollop)))) + return (NULL); + + evsignal_init(); + + return (pollop); +} + +/* + * Called with the highest fd that we know about. If it is 0, completely + * recalculate everything. + */ + +int +poll_recalc(struct event_base *base, void *arg, int max) +{ + return (0); +} + +#ifdef CHECK_INVARIANTS +static void +poll_check_ok(struct pollop *pop) +{ + int i, idx; + struct event *ev; + + for (i = 0; i < pop->fd_count; ++i) { + idx = pop->idxplus1_by_fd[i]-1; + if (idx < 0) + continue; + assert(pop->event_set[idx].fd == i); + if (pop->event_set[idx].events & POLLIN) { + ev = pop->event_r_back[idx]; + assert(ev); + assert(ev->ev_events & EV_READ); + assert(ev->ev_fd == i); + } + if (pop->event_set[idx].events & POLLOUT) { + ev = pop->event_w_back[idx]; + assert(ev); + assert(ev->ev_events & EV_WRITE); + assert(ev->ev_fd == i); + } + } + for (i = 0; i < pop->nfds; ++i) { + struct pollfd *pfd = &pop->event_set[i]; + assert(pop->idxplus1_by_fd[pfd->fd] == i+1); + } +} +#else +#define poll_check_ok(pop) +#endif + +int +poll_dispatch(struct event_base *base, void *arg, struct timeval *tv) +{ + int res, i, sec, nfds; + struct pollop *pop = arg; + + poll_check_ok(pop); + sec = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; + nfds = pop->nfds; + res = poll(pop->event_set, nfds, sec); + + if (res == -1) { + if (errno != EINTR) { + event_warn("poll"); + return (-1); + } + + evsignal_process(); + return (0); + } else if (evsignal_caught) + evsignal_process(); + + event_debug(("%s: poll reports %d", __func__, res)); + + if (res == 0) + return (0); + + for (i = 0; i < nfds; i++) { + int what = pop->event_set[i].revents; + struct event *r_ev = NULL, *w_ev = NULL; + if (!what) + continue; + + res = 0; + + /* If the file gets closed notify */ + if (what & (POLLHUP|POLLERR)) + what |= POLLIN|POLLOUT; + if (what & POLLIN) { + res |= EV_READ; + r_ev = pop->event_r_back[i]; + } + if (what & POLLOUT) { + res |= EV_WRITE; + w_ev = pop->event_w_back[i]; + } + if (res == 0) + continue; + + if (r_ev && (res & r_ev->ev_events)) { + if (!(r_ev->ev_events & EV_PERSIST)) + event_del(r_ev); + event_active(r_ev, res & r_ev->ev_events, 1); + } + if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) { + if (!(w_ev->ev_events & EV_PERSIST)) + event_del(w_ev); + event_active(w_ev, res & w_ev->ev_events, 1); + } + } + + return (0); +} + +int +poll_add(void *arg, struct event *ev) +{ + struct pollop *pop = arg; + struct pollfd *pfd = NULL; + int i; + + if (ev->ev_events & EV_SIGNAL) + return (evsignal_add(ev)); + if (!(ev->ev_events & (EV_READ|EV_WRITE))) + return (0); + + poll_check_ok(pop); + if (pop->nfds + 1 >= pop->event_count) { + struct pollfd *tmp_event_set; + struct event **tmp_event_r_back; + struct event **tmp_event_w_back; + int tmp_event_count; + + if (pop->event_count < 32) + tmp_event_count = 32; + else + tmp_event_count = pop->event_count * 2; + + /* We need more file descriptors */ + tmp_event_set = realloc(pop->event_set, + tmp_event_count * sizeof(struct pollfd)); + if (tmp_event_set == NULL) { + event_warn("realloc"); + return (-1); + } + pop->event_set = tmp_event_set; + + tmp_event_r_back = realloc(pop->event_r_back, + tmp_event_count * sizeof(struct event *)); + if (tmp_event_r_back == NULL) { + /* event_set overallocated; that's okay. */ + event_warn("realloc"); + return (-1); + } + pop->event_r_back = tmp_event_r_back; + + tmp_event_w_back = realloc(pop->event_w_back, + tmp_event_count * sizeof(struct event *)); + if (tmp_event_w_back == NULL) { + /* event_set and event_r_back overallocated; that's + * okay. */ + event_warn("realloc"); + return (-1); + } + pop->event_w_back = tmp_event_w_back; + + pop->event_count = tmp_event_count; + } + if (ev->ev_fd >= pop->fd_count) { + int *tmp_idxplus1_by_fd; + int new_count; + if (pop->fd_count < 32) + new_count = 32; + else + new_count = pop->fd_count * 2; + while (new_count <= ev->ev_fd) + new_count *= 2; + tmp_idxplus1_by_fd = + realloc(pop->idxplus1_by_fd, new_count * sizeof(int)); + if (tmp_idxplus1_by_fd == NULL) { + event_warn("realloc"); + return (-1); + } + pop->idxplus1_by_fd = tmp_idxplus1_by_fd; + memset(pop->idxplus1_by_fd + pop->fd_count, + 0, sizeof(int)*(new_count - pop->fd_count)); + pop->fd_count = new_count; + } + + i = pop->idxplus1_by_fd[ev->ev_fd] - 1; + if (i >= 0) { + pfd = &pop->event_set[i]; + } else { + i = pop->nfds++; + pfd = &pop->event_set[i]; + pfd->events = 0; + pfd->fd = ev->ev_fd; + pop->event_w_back[i] = pop->event_r_back[i] = NULL; + pop->idxplus1_by_fd[ev->ev_fd] = i + 1; + } + + pfd->revents = 0; + if (ev->ev_events & EV_WRITE) { + pfd->events |= POLLOUT; + pop->event_w_back[i] = ev; + } + if (ev->ev_events & EV_READ) { + pfd->events |= POLLIN; + pop->event_r_back[i] = ev; + } + poll_check_ok(pop); + + return (0); +} + +/* + * Nothing to be done here. + */ + +int +poll_del(void *arg, struct event *ev) +{ + struct pollop *pop = arg; + struct pollfd *pfd = NULL; + int i; + + if (ev->ev_events & EV_SIGNAL) + return (evsignal_del(ev)); + + if (!(ev->ev_events & (EV_READ|EV_WRITE))) + return (0); + + poll_check_ok(pop); + i = pop->idxplus1_by_fd[ev->ev_fd] - 1; + if (i < 0) + return (-1); + + /* Do we still want to read or write? */ + pfd = &pop->event_set[i]; + if (ev->ev_events & EV_READ) { + pfd->events &= ~POLLIN; + pop->event_r_back[i] = NULL; + } + if (ev->ev_events & EV_WRITE) { + pfd->events &= ~POLLOUT; + pop->event_w_back[i] = NULL; + } + poll_check_ok(pop); + if (pfd->events) + /* Another event cares about that fd. */ + return (0); + + /* Okay, so we aren't interested in that fd anymore. */ + pop->idxplus1_by_fd[ev->ev_fd] = 0; + + --pop->nfds; + if (i != pop->nfds) { + /* + * Shift the last pollfd down into the now-unoccupied + * position. + */ + memcpy(&pop->event_set[i], &pop->event_set[pop->nfds], + sizeof(struct pollfd)); + pop->event_r_back[i] = pop->event_r_back[pop->nfds]; + pop->event_w_back[i] = pop->event_w_back[pop->nfds]; + pop->idxplus1_by_fd[pop->event_set[i].fd] = i + 1; + } + + poll_check_ok(pop); + return (0); +} + +void +poll_dealloc(void *arg) +{ + struct pollop *pop = arg; + + if (pop->event_set) + free(pop->event_set); + if (pop->event_r_back) + free(pop->event_r_back); + if (pop->event_w_back) + free(pop->event_w_back); + if (pop->idxplus1_by_fd) + free(pop->idxplus1_by_fd); + + memset(pop, 0, sizeof(struct pollop)); + free(pop); +} diff --git a/contrib/pf/libevent/select.c b/contrib/pf/libevent/select.c new file mode 100644 index 000000000000..6ce81a232bbe --- /dev/null +++ b/contrib/pf/libevent/select.c @@ -0,0 +1,370 @@ +/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */ + +/* + * Copyright 2000-2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CHECK_INVARIANTS +#include +#endif + +#include "event.h" +#include "event-internal.h" +#include "evsignal.h" +#include "log.h" + +#ifndef howmany +#define howmany(x, y) (((x)+((y)-1))/(y)) +#endif + +extern volatile sig_atomic_t evsignal_caught; + +struct selectop { + int event_fds; /* Highest fd in fd set */ + int event_fdsz; + fd_set *event_readset_in; + fd_set *event_writeset_in; + fd_set *event_readset_out; + fd_set *event_writeset_out; + struct event **event_r_by_fd; + struct event **event_w_by_fd; +}; + +void *select_init (void); +int select_add (void *, struct event *); +int select_del (void *, struct event *); +int select_recalc (struct event_base *, void *, int); +int select_dispatch (struct event_base *, void *, struct timeval *); +void select_dealloc (void *); + +const struct eventop selectops = { + "select", + select_init, + select_add, + select_del, + select_recalc, + select_dispatch, + select_dealloc +}; + +static int select_resize(struct selectop *sop, int fdsz); + +void * +select_init(void) +{ + struct selectop *sop; + + /* Disable select when this environment variable is set */ + if (getenv("EVENT_NOSELECT")) + return (NULL); + + if (!(sop = calloc(1, sizeof(struct selectop)))) + return (NULL); + + select_resize(sop, howmany(32 + 1, NFDBITS)*sizeof(fd_mask)); + + evsignal_init(); + + return (sop); +} + +#ifdef CHECK_INVARIANTS +static void +check_selectop(struct selectop *sop) +{ + int i; + for (i=0;i<=sop->event_fds;++i) { + if (FD_ISSET(i, sop->event_readset_in)) { + assert(sop->event_r_by_fd[i]); + assert(sop->event_r_by_fd[i]->ev_events & EV_READ); + assert(sop->event_r_by_fd[i]->ev_fd == i); + } else { + assert(! sop->event_r_by_fd[i]); + } + if (FD_ISSET(i, sop->event_writeset_in)) { + assert(sop->event_w_by_fd[i]); + assert(sop->event_w_by_fd[i]->ev_events & EV_WRITE); + assert(sop->event_w_by_fd[i]->ev_fd == i); + } else { + assert(! sop->event_w_by_fd[i]); + } + } + +} +#else +#define check_selectop(sop) do { (void) sop; } while (0) +#endif + +/* + * Called with the highest fd that we know about. If it is 0, completely + * recalculate everything. + */ + +int +select_recalc(struct event_base *base, void *arg, int max) +{ + struct selectop *sop = arg; + + check_selectop(sop); + + return (0); +} + +int +select_dispatch(struct event_base *base, void *arg, struct timeval *tv) +{ + int res, i; + struct selectop *sop = arg; + + check_selectop(sop); + + memcpy(sop->event_readset_out, sop->event_readset_in, + sop->event_fdsz); + memcpy(sop->event_writeset_out, sop->event_writeset_in, + sop->event_fdsz); + + res = select(sop->event_fds + 1, sop->event_readset_out, + sop->event_writeset_out, NULL, tv); + + check_selectop(sop); + + if (res == -1) { + if (errno != EINTR) { + event_warn("select"); + return (-1); + } + + evsignal_process(); + return (0); + } else if (evsignal_caught) + evsignal_process(); + + event_debug(("%s: select reports %d", __func__, res)); + + check_selectop(sop); + for (i = 0; i <= sop->event_fds; ++i) { + struct event *r_ev = NULL, *w_ev = NULL; + res = 0; + if (FD_ISSET(i, sop->event_readset_out)) { + r_ev = sop->event_r_by_fd[i]; + res |= EV_READ; + } + if (FD_ISSET(i, sop->event_writeset_out)) { + w_ev = sop->event_w_by_fd[i]; + res |= EV_WRITE; + } + if (r_ev && (res & r_ev->ev_events)) { + if (!(r_ev->ev_events & EV_PERSIST)) + event_del(r_ev); + event_active(r_ev, res & r_ev->ev_events, 1); + } + if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) { + if (!(w_ev->ev_events & EV_PERSIST)) + event_del(w_ev); + event_active(w_ev, res & w_ev->ev_events, 1); + } + } + check_selectop(sop); + + return (0); +} + + +static int +select_resize(struct selectop *sop, int fdsz) +{ + int n_events, n_events_old; + + fd_set *readset_in = NULL; + fd_set *writeset_in = NULL; + fd_set *readset_out = NULL; + fd_set *writeset_out = NULL; + struct event **r_by_fd = NULL; + struct event **w_by_fd = NULL; + + n_events = (fdsz/sizeof(fd_mask)) * NFDBITS; + n_events_old = (sop->event_fdsz/sizeof(fd_mask)) * NFDBITS; + + if (sop->event_readset_in) + check_selectop(sop); + + if ((readset_in = realloc(sop->event_readset_in, fdsz)) == NULL) + goto error; + sop->event_readset_in = readset_in; + if ((readset_out = realloc(sop->event_readset_out, fdsz)) == NULL) + goto error; + sop->event_readset_out = readset_out; + if ((writeset_in = realloc(sop->event_writeset_in, fdsz)) == NULL) + goto error; + sop->event_writeset_in = writeset_in; + if ((writeset_out = realloc(sop->event_writeset_out, fdsz)) == NULL) + goto error; + sop->event_writeset_out = writeset_out; + if ((r_by_fd = realloc(sop->event_r_by_fd, + n_events*sizeof(struct event*))) == NULL) + goto error; + sop->event_r_by_fd = r_by_fd; + if ((w_by_fd = realloc(sop->event_w_by_fd, + n_events * sizeof(struct event*))) == NULL) + goto error; + sop->event_w_by_fd = w_by_fd; + + memset((char *)sop->event_readset_in + sop->event_fdsz, 0, + fdsz - sop->event_fdsz); + memset((char *)sop->event_writeset_in + sop->event_fdsz, 0, + fdsz - sop->event_fdsz); + memset(sop->event_r_by_fd + n_events_old, 0, + (n_events-n_events_old) * sizeof(struct event*)); + memset(sop->event_w_by_fd + n_events_old, 0, + (n_events-n_events_old) * sizeof(struct event*)); + + sop->event_fdsz = fdsz; + check_selectop(sop); + + return (0); + + error: + event_warn("malloc"); + return (-1); +} + + +int +select_add(void *arg, struct event *ev) +{ + struct selectop *sop = arg; + + if (ev->ev_events & EV_SIGNAL) + return (evsignal_add(ev)); + + check_selectop(sop); + /* + * Keep track of the highest fd, so that we can calculate the size + * of the fd_sets for select(2) + */ + if (sop->event_fds < ev->ev_fd) { + int fdsz = sop->event_fdsz; + + if (fdsz < sizeof(fd_mask)) + fdsz = sizeof(fd_mask); + + while (fdsz < + (howmany(ev->ev_fd + 1, NFDBITS) * sizeof(fd_mask))) + fdsz *= 2; + + if (fdsz != sop->event_fdsz) { + if (select_resize(sop, fdsz)) { + check_selectop(sop); + return (-1); + } + } + + sop->event_fds = ev->ev_fd; + } + + if (ev->ev_events & EV_READ) { + FD_SET(ev->ev_fd, sop->event_readset_in); + sop->event_r_by_fd[ev->ev_fd] = ev; + } + if (ev->ev_events & EV_WRITE) { + FD_SET(ev->ev_fd, sop->event_writeset_in); + sop->event_w_by_fd[ev->ev_fd] = ev; + } + check_selectop(sop); + + return (0); +} + +/* + * Nothing to be done here. + */ + +int +select_del(void *arg, struct event *ev) +{ + struct selectop *sop = arg; + + check_selectop(sop); + if (ev->ev_events & EV_SIGNAL) + return (evsignal_del(ev)); + + if (sop->event_fds < ev->ev_fd) { + check_selectop(sop); + return (0); + } + + if (ev->ev_events & EV_READ) { + FD_CLR(ev->ev_fd, sop->event_readset_in); + sop->event_r_by_fd[ev->ev_fd] = NULL; + } + + if (ev->ev_events & EV_WRITE) { + FD_CLR(ev->ev_fd, sop->event_writeset_in); + sop->event_w_by_fd[ev->ev_fd] = NULL; + } + + check_selectop(sop); + return (0); +} + +void +select_dealloc(void *arg) +{ + struct selectop *sop = arg; + + if (sop->event_readset_in) + free(sop->event_readset_in); + if (sop->event_writeset_in) + free(sop->event_writeset_in); + if (sop->event_readset_out) + free(sop->event_readset_out); + if (sop->event_writeset_out) + free(sop->event_writeset_out); + if (sop->event_r_by_fd) + free(sop->event_r_by_fd); + if (sop->event_w_by_fd) + free(sop->event_w_by_fd); + + memset(sop, 0, sizeof(struct selectop)); + free(sop); +} diff --git a/contrib/pf/libevent/signal.c b/contrib/pf/libevent/signal.c new file mode 100644 index 000000000000..71bcffcba5b3 --- /dev/null +++ b/contrib/pf/libevent/signal.c @@ -0,0 +1,180 @@ +/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */ + +/* + * Copyright 2000-2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif + +#include "event.h" +#include "evsignal.h" +#include "log.h" + +extern struct event_list signalqueue; + +static sig_atomic_t evsigcaught[NSIG]; +volatile sig_atomic_t evsignal_caught = 0; + +static struct event ev_signal; +static int ev_signal_pair[2]; +static int ev_signal_added; + +static void evsignal_handler(int sig); + +/* Callback for when the signal handler write a byte to our signaling socket */ +static void +evsignal_cb(int fd, short what, void *arg) +{ + static char signals[100]; + struct event *ev = arg; + ssize_t n; + + n = read(fd, signals, sizeof(signals)); + if (n == -1) + event_err(1, "%s: read", __func__); + event_add(ev, NULL); +} + +#ifdef HAVE_SETFD +#define FD_CLOSEONEXEC(x) do { \ + if (fcntl(x, F_SETFD, 1) == -1) \ + event_warn("fcntl(%d, F_SETFD)", x); \ +} while (0) +#else +#define FD_CLOSEONEXEC(x) +#endif + +void +evsignal_init(void) +{ + /* + * Our signal handler is going to write to one end of the socket + * pair to wake up our event loop. The event loop then scans for + * signals that got delivered. + */ + if (socketpair(AF_UNIX, SOCK_STREAM, 0, ev_signal_pair) == -1) + event_err(1, "%s: socketpair", __func__); + + FD_CLOSEONEXEC(ev_signal_pair[0]); + FD_CLOSEONEXEC(ev_signal_pair[1]); + + fcntl(ev_signal_pair[0], F_SETFL, O_NONBLOCK); + + event_set(&ev_signal, ev_signal_pair[1], EV_READ, + evsignal_cb, &ev_signal); + ev_signal.ev_flags |= EVLIST_INTERNAL; +} + +int +evsignal_add(struct event *ev) +{ + int evsignal; + struct sigaction sa; + + if (ev->ev_events & (EV_READ|EV_WRITE)) + event_errx(1, "%s: EV_SIGNAL incompatible use", __func__); + evsignal = EVENT_SIGNAL(ev); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = evsignal_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags |= SA_RESTART; + + if (sigaction(evsignal, &sa, NULL) == -1) + return (-1); + + if (!ev_signal_added) { + ev_signal_added = 1; + event_add(&ev_signal, NULL); + } + + return (0); +} + +/* + * Nothing to be done here. + */ + +int +evsignal_del(struct event *ev) +{ + int evsignal; + + evsignal = EVENT_SIGNAL(ev); + + return (sigaction(EVENT_SIGNAL(ev),(struct sigaction *)SIG_DFL, NULL)); +} + +static void +evsignal_handler(int sig) +{ + int save_errno = errno; + + evsigcaught[sig]++; + evsignal_caught = 1; + + /* Wake up our notification mechanism */ + write(ev_signal_pair[0], "a", 1); + errno = save_errno; +} + +void +evsignal_process(void) +{ + struct event *ev; + sig_atomic_t ncalls; + + evsignal_caught = 0; + TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) { + ncalls = evsigcaught[EVENT_SIGNAL(ev)]; + if (ncalls) { + if (!(ev->ev_events & EV_PERSIST)) + event_del(ev); + event_active(ev, EV_SIGNAL, ncalls); + evsigcaught[EVENT_SIGNAL(ev)] = 0; + } + } +} + diff --git a/contrib/pf/man/pf.4 b/contrib/pf/man/pf.4 index a394627c0168..1164202d234b 100644 --- a/contrib/pf/man/pf.4 +++ b/contrib/pf/man/pf.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pf.4,v 1.54 2004/12/22 17:17:55 dhartmei Exp $ +.\" $OpenBSD: pf.4,v 1.58 2007/02/09 11:39:06 henning Exp $ .\" .\" Copyright (C) 2001, Kjell Wooding. All rights reserved. .\" @@ -184,6 +184,11 @@ using the obtained through a preceding .Dv DIOCGETRULES call. +If +.Va action +is set to +.Dv PF_GET_CLR_CNTR , +the per-rule statistics on the requested rule are cleared. .It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr *pp" Get a .Va ticket @@ -346,6 +351,7 @@ struct pf_status { u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[MD5_DIGEST_LENGTH]; }; .Ed .It Dv DIOCCLRSTATUS @@ -389,19 +395,14 @@ struct pfioc_states { .Pp If .Va ps_len -is zero, all states will be gathered into -.Va pf_states -and +is non-zero on entry, as many states as possible that can fit into this +size will be copied into the supplied buffer +.Va ps_states . +On exit, .Va ps_len -will be set to the size they take in memory (i.e., +is always set to the total size required to hold all state table entries +(i.e., it is set to .Li sizeof(struct pf_state) * nr ) . -If -.Va ps_len -is non-zero, as many states that can fit into -.Va ps_len -as possible will be gathered, and -.Va ps_len -will be updated to the size those rules take in memory. .It Dv DIOCCHANGERULE Fa "struct pfioc_rule *pcr" Add or remove the .Va rule @@ -483,7 +484,8 @@ struct pfioc_limit { unsigned limit; }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; .Ed .It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl" Get the hard @@ -521,10 +523,15 @@ struct pfioc_table { .It Dv DIOCRADDTABLES Fa "struct pfioc_table *io" Create one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, .Va pfrio_nadd contains the number of tables effectively created. @@ -539,12 +546,17 @@ struct pfr_table { .It Dv DIOCRDELTABLES Fa "struct pfioc_table *io" Delete one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, -.Va pfrio_nadd +.Va pfrio_ndel contains the number of tables effectively deleted. .It Dv DIOCRGETTABLES Fa "struct pfioc_table *io" Get the list of all tables. @@ -583,10 +595,15 @@ struct pfr_tstats { .It Dv DIOCRCLRTSTATS Fa "struct pfioc_table *io" Clear the statistics of one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, .Va pfrio_nzero contains the number of tables effectively cleared. @@ -603,10 +620,15 @@ Add one or more addresses to a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the list of -.Vt pfr_addr -structures to add. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to add to the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_nadd contains the number of addresses effectively added. @@ -629,10 +651,15 @@ Delete one or more addresses from a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the list of -.Vt pfr_addr -structures to delete. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to delete from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_ndel contains the number of addresses effectively deleted. @@ -643,10 +670,15 @@ This is the most complicated command, which uses all the structure members. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the new list of -.Vt pfr_addr -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements which become the new contents of the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . Additionally, if .Va pfrio_size2 is non-zero, @@ -701,10 +733,15 @@ Clear the statistics of one or more addresses. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_addr -structures to clear. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to be cleared from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_nzero contains the number of addresses effectively cleared. @@ -713,13 +750,18 @@ Test if the given addresses match a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_addr -structures to test. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements, each of which will be tested for a match in the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, the kernel updates the .Vt pfr_addr -table by setting the +array by setting the .Va pfra_fback member appropriately. .It Dv DIOCRSETTFLAGS Fa "struct pfioc_table *io" @@ -729,14 +771,19 @@ or .Dv PFR_TFLAG_PERSIST flags of a table. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures, and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Va pfrio_esize +must be the size of +.Vt struct pfr_table . .Va pfrio_setflag -contains the flags to add, while +must contain the flags to add, while .Va pfrio_clrflag -contains the flags to remove. +must contain the flags to remove. On exit, .Va pfrio_nchange and @@ -751,7 +798,7 @@ On entry, .Va pfrio_table contains the table ID and .Va pfrio_buffer[pfrio_size] -contains the list of +contains an array of .Vt pfr_addr structures to put in the table. A valid ticket must also be supplied to @@ -953,10 +1000,6 @@ struct pfioc_iface { int pfiio_nzero; int pfiio_flags; }; - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 .Ed .Pp If not empty, @@ -966,61 +1009,45 @@ can be used to restrict the search to a specific interface or driver. is the user-supplied buffer for returning the data. On entry, .Va pfiio_size -represents the number of -.Va pfi_if +contains the number of +.Vt pfi_kif entries that can fit into the buffer. The kernel will replace this value by the real number of entries it wants to return. .Va pfiio_esize should be set to -.Li sizeof(struct pfi_if) . -.Va pfiio_flags -should be set to -.Dv PFI_FLAG_GROUP , -.Dv PFI_FLAG_INSTANCE , -or both, to tell the kernel to return a group of interfaces -(drivers, like "fxp"), real interface instances (like "fxp1") or both. +.Li sizeof(struct pfi_kif) . +.Pp The data is returned in the -.Vt pfi_if +.Vt pfi_kif structure described below: .Bd -literal -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; +struct pfi_kif { + RB_ENTRY(pfi_kif) pfik_tree; + char pfik_name[IFNAMSIZ]; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; + struct pf_state_tree_lan_ext pfik_lan_ext; + struct pf_state_tree_ext_gwy pfik_ext_gwy; + TAILQ_ENTRY(pfi_kif) pfik_w_states; + void *pfik_ah_cookie; + struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; + int pfik_states; + int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; - -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ .Ed -.It Dv DIOCICLRISTATS Fa "struct pfioc_iface *io" -Clear the statistics counters of one or more interfaces. -.Va pfiio_name -and -.Va pfiio_flags -can be used to select which interfaces need to be cleared. -The filtering process is the same as for -.Dv DIOCIGETIFACES . -.Va pfiio_nzero -will be set by the kernel to the number of interfaces and drivers -that have been cleared. .It Dv DIOCSETIFFLAG Fa "struct pfioc_iface *io" -Set the user setable flags (described below) of the pf internal interface -description. +Set the user setable flags (described above) of the +.Nm +internal interface description. The filtering process is the same as for .Dv DIOCIGETIFACES . .Bd -literal -#define PFI_IFLAG_SKIP 0x0100 /* skip interface */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* mask */ +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ .Ed .It Dv DIOCCLRIFFLAG Fa "struct pfioc_iface *io" Works as diff --git a/contrib/pf/man/pf.conf.5 b/contrib/pf/man/pf.conf.5 index 817fa0b854c6..bb210fc7fc09 100644 --- a/contrib/pf/man/pf.conf.5 +++ b/contrib/pf/man/pf.conf.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pf.conf.5,v 1.326 2005/03/01 18:10:44 jmc Exp $ +.\" $OpenBSD: pf.conf.5,v 1.376 2006/12/01 07:23:26 camield Exp $ .\" .\" Copyright (c) 2002, Daniel Hartmeier .\" All rights reserved. @@ -62,8 +62,7 @@ Queueing provides rule-based bandwidth control. Translation rules specify how addresses are to be mapped or redirected to other addresses. .It Cm Packet Filtering -Stateful and stateless packet filtering provides rule-based blocking or -passing of packets. +Packet filtering provides rule-based blocking or passing of packets. .El .Pp With the exception of @@ -80,11 +79,7 @@ enforces this order (see .Ar set require-order below). .Sh MACROS -Much like -.Xr cpp 1 -or -.Xr m4 1 , -macros can be defined that will later be expanded in context. +Macros can be defined that will later be expanded in context. Macro names must start with a letter, and may contain letters, digits and underscores. Macro names may not be reserved words (for example @@ -97,8 +92,8 @@ For example, .Bd -literal -offset indent ext_if = \&"kue0\&" all_ifs = \&"{\&" $ext_if lo0 \&"}\&" -pass out on $ext_if from any to any keep state -pass in on $ext_if proto tcp from any to any port 25 keep state +pass out on $ext_if from any to any +pass in on $ext_if proto tcp from any to any port 25 .Ed .Sh TABLES Tables are named structures which can hold a collection of addresses and @@ -181,9 +176,9 @@ when running with .Pp For example, .Bd -literal -offset indent -table const { 10/8, 172.16/12, 192.168/16 } -table persist -block on fxp0 from { , } to any +table \*(Ltprivate\*(Gt const { 10/8, 172.16/12, 192.168/16 } +table \*(Ltbadhosts\*(Gt persist +block on fxp0 from { \*(Ltprivate\*(Gt, \*(Ltbadhosts\*(Gt } to any .Ed .Pp creates a table called private, to hold RFC 1918 private network @@ -201,8 +196,8 @@ these hosts can be blocked by using A table can also be initialized with an address list specified in one or more external files, using the following syntax: .Bd -literal -offset indent -table persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" -block on fxp0 from to any +table \*(Ltspam\*(Gt persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" +block on fxp0 from \*(Ltspam\*(Gt to any .Ed .Pp The files @@ -217,7 +212,7 @@ When the resolver is called to add a hostname to a table, .Em all resulting IPv4 and IPv6 addresses are placed into the table. IP addresses can also be entered in a table by specifying a valid interface -name or the +name, a valid interface group or the .Em self keyword, in which case all addresses assigned to the interface(s) will be added to the table. @@ -310,7 +305,12 @@ This value is used to define the scale factor, it should not actually be reached (set a lower state limit, see below). .El .Pp -These values can be defined both globally and for each rule. +Adaptive timeouts are enabled by default, with an adaptive.start value +equal to 60% of the state limit, and an adaptive.end value equal to +120% of the state limit. +They can be disabled by setting both adaptive.start and adaptive.end to 0. +.Pp +The adaptive timeout values can be defined both globally and for each rule. When used on a per-rule basis, the values relate to the number of states created by the rule, otherwise to the total number of states. @@ -358,8 +358,10 @@ set limit states 20000 .Pp sets the maximum number of entries in the memory pool used by state table entries (generated by -.Ar keep state -rules) to 20000. +.Ar pass +rules which do not specify +.Ar no state ) +to 20000. Using .Bd -literal -offset indent set limit frags 20000 @@ -369,7 +371,7 @@ sets the maximum number of entries in the memory pool used for fragment reassembly (generated by .Ar scrub rules) to 20000. -Finally, +Using .Bd -literal -offset indent set limit src-nodes 2000 .Ed @@ -378,16 +380,63 @@ sets the maximum number of entries in the memory pool used for tracking source IP addresses (generated by the .Ar sticky-address and -.Ar source-track +.Ar src.track options) to 2000. +Using +.Bd -literal -offset indent +set limit tables 1000 +set limit table-entries 100000 +.Ed .Pp -These can be combined: +sets limits on the memory pools used by tables. +The first limits the number of tables that can exist to 1000. +The second limits the overall number of addresses that can be stored +in tables to 100000. +.Pp +Various limits can be combined on a single line: .Bd -literal -offset indent set limit { states 20000, frags 20000, src-nodes 2000 } .Ed .Pp +.It Ar set ruleset-optimization +.Bl -tag -width xxxxxxxx -compact +.It Ar none +Disable the ruleset optimizer. +This is the default behaviour. +.It Ar basic +Enable basic ruleset optimization, which does four things to improve the +performance of ruleset evaluations: +.Pp +.Bl -enum -compact +.It +remove duplicate rules +.It +remove rules that are a subset of another rule +.It +combine multiple rules into a table when advantageous +.It +re-order the rules to improve evaluation performance +.El +.Pp +.It Ar profile +Uses the currently loaded ruleset as a feedback profile to tailor the +ordering of quick rules to actual network traffic. +.El +.Pp +It is important to note that the ruleset optimizer will modify the ruleset +to improve performance. +A side effect of the ruleset modification is that per-rule accounting +statistics will have different meanings than before. +If per-rule accounting is important for billing purposes or whatnot, +either the ruleset optimizer should not be used or a label field should +be added to all of the accounting rules to act as optimization barriers. +.Pp +Optimization can also be set as a command-line argument to +.Xr pfctl 8 , +overriding the settings in +.Nm . .It Ar set optimization -Optimize the engine for one of the following network environments: +Optimize state timeouts for one of the following network environments: .Pp .Bl -tag -width xxxx -compact .It Ar normal @@ -442,8 +491,6 @@ option sets the default behaviour for states: .Bl -tag -width group-bound -compact .It Ar if-bound States are bound to interface. -.It Ar group-bound -States are bound to interface group (i.e. ppp) .It Ar floating States can match packets on any interfaces (the default). .El @@ -452,6 +499,21 @@ For example: .Bd -literal -offset indent set state-policy if-bound .Ed +.It Ar set hostid +The 32-bit +.Ar hostid +identifies this firewall's state table entries to other firewalls +in a +.Xr pfsync 4 +failover cluster. +By default the hostid is set to a pseudo-random value, however it may be +desirable to manually configure it, for example to more easily identify the +source of state table entries. +.Bd -literal -offset indent +set hostid 1 +.Ed +.Pp +The hostid may be specified in either decimal or hexadecimal. .It Ar set require-order By default .Xr pfctl 8 @@ -483,7 +545,7 @@ For example: .Pp .Dl set fingerprints \&"/etc/pf.os.devel\&" .Pp -.It Ar set skip on +.It Ar set skip on Aq Ar ifspec List interfaces for which packets should not be filtered. Packets passing in or out on such interfaces are passed as if pf was disabled, i.e. pf does not process them in any way. @@ -550,9 +612,9 @@ Using the modifier (see below) is recommended in combination with the .Ar no-df modifier to ensure unique IP identifiers. -.It Ar min-ttl +.It Ar min-ttl Aq Ar number Enforces a minimum TTL for matching IP packets. -.It Ar max-mss +.It Ar max-mss Aq Ar number Enforces a maximum MSS for matching TCP packets. .It Ar random-id Replaces the IP identification field with random values to compensate @@ -763,9 +825,9 @@ declaration. .Ar altq on has the following keywords: .Bl -tag -width xxxx -.It Ar +.It Aq Ar interface Queueing is enabled on the named interface. -.It Ar +.It Aq Ar scheduler Specifies which queueing scheduler to use. Currently supported values are @@ -775,7 +837,7 @@ for Class Based Queueing, for Priority Queueing and .Ar hfsc for the Hierarchical Fair Service Curve scheduler. -.It Ar bandwidth +.It Ar bandwidth Aq Ar bw The maximum bitrate for all queues on an interface may be specified using the .Ar bandwidth @@ -793,15 +855,17 @@ gigabits per second, respectively. The value must not exceed the interface bandwidth. If .Ar bandwidth -is not specified, the interface bandwidth is used. -.It Ar qlimit +is not specified, the interface bandwidth is used +(but take note that some interfaces do not know their bandwidth, +or can adapt their bandwidth rates). +.It Ar qlimit Aq Ar limit The maximum number of packets held in the queue. The default is 50. -.It Ar tbrsize +.It Ar tbrsize Aq Ar size Adjusts the size, in bytes, of the token bucket regulator. If not specified, heuristics based on the interface bandwidth are used to determine the size. -.It Ar queue +.It Ar queue Aq Ar list Defines a list of subqueues to create on an interface. .El .Pp @@ -830,10 +894,10 @@ in a parent declaration. The following keywords can be used: .Bl -tag -width xxxx -.It Ar on +.It Ar on Aq Ar interface Specifies the interface the queue operates on. If not given, it operates on all matching interfaces. -.It Ar bandwidth +.It Ar bandwidth Aq Ar bw Specifies the maximum bitrate to be processed by the queue. This value must not exceed the value of the parent .Ar queue @@ -843,7 +907,7 @@ If not specified, defaults to 100% of the parent queue's bandwidth. The .Ar priq scheduler does not support bandwidth specification. -.It Ar priority +.It Ar priority Aq Ar level Between queues a priority level can be set. For .Ar cbq @@ -859,7 +923,7 @@ queues with a higher priority are always served first. and .Ar Hfsc queues with a higher priority are preferred in the case of overload. -.It Ar qlimit +.It Ar qlimit Aq Ar limit The maximum number of packets held in the queue. The default is 50. .El @@ -867,7 +931,9 @@ The default is 50. The .Ar scheduler can get additional parameters with -.Ar Ns Li (\& Ar No ) . +.Xo Aq Ar scheduler +.Pf ( Aq Ar parameters ) . +.Xc Parameters are as follows: .Bl -tag -width Fl .It Ar default @@ -901,15 +967,16 @@ The .Ar scheduler supports some additional options: .Bl -tag -width Fl -.It Ar realtime +.It Ar realtime Aq Ar sc The minimum required bandwidth for the queue. -.It Ar upperlimit +.It Ar upperlimit Aq Ar sc The maximum allowed bandwidth for the queue. -.It Ar linkshare +.It Ar linkshare Aq Ar sc The bandwidth share of a backlogged queue. .El .Pp - is an acronym for +.Aq Ar sc +is an acronym for .Ar service curve . .Pp The format for service curve specifications is @@ -973,13 +1040,13 @@ queue ssh_bulk bandwidth 50% priority 0 cbq(borrow) block return out on dc0 inet all queue std pass out on dc0 inet proto tcp from $developerhosts to any port 80 \e - keep state queue developers + queue developers pass out on dc0 inet proto tcp from $employeehosts to any port 80 \e - keep state queue employees + queue employees pass out on dc0 inet proto tcp from any to any port 22 \e - keep state queue(ssh_bulk, ssh_interactive) + queue(ssh_bulk, ssh_interactive) pass out on dc0 inet proto tcp from any to any port 25 \e - keep state queue mail + queue mail .Ed .Sh TRANSLATION Translation rules modify either the source or destination address of the @@ -1039,9 +1106,9 @@ The packet is redirected to another destination and possibly a different port. .Ar rdr rules can optionally specify port ranges instead of single ports. -rdr ... port 2000:2999 -> ... port 4000 +rdr ... port 2000:2999 -\*(Gt ... port 4000 redirects ports 2000 to 2999 (inclusive) to port 4000. -rdr ... port 2000:2999 -> ... port 4000:* +rdr ... port 2000:2999 -\*(Gt ... port 4000:* redirects port 2000 to 4000, 2001 to 4001, ..., 2999 to 4999. .El .Pp @@ -1059,8 +1126,17 @@ Port numbers are never translated with a .Ar binat rule. .Pp -For each packet processed by the translator, the translation rules are -evaluated in sequential order, from first to last. +Evaluation order of the translation rules is dependent on the type +of the translation rules and of the direction of a packet. +.Ar binat +rules are always evaluated first. +Then either the +.Ar rdr +rules are evaluated on an inbound packet or the +.Ar nat +rules on an outbound packet. +Rules of the same type are evaluated in the same order in which they +appear in the ruleset. The first matching rule decides what action is taken. .Pp The @@ -1086,7 +1162,7 @@ or to the firewall itself. Note that redirecting external incoming connections to the loopback address, as in .Bd -literal -offset indent -rdr on ne3 inet proto tcp to port 8025 -> 127.0.0.1 port 25 +rdr on ne3 inet proto tcp to port spamd -\*(Gt 127.0.0.1 port smtp .Ed .Pp will effectively allow an external host to connect to daemons @@ -1122,6 +1198,8 @@ assigned to queues for the purpose of bandwidth control. For each packet processed by the packet filter, the filter rules are evaluated in sequential order, from first to last. The last matching rule decides what action is taken. +If no rule matches the packet, the default action is to pass +the packet. .Pp The following actions can be used in the filter: .Bl -tag -width xxxx @@ -1161,24 +1239,87 @@ Options returning ICMP packets currently have no effect if operates on a .Xr bridge 4 , as the code to support this feature has not yet been implemented. -.It Ar pass -The packet is passed. -.El .Pp -If no rule matches the packet, the default action is -.Ar pass . -.Pp -To block everything by default and only pass packets -that match explicit rules, one uses +The simplest mechanism to block everything by default and only pass +packets that match explicit rules is specify a first filter rule of: .Bd -literal -offset indent block all .Ed +.It Ar pass +The packet is passed; +state is created state unless the +.Ar no state +option is specified. +.El .Pp -as the first filter rule. +By default +.Xr pf 4 +filters packets statefully; the first time a packet matches a +.Ar pass +rule, a state entry is created; for subsequent packets the filter checks +whether the packet matches any state. +If it does, the packet is passed without evaluation of any rules. +After the connection is closed or times out, the state entry is automatically +removed. .Pp +This has several advantages. +For TCP connections, comparing a packet to a state involves checking +its sequence numbers, as well as TCP timestamps if a +.Ar scrub reassemble tcp +rule applies to the connection. +If these values are outside the narrow windows of expected +values, the packet is dropped. +This prevents spoofing attacks, such as when an attacker sends packets with +a fake source address/port but does not know the connection's sequence +numbers. +Similarly, +.Xr pf 4 +knows how to match ICMP replies to states. +For example, +.Bd -literal -offset indent +pass out inet proto icmp all icmp-type echoreq +.Ed +.Pp +allows echo requests (such as those created by +.Xr ping 8 ) +out statefully, and matches incoming echo replies correctly to states. +.Pp +Also, looking up states is usually faster than evaluating rules. +If there are 50 rules, all of them are evaluated sequentially in O(n). +Even with 50000 states, only 16 comparisons are needed to match a +state, since states are stored in a binary search tree that allows +searches in O(log2 n). +.Pp +Furthermore, correct handling of ICMP error messages is critical to +many protocols, particularly TCP. +.Xr pf 4 +matches ICMP error messages to the correct connection, checks them against +connection parameters, and passes them if appropriate. +For example if an ICMP source quench message referring to a stateful TCP +connection arrives, it will be matched to the state and get passed. +.Pp +Finally, state tracking is required for +.Ar nat , binat No and Ar rdr +rules, in order to track address and port translations and reverse the +translation on returning packets. +.Pp +.Xr pf 4 +will also create state for other protocols which are effectively stateless by +nature. +UDP packets are matched to states using only host addresses and ports, +and other protocols are matched to states using only the host addresses. +.Pp +If stateless filtering of individual packets is desired, +the +.Ar no state +keyword can be used to specify that state will not be created +if this is the last matching rule. +A number of parameters can also be set to affect how +.Xr pf 4 +handles state tracking. See -.Sx FILTER EXAMPLES -below. +.Sx STATEFUL TRACKING OPTIONS +below for further details. .Sh PARAMETERS The rule parameters specify the packets to which a rule applies. A packet always comes in on, or goes out through, one interface. @@ -1198,22 +1339,14 @@ nor are specified, the rule will match packets in both directions. .It Ar log In addition to the action specified, a log message is generated. -All packets for that connection are logged, unless the -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -options are specified, in which case only the -packet that establishes the state is logged. -(See -.Ar keep state , -.Ar modulate state -and -.Ar synproxy state -below). -The logged packets are sent to the +Only the packet that establishes the state is logged, +unless the +.Ar no state +option is specified. +The logged packets are sent to a .Xr pflog 4 -interface. +interface, by default +.Ar pflog0 . This interface is monitored by the .Xr pflogd 8 logging daemon, which dumps the logged packets to the file @@ -1221,35 +1354,48 @@ logging daemon, which dumps the logged packets to the file in .Xr pcap 3 binary format. -.It Ar log-all -Used with -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -rules to force logging of all packets for a connection. +.It Ar log (all) +Used to force logging of all packets for a connection. +This is not necessary when +.Ar no state +is explicitly specified. As with .Ar log , packets are logged to .Xr pflog 4 . +.It Ar log (user) +Logs the +.Ux +user ID of the user that owns the socket and the PID of the process that +has the socket open where the packet is sourced from or destined to +(depending on which socket is local). +This is in addition to the normal information logged. +.It Ar log (to Aq Ar interface ) +Send logs to the specified +.Xr pflog 4 +interface instead of +.Ar pflog0 . .It Ar quick If a packet matches a rule which has the .Ar quick option set, this rule is considered the last matching rule, and evaluation of subsequent rules is skipped. -.It Ar on +.It Ar on Aq Ar interface This rule applies only to packets coming in on, or going out through, this -particular interface. -It is also possible to simply give the interface driver name, like ppp or fxp, -to make the rule match packets flowing through a group of interfaces. -.It Ar +particular interface or interface group. +For more information on interface groups, +see the +.Ic group +keyword in +.Xr ifconfig 8 . +.It Aq Ar af This rule applies only to packets of this address family. Supported values are .Ar inet and .Ar inet6 . -.It Ar proto +.It Ar proto Aq Ar protocol This rule applies only to packets of this protocol. Common protocols are .Xr icmp 4 , @@ -1262,8 +1408,11 @@ For a list of all the protocol name to number mappings used by see the file .Em /etc/protocols . .It Xo -.Ar from port os -.Ar to port +.Ar from Aq Ar source +.Ar port Aq Ar source +.Ar os Aq Ar source +.Ar to Aq Ar dest +.Ar port Aq Ar dest .Xc This rule applies only to packets with the specified source and destination addresses and ports. @@ -1274,16 +1423,20 @@ symbolic host names or interface names, or as any of the following keywords: .Bl -tag -width xxxxxxxxxxxxxx -compact .It Ar any Any address. -.It Ar route