1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-10-20 02:38:43 +00:00

netlink: use protocol specific receive buffer

Implement Netlink socket receive buffer as a simple TAILQ of nl_buf's,
same part of struct sockbuf that is used for send buffer already.
This shaves a lot of code and a lot of extra processing.  The pcb gets rid
of the I/O queues as the socket buffer is exactly the queue.  The
message writer is simplified a lot, as we now always deal with linear
buf.  The notion of different buffer types goes away, as well as different
kinds of writers.  The only things remaining are: a socket writer and
a group writer.
The impact on the network stack is that we no longer use mbufs, so
a workaround from d187154750 disappears.

Note on message throttling.  Now the taskqueue throttling mechanism
needs to look at both socket buffers protected by their respective
locks and on flags in the pcb that are protected by the pcb lock.
There is definitely some room for optimization, but this change tries
to preserve as much as possible.

Note on new nl_soreceive().  It emulates soreceive_generic().  It
must undergo further optimization, see large comment put in there.

Note on tests/sys/netlink/test_netlink_message_writer.py. This test
boiled down almost to nothing with mbufs removed.  However, I left
it with minimal functionality (it basically checks that when allocating N
bytes we get N bytes), as it is one of the few examples of the ktest
framework that allows testing KPIs with Python.

Note on Linux support.  It got much simpler: the Netlink message writer
loses notion of Linux support lifetime, it is same regardless of
process ABI.  On socket write from Linux process we perform
conversion immediately in nl_receive_message(), and on output the
conversion to Linux happens in nl_send_one().  XXX: both
conversions use M_NOWAIT allocation, which used to be the case
before this change, too.

Reviewed by:		melifaro
Differential Revision:	https://reviews.freebsd.org/D42524
This commit is contained in:
Gleb Smirnoff 2024-01-02 13:04:01 -08:00
parent 0ad011ecec
commit 17083b94a9
13 changed files with 416 additions and 1067 deletions

View File

@ -32,7 +32,6 @@
#include <sys/ck.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/vnode.h>
@ -44,6 +43,7 @@
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_linux.h>
#include <netlink/netlink_var.h>
#include <netlink/netlink_route.h>
#include <compat/linux/linux.h>
@ -187,6 +187,7 @@ handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
if (out_hdr != NULL) {
memcpy(out_hdr, hdr, hdr->nlmsg_len);
nw->num_messages++;
return (true);
}
return (false);
@ -518,8 +519,7 @@ nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *
}
static bool
nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
struct nl_writer *nw)
nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
{
if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
switch (hdr->nlmsg_type) {
@ -536,7 +536,7 @@ nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
}
}
switch (netlink_family) {
switch (nlp->nl_proto) {
case NETLINK_ROUTE:
return (rtnl_to_linux(hdr, nlp, nw));
default:
@ -544,64 +544,49 @@ nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
}
}
static struct mbuf *
nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
static bool
nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp)
{
RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
struct nl_writer nw = {};
struct nl_buf *nb, *orig;
u_int offset, msglen, orig_messages __diagused;
struct mbuf *m = NULL;
if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
data_length);
return (NULL);
}
RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__,
nw->buf->datalen, nw->num_messages);
orig = nw->buf;
nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT);
if (__predict_false(nb == NULL))
return (false);
nw->buf = nb;
#ifdef INVARIANTS
orig_messages = nw->num_messages;
#endif
nw->num_messages = 0;
/* Assume correct headers. Buffer IS mutable */
int count = 0;
for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
count++;
for (offset = 0;
offset + sizeof(struct nlmsghdr) <= orig->datalen;
offset += msglen) {
struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
msglen = NLMSG_ALIGN(hdr->nlmsg_len);
if (!nlmsg_to_linux(hdr, nlp, nw)) {
RT_LOG(LOG_DEBUG, "failed to process msg type %d",
hdr->nlmsg_type);
m_freem(m);
return (NULL);
nl_buf_free(nb);
return (false);
}
offset += msglen;
}
nlmsg_flush(&nw);
RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
m ? m_length(m, NULL) : 0);
return (m);
}
MPASS(nw->num_messages == orig_messages);
MPASS(nw->buf == nb);
nl_buf_free(orig);
RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset);
static struct mbuf *
mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
{
/* XXX: easiest solution, not optimized for performance */
int data_length = m_length(m, NULL);
char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
if (buf == NULL) {
RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
data_length);
m_freem(m);
return (NULL);
}
m_copydata(m, 0, data_length, buf);
m_freem(m);
m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
free(buf, M_LINUX);
return (m);
return (true);
}
static struct linux_netlink_provider linux_netlink_v1 = {
.mbufs_to_linux = mbufs_to_linux,
.msgs_to_linux = nlmsgs_to_linux,
.msg_from_linux = nlmsg_from_linux,
};

View File

@ -29,9 +29,9 @@
#include <sys/cdefs.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_var.h>
#include <netlink/netlink_message_writer.h>
#define KTEST_CALLER
@ -39,54 +39,47 @@
#ifdef INVARIANTS
struct test_mbuf_attrs {
struct test_nlbuf_attrs {
uint32_t size;
uint32_t expected_avail;
uint32_t expected_count;
uint32_t wtype;
int waitok;
};
#define _OUT(_field) offsetof(struct test_mbuf_attrs, _field)
static const struct nlattr_parser nla_p_mbuf_w[] = {
#define _OUT(_field) offsetof(struct test_nlbuf_attrs, _field)
static const struct nlattr_parser nla_p_nlbuf_w[] = {
{ .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 },
{ .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 },
{ .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 },
{ .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 },
{ .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
{ .type = 3, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
};
#undef _OUT
NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w);
NL_DECLARE_ATTR_PARSER(nlbuf_w_parser, nla_p_nlbuf_w);
static int
test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
test_nlbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
{
struct test_mbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
struct test_nlbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
ctx->arg = attrs;
if (attrs != NULL)
return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs));
return (nl_parse_nested(nla, &nlbuf_w_parser, ctx->npt, attrs));
return (ENOMEM);
}
static int
test_mbuf_writer_allocation(struct ktest_test_context *ctx)
test_nlbuf_writer_allocation(struct ktest_test_context *ctx)
{
struct test_mbuf_attrs *attrs = ctx->arg;
bool ret;
struct test_nlbuf_attrs *attrs = ctx->arg;
struct nl_writer nw = {};
u_int alloc_len;
bool ret;
ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype, attrs->waitok);
ret = nlmsg_get_buf_wrapper(&nw, attrs->size, attrs->waitok);
if (!ret)
return (EINVAL);
int alloc_len = nw.alloc_len;
alloc_len = nw.buf->buflen;
KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len);
/* Set cleanup callback */
nw.writer_target = NS_WRITER_TARGET_SOCKET;
nlmsg_set_callback_wrapper(&nw);
/* Mark enomem to avoid reallocation */
nw.enomem = true;
@ -95,9 +88,7 @@ test_mbuf_writer_allocation(struct ktest_test_context *ctx)
return (EINVAL);
}
/* Mark as empty to free the storage */
nw.offset = 0;
nlmsg_flush(&nw);
nl_buf_free(nw.buf);
if (alloc_len < attrs->expected_avail) {
KTEST_LOG(ctx, "alloc_len %d, expected %u",
@ -107,60 +98,15 @@ test_mbuf_writer_allocation(struct ktest_test_context *ctx)
return (0);
}
static int
test_mbuf_chain_allocation(struct ktest_test_context *ctx)
{
struct test_mbuf_attrs *attrs = ctx->arg;
int mflags = attrs->waitok ? M_WAITOK : M_NOWAIT;
struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags);
if (chain == NULL) {
KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size);
return (EINVAL);
}
/* Iterate and check number of mbufs and space */
uint32_t allocated_count = 0, allocated_size = 0;
for (struct mbuf *m = chain; m != NULL; m = m->m_next) {
allocated_count++;
allocated_size += M_SIZE(m);
}
m_freem(chain);
if (attrs->expected_avail > allocated_size) {
KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
" expected/allocated count %u/%u",
attrs->expected_avail, allocated_size,
attrs->expected_count, allocated_count);
return (EINVAL);
}
if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) {
KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
" expected/allocated count %u/%u",
attrs->expected_avail, allocated_size,
attrs->expected_count, allocated_count);
return (EINVAL);
}
return (0);
}
#endif
static const struct ktest_test_info tests[] = {
#ifdef INVARIANTS
{
.name = "test_mbuf_writer_allocation",
.desc = "test different mbuf sizes in the mbuf writer",
.func = &test_mbuf_writer_allocation,
.parse = &test_mbuf_parser,
},
{
.name = "test_mbuf_chain_allocation",
.desc = "verify allocation different chain sizes",
.func = &test_mbuf_chain_allocation,
.parse = &test_mbuf_parser,
.name = "test_nlbuf_writer_allocation",
.desc = "test different buffer sizes in the netlink writer",
.func = &test_nlbuf_writer_allocation,
.parse = &test_nlbuf_parser,
},
#endif
};

View File

@ -30,28 +30,14 @@
#if defined(_KERNEL) && defined(INVARIANTS)
bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok);
void nlmsg_set_callback_wrapper(struct nl_writer *nw);
struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags);
bool nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok);
#ifndef KTEST_CALLER
bool
nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok)
nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok)
{
return (nlmsg_get_buf_type(nw, size, type, waitok));
}
void
nlmsg_set_callback_wrapper(struct nl_writer *nw)
{
nlmsg_set_callback(nw);
}
struct mbuf *
nl_get_mbuf_chain_wrapper(int len, int malloc_flags)
{
return (nl_get_mbuf_chain(len, malloc_flags));
return (nlmsg_get_buf(nw, size, waitok));
}
#endif

View File

@ -179,53 +179,76 @@ nl_get_groups_compat(struct nlpcb *nlp)
}
static void
nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages,
int io_flags)
nl_send_one_group(struct nl_writer *nw, struct nl_buf *nb, struct nlpcb *nlp)
{
if (__predict_false(nlp->nl_flags & NLF_MSG_INFO))
nl_add_msg_info(m);
nl_send_one(m, nlp, num_messages, io_flags);
nl_add_msg_info(nb);
nw->buf = nb;
(void)nl_send_one(nw);
}
/*
 * Return a deep copy of netlink buffer @nb, or NULL if any allocation
 * fails.  Both the buffer itself and the control-message mbuf chain are
 * duplicated with M_NOWAIT, so this is safe in non-sleepable contexts.
 * The caller owns the returned buffer and presumably releases it with
 * nl_buf_free() (callers here hand it to nl_send_one_group()).
 */
static struct nl_buf *
nl_buf_copy(struct nl_buf *nb)
{
struct nl_buf *copy;
copy = nl_buf_alloc(nb->buflen, M_NOWAIT);
if (__predict_false(copy == NULL))
return (NULL);
/*
 * Copy the header and the whole data area in one shot.  This also
 * copies the 'control' pointer, which is immediately replaced below
 * when non-NULL, so the copy never aliases the original's chain.
 */
memcpy(copy, nb, sizeof(*nb) + nb->buflen);
if (nb->control != NULL) {
/* Duplicate the control mbufs so the copy owns its own chain. */
copy->control = m_copym(nb->control, 0, M_COPYALL, M_NOWAIT);
if (__predict_false(copy->control == NULL)) {
/*
 * m_copym() failed and left copy->control NULL, so
 * nl_buf_free() will not free the original's control
 * chain here.
 */
nl_buf_free(copy);
return (NULL);
}
}
return (copy);
}
/*
* Broadcasts message @m to the protocol @proto group specified by @group_id
* Broadcasts in the writer's buffer.
*/
void
nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
bool
nl_send_group(struct nl_writer *nw)
{
struct nl_buf *nb = nw->buf;
struct nlpcb *nlp_last = NULL;
struct nlpcb *nlp;
NLCTL_TRACKER;
IF_DEBUG_LEVEL(LOG_DEBUG2) {
struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d",
m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id);
struct nlmsghdr *hdr = (struct nlmsghdr *)nb->data;
NL_LOG(LOG_DEBUG2, "MCAST len %u msg type %d len %u to group %d/%d",
nb->datalen, hdr->nlmsg_type, hdr->nlmsg_len,
nw->group.proto, nw->group.id);
}
nw->buf = NULL;
struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
if (__predict_false(ctl == NULL)) {
/*
* Can be the case when notification is sent within VNET
* which doesn't have any netlink sockets.
*/
m_freem(m);
return;
nl_buf_free(nb);
return (false);
}
NLCTL_RLOCK(ctl);
int io_flags = NL_IOF_UNTRANSLATED;
CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) {
if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) {
if (nl_isset_group_locked(nlp, nw->group.id) &&
nlp->nl_proto == nw->group.proto) {
if (nlp_last != NULL) {
struct mbuf *m_copy;
m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (m_copy != NULL)
nl_send_one_group(m_copy, nlp_last,
num_messages, io_flags);
else {
struct nl_buf *copy;
copy = nl_buf_copy(nb);
if (copy != NULL) {
nl_send_one_group(nw, copy, nlp_last);
} else {
NLP_LOCK(nlp_last);
if (nlp_last->nl_socket != NULL)
sorwakeup(nlp_last->nl_socket);
@ -236,11 +259,13 @@ nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
}
}
if (nlp_last != NULL)
nl_send_one_group(m, nlp_last, num_messages, io_flags);
nl_send_one_group(nw, nb, nlp_last);
else
m_freem(m);
nl_buf_free(nb);
NLCTL_RUNLOCK(ctl);
return (true);
}
bool
@ -331,7 +356,7 @@ nl_pru_attach(struct socket *so, int proto, struct thread *td)
free(nlp, M_PCB);
return (error);
}
so->so_rcv.sb_mtx = &so->so_rcv_mtx;
TAILQ_INIT(&so->so_rcv.nl_queue);
TAILQ_INIT(&so->so_snd.nl_queue);
so->so_pcb = nlp;
nlp->nl_socket = so;
@ -344,7 +369,6 @@ nl_pru_attach(struct socket *so, int proto, struct thread *td)
nlp->nl_need_thread_setup = true;
NLP_LOCK_INIT(nlp);
refcount_init(&nlp->nl_refcount, 1);
nl_init_io(nlp);
nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK,
taskqueue_thread_enqueue, &nlp->nl_taskqueue);
@ -467,15 +491,6 @@ nl_pru_connect(struct socket *so, struct sockaddr *sa, struct thread *td)
return (0);
}
static void
destroy_nlpcb(struct nlpcb *nlp)
{
NLP_LOCK(nlp);
nl_free_io(nlp);
NLP_LOCK_DESTROY(nlp);
free(nlp, M_PCB);
}
static void
destroy_nlpcb_epoch(epoch_context_t ctx)
{
@ -483,10 +498,10 @@ destroy_nlpcb_epoch(epoch_context_t ctx)
nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx);
destroy_nlpcb(nlp);
NLP_LOCK_DESTROY(nlp);
free(nlp, M_PCB);
}
static void
nl_close(struct socket *so)
{
@ -522,9 +537,12 @@ nl_close(struct socket *so)
while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) {
TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq);
free(nb, M_NETLINK);
nl_buf_free(nb);
}
while ((nb = TAILQ_FIRST(&so->so_rcv.nl_queue)) != NULL) {
TAILQ_REMOVE(&so->so_rcv.nl_queue, nb, tailq);
nl_buf_free(nb);
}
sbdestroy(so, SO_RCV);
NL_LOG(LOG_DEBUG3, "socket %p, detached", so);
@ -597,10 +615,8 @@ nl_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE;
if (nlp->nl_linux)
len += roundup2(uio->uio_resid, 8);
nb = malloc(sizeof(*nb) + len, M_NETLINK, M_WAITOK);
nb = nl_buf_alloc(len, M_WAITOK);
nb->datalen = uio->uio_resid;
nb->buflen = len;
nb->offset = 0;
error = uiomove(&nb->data[0], uio->uio_resid, uio);
if (__predict_false(error))
goto out;
@ -635,19 +651,107 @@ nl_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
out:
SOCK_IO_SEND_UNLOCK(so);
free(nb, M_NETLINK);
if (nb != NULL)
nl_buf_free(nb);
return (error);
}
static int
nl_pru_rcvd(struct socket *so, int flags)
nl_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp, struct mbuf **controlp, int *flagsp)
{
static const struct sockaddr_nl nl_empty_src = {
.nl_len = sizeof(struct sockaddr_nl),
.nl_family = PF_NETLINK,
.nl_pid = 0 /* comes from the kernel */
};
struct sockbuf *sb = &so->so_rcv;
struct nl_buf *nb;
int flags, error;
u_int overflow;
bool nonblock, trunc, peek;
MPASS(mp == NULL && uio != NULL);
NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
MPASS(sotonlpcb(so) != NULL);
if (psa != NULL)
*psa = sodupsockaddr((const struct sockaddr *)&nl_empty_src,
M_WAITOK);
flags = flagsp != NULL ? *flagsp & ~MSG_TRUNC : 0;
trunc = flagsp != NULL ? *flagsp & MSG_TRUNC : false;
nonblock = (so->so_state & SS_NBIO) ||
(flags & (MSG_DONTWAIT | MSG_NBIO));
peek = flags & MSG_PEEK;
error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
if (__predict_false(error))
return (error);
SOCK_RECVBUF_LOCK(so);
while ((nb = TAILQ_FIRST(&sb->nl_queue)) == NULL) {
if (nonblock) {
SOCK_RECVBUF_UNLOCK(so);
SOCK_IO_RECV_UNLOCK(so);
return (EWOULDBLOCK);
}
error = sbwait(so, SO_RCV);
if (error) {
SOCK_RECVBUF_UNLOCK(so);
SOCK_IO_RECV_UNLOCK(so);
return (error);
}
}
/*
* XXXGL
* Here we emulate a PR_ATOMIC behavior of soreceive_generic() where
* we take only the first "record" in the socket buffer and send it
* to uio whole or truncated ignoring how many netlink messages are
* in the record and how much space is left in the uio.
* This needs to be fixed at next refactoring. First, we should perform
* truncation only if the very first message doesn't fit into uio.
* That will help an application with small buffer not to lose data.
* Second, we should continue working on the sb->nl_queue as long as
* there is more space in the uio. That will boost applications with
* large buffers.
*/
if (__predict_true(!peek)) {
TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
sb->sb_acc -= nb->datalen;
sb->sb_ccc -= nb->datalen;
}
SOCK_RECVBUF_UNLOCK(so);
overflow = __predict_false(nb->datalen > uio->uio_resid) ?
nb->datalen - uio->uio_resid : 0;
error = uiomove(nb->data, (int)nb->datalen, uio);
if (__predict_false(overflow > 0)) {
flags |= MSG_TRUNC;
if (trunc)
uio->uio_resid -= overflow;
}
if (controlp != NULL) {
*controlp = nb->control;
nb->control = NULL;
}
if (__predict_true(!peek))
nl_buf_free(nb);
if (uio->uio_td)
uio->uio_td->td_ru.ru_msgrcv++;
if (flagsp != NULL)
*flagsp |= flags;
SOCK_IO_RECV_UNLOCK(so);
nl_on_transmit(sotonlpcb(so));
return (0);
return (error);
}
static int
@ -798,8 +902,7 @@ nl_setsbopt(struct socket *so, struct sockopt *sopt)
}
#define NETLINK_PROTOSW \
.pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD | \
PR_SOCKBUF, \
.pr_flags = PR_ATOMIC | PR_ADDR | PR_SOCKBUF, \
.pr_ctloutput = nl_ctloutput, \
.pr_setsbopt = nl_setsbopt, \
.pr_attach = nl_pru_attach, \
@ -807,7 +910,7 @@ nl_setsbopt(struct socket *so, struct sockopt *sopt)
.pr_connect = nl_pru_connect, \
.pr_disconnect = nl_pru_disconnect, \
.pr_sosend = nl_sosend, \
.pr_rcvd = nl_pru_rcvd, \
.pr_soreceive = nl_soreceive, \
.pr_shutdown = nl_pru_shutdown, \
.pr_sockaddr = nl_sockaddr, \
.pr_close = nl_close

View File

@ -111,7 +111,6 @@ static bool
get_stub_writer(struct nl_writer *nw)
{
bzero(nw, sizeof(*nw));
nw->writer_type = NS_WRITER_TYPE_STUB;
nw->enomem = true;
return (false);

View File

@ -51,69 +51,36 @@ _DECLARE_DEBUG(LOG_INFO);
* sending netlink data between the kernel and userland.
*/
static const struct sockaddr_nl _nl_empty_src = {
.nl_len = sizeof(struct sockaddr_nl),
.nl_family = PF_NETLINK,
.nl_pid = 0 /* comes from the kernel */
};
static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src;
static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp);
static void
queue_push(struct nl_io_queue *q, struct mbuf *mq)
struct nl_buf *
nl_buf_alloc(size_t len, int mflag)
{
while (mq != NULL) {
struct mbuf *m = mq;
mq = mq->m_nextpkt;
m->m_nextpkt = NULL;
struct nl_buf *nb;
q->length += m_length(m, NULL);
STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt);
nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag);
if (__predict_true(nb != NULL)) {
nb->buflen = len;
nb->datalen = nb->offset = 0;
nb->control = NULL;
}
}
static struct mbuf *
queue_pop(struct nl_io_queue *q)
{
if (!STAILQ_EMPTY(&q->head)) {
struct mbuf *m = STAILQ_FIRST(&q->head);
STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
m->m_nextpkt = NULL;
q->length -= m_length(m, NULL);
return (m);
}
return (NULL);
}
static struct mbuf *
queue_head(const struct nl_io_queue *q)
{
return (STAILQ_FIRST(&q->head));
}
static inline bool
queue_empty(const struct nl_io_queue *q)
{
return (q->length == 0);
}
static void
queue_free(struct nl_io_queue *q)
{
while (!STAILQ_EMPTY(&q->head)) {
struct mbuf *m = STAILQ_FIRST(&q->head);
STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
m->m_nextpkt = NULL;
m_freem(m);
}
q->length = 0;
return (nb);
}
void
nl_add_msg_info(struct mbuf *m)
nl_buf_free(struct nl_buf *nb)
{
if (nb->control)
m_freem(nb->control);
free(nb, M_NETLINK);
}
void
nl_add_msg_info(struct nl_buf *nb)
{
/* XXXGL pass nlp as arg? */
struct nlpcb *nlp = nl_get_thread_nlp(curthread);
NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p",
curthread, nlp);
@ -139,27 +106,15 @@ nl_add_msg_info(struct mbuf *m)
};
while (m->m_next != NULL)
m = m->m_next;
m->m_next = sbcreatecontrol(data, sizeof(data),
nb->control = sbcreatecontrol(data, sizeof(data),
NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT);
NL_LOG(LOG_DEBUG2, "Storing %u bytes of data, ctl: %p",
(unsigned)sizeof(data), m->m_next);
}
static __noinline struct mbuf *
extract_msg_info(struct mbuf *m)
{
while (m->m_next != NULL) {
if (m->m_next->m_type == MT_CONTROL) {
struct mbuf *ctl = m->m_next;
m->m_next = NULL;
return (ctl);
}
m = m->m_next;
}
return (NULL);
if (__predict_true(nb->control != NULL))
NL_LOG(LOG_DEBUG2, "Storing %u bytes of control data, ctl: %p",
(unsigned)sizeof(data), nb->control);
else
NL_LOG(LOG_DEBUG2, "Failed to allocate %u bytes of control",
(unsigned)sizeof(data));
}
void
@ -174,65 +129,31 @@ nl_schedule_taskqueue(struct nlpcb *nlp)
}
}
static bool
tx_check_locked(struct nlpcb *nlp)
{
if (queue_empty(&nlp->tx_queue))
return (true);
/*
* Check if something can be moved from the internal TX queue
* to the socket queue.
*/
bool appended = false;
struct sockbuf *sb = &nlp->nl_socket->so_rcv;
SOCKBUF_LOCK(sb);
while (true) {
struct mbuf *m = queue_head(&nlp->tx_queue);
if (m != NULL) {
struct mbuf *ctl = NULL;
if (__predict_false(m->m_next != NULL))
ctl = extract_msg_info(m);
if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) {
/* appended successfully */
queue_pop(&nlp->tx_queue);
appended = true;
} else
break;
} else
break;
}
SOCKBUF_UNLOCK(sb);
if (appended)
sorwakeup(nlp->nl_socket);
return (queue_empty(&nlp->tx_queue));
}
static bool
nl_process_received_one(struct nlpcb *nlp)
{
struct socket *so = nlp->nl_socket;
struct sockbuf *sb = &so->so_snd;
struct sockbuf *sb;
struct nl_buf *nb;
bool reschedule = false;
NLP_LOCK(nlp);
nlp->nl_task_pending = false;
if (!tx_check_locked(nlp)) {
/* TX overflow queue still not empty, ignore RX */
NLP_UNLOCK(nlp);
return (false);
}
int prev_hiwat = nlp->tx_queue.hiwat;
NLP_UNLOCK(nlp);
/*
* Do not process queued up requests if there is no space to queue
* replies.
*/
sb = &so->so_rcv;
SOCK_RECVBUF_LOCK(so);
if (sb->sb_hiwat <= sb->sb_ccc) {
SOCK_RECVBUF_UNLOCK(so);
return (false);
}
SOCK_RECVBUF_UNLOCK(so);
sb = &so->so_snd;
SOCK_SENDBUF_LOCK(so);
while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) {
TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
@ -244,7 +165,7 @@ nl_process_received_one(struct nlpcb *nlp)
sb->sb_ccc -= nb->datalen;
/* XXXGL: potentially can reduce lock&unlock count. */
sowwakeup_locked(so);
free(nb, M_NETLINK);
nl_buf_free(nb);
SOCK_SENDBUF_LOCK(so);
} else {
TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq);
@ -252,10 +173,6 @@ nl_process_received_one(struct nlpcb *nlp)
}
}
SOCK_SENDBUF_UNLOCK(so);
if (nlp->tx_queue.hiwat > prev_hiwat) {
NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat);
}
return (reschedule);
}
@ -276,18 +193,6 @@ nl_process_received(struct nlpcb *nlp)
;
}
void
nl_init_io(struct nlpcb *nlp)
{
STAILQ_INIT(&nlp->tx_queue.head);
}
void
nl_free_io(struct nlpcb *nlp)
{
queue_free(&nlp->tx_queue);
}
/*
* Called after some data have been read from the socket.
*/
@ -306,8 +211,8 @@ nl_on_transmit(struct nlpcb *nlp)
struct sockbuf *sb = &so->so_rcv;
NLP_LOG(LOG_DEBUG, nlp,
"socket RX overflowed, %lu messages (%lu bytes) dropped. "
"bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes,
sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax);
"bytes: [%u/%u]", dropped_messages, dropped_bytes,
sb->sb_ccc, sb->sb_hiwat);
/* TODO: send netlink message */
}
@ -325,95 +230,67 @@ nl_taskqueue_handler(void *_arg, int pending)
CURVNET_RESTORE();
}
static __noinline void
queue_push_tx(struct nlpcb *nlp, struct mbuf *m)
{
queue_push(&nlp->tx_queue, m);
nlp->nl_tx_blocked = true;
if (nlp->tx_queue.length > nlp->tx_queue.hiwat)
nlp->tx_queue.hiwat = nlp->tx_queue.length;
}
/*
* Tries to send @m to the socket @nlp.
*
* @m: mbuf(s) to send to. Consumed in any case.
* @nlp: socket to send to
* @cnt: number of messages in @m
* @io_flags: combination of NL_IOF_* flags
* Tries to send current data buffer from writer.
*
* Returns true on success.
* If no queue overrunes happened, wakes up socket owner.
*/
bool
nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags)
nl_send_one(struct nl_writer *nw)
{
bool untranslated = io_flags & NL_IOF_UNTRANSLATED;
bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT;
bool result = true;
struct nlpcb *nlp = nw->nlp;
struct socket *so = nlp->nl_socket;
struct sockbuf *sb = &so->so_rcv;
struct nl_buf *nb;
MPASS(nw->hdr == NULL);
IF_DEBUG_LEVEL(LOG_DEBUG2) {
struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data;
NLP_LOG(LOG_DEBUG2, nlp,
"TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X",
m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len,
io_flags);
"TX len %u msgs %u msg type %d first hdrlen %u",
nw->buf->datalen, nw->num_messages, hdr->nlmsg_type,
hdr->nlmsg_len);
}
if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) {
m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp);
if (m == NULL)
return (false);
}
NLP_LOCK(nlp);
if (__predict_false(nlp->nl_socket == NULL)) {
NLP_UNLOCK(nlp);
m_freem(m);
if (nlp->nl_linux && linux_netlink_p != NULL &&
__predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) {
nl_buf_free(nw->buf);
nw->buf = NULL;
return (false);
}
if (!queue_empty(&nlp->tx_queue)) {
if (ignore_limits) {
queue_push_tx(nlp, m);
} else {
m_free(m);
result = false;
}
nb = nw->buf;
nw->buf = NULL;
SOCK_RECVBUF_LOCK(so);
if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) {
SOCK_RECVBUF_UNLOCK(so);
NLP_LOCK(nlp);
nlp->nl_dropped_bytes += nb->datalen;
nlp->nl_dropped_messages += nw->num_messages;
NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
(unsigned long)nlp->nl_dropped_messages, nw->num_messages,
(unsigned long)nlp->nl_dropped_bytes, nb->datalen);
NLP_UNLOCK(nlp);
return (result);
}
struct socket *so = nlp->nl_socket;
struct mbuf *ctl = NULL;
if (__predict_false(m->m_next != NULL))
ctl = extract_msg_info(m);
if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) {
sorwakeup(so);
NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up");
nl_buf_free(nb);
return (false);
} else {
if (ignore_limits) {
queue_push_tx(nlp, m);
} else {
/*
* Store dropped data so it can be reported
* on the next read
*/
nlp->nl_dropped_bytes += m_length(m, NULL);
nlp->nl_dropped_messages += num_messages;
NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
(unsigned long)nlp->nl_dropped_messages, num_messages,
(unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL));
soroverflow(so);
m_freem(m);
result = false;
}
}
NLP_UNLOCK(nlp);
bool full;
return (result);
TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
sb->sb_acc += nb->datalen;
sb->sb_ccc += nb->datalen;
full = sb->sb_hiwat <= sb->sb_ccc;
sorwakeup_locked(so);
if (full) {
NLP_LOCK(nlp);
nlp->nl_tx_blocked = true;
NLP_UNLOCK(nlp);
}
return (true);
}
}
static int

View File

@ -27,6 +27,7 @@
#ifndef _NETLINK_LINUX_VAR_H_
#define _NETLINK_LINUX_VAR_H_
#ifdef _KERNEL
/*
* The file contains headers for the bridge interface between
@ -34,16 +35,13 @@
*/
struct nlpcb;
struct nl_pstate;
struct nl_writer;
typedef struct mbuf *mbufs_to_linux_cb_t(int netlink_family, struct mbuf *m,
struct nlpcb *nlp);
typedef struct mbuf *msgs_to_linux_cb_t(int netlink_family, char *buf, int data_length,
struct nlpcb *nlp);
typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp);
typedef struct nlmsghdr *msg_from_linux_cb_t(int netlink_family, struct nlmsghdr *hdr,
struct nl_pstate *npt);
struct linux_netlink_provider {
mbufs_to_linux_cb_t *mbufs_to_linux;
msgs_to_linux_cb_t *msgs_to_linux;
msg_from_linux_cb_t *msg_from_linux;
@ -52,3 +50,4 @@ struct linux_netlink_provider {
extern struct linux_netlink_provider *linux_netlink_p;
#endif
#endif

View File

@ -30,7 +30,6 @@
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/mbuf.h>
#include <sys/ck.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
@ -45,523 +44,44 @@
#include <netlink/netlink_debug.h>
_DECLARE_DEBUG(LOG_INFO);
/*
* The goal of this file is to provide convenient message writing KPI on top of
* different storage methods (mbufs, uio, temporary memory chunks).
*
* The main KPI guarantee is that the (last) message always resides in the contiguous
* memory buffer, so one is able to update the header after writing the entire message.
*
* This guarantee comes with a side effect of potentially reallocating underlying
* buffer, so one needs to update the desired pointers after something is added
* to the header.
*
* Messaging layer contains hooks performing transparent Linux translation for the messages.
*
* There are 3 types of supported targets:
* * socket (adds mbufs to the socket buffer, used for message replies)
* * group (sends mbuf/chain to the specified groups, used for the notifications)
* * chain (returns mbuf chain, used in Linux message translation code)
*
* There are 3 types of storage:
* * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
* fits in NLMBUFSIZE)
* * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
* to be larger than one supported by NS_WRITER_TYPE_MBUF)
* * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
* Linux sockets, calls translation hook prior to sending messages to the socket).
*
* Internally, KPI switches between different types of storage when memory requirements
* change. It happens transparently to the caller.
*/
/*
* Uma zone for the mbuf-based Netlink storage
*/
static uma_zone_t nlmsg_zone;
static void
nl_free_mbuf_storage(struct mbuf *m)
{
uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
}
static int
nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
{
struct mbuf *m = (struct mbuf *)arg;
if (m != NULL)
m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
return (0);
}
static struct mbuf *
nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
{
struct mbuf *m, *m_storage;
if (size <= MHLEN)
return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
if (__predict_false(size > NLMBUFSIZE))
return (NULL);
m = m_gethdr(malloc_flags, MT_DATA);
if (m == NULL)
return (NULL);
m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
if (m_storage == NULL) {
m_free_raw(m);
return (NULL);
}
return (m);
}
static struct mbuf *
nl_get_mbuf(int size, int malloc_flags)
{
return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
}
/*
 * Gets a chain of Netlink mbufs totalling at least @len bytes of space,
 * each link holding up to NLMBUFSIZE bytes.
 * This is a stripped-down version of m_getm2().
 * Returns NULL (freeing any partially built chain) on allocation failure.
 */
static struct mbuf *
nl_get_mbuf_chain(int len, int malloc_flags)
{
	struct mbuf *m_chain = NULL, *m_tail = NULL;
	int mbuf_flags = M_PKTHDR;

	while (len > 0) {
		int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len;
		struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);

		if (m == NULL) {
			m_freem(m_chain);
			return (NULL);
		}

		/* Book keeping. */
		/*
		 * NOTE(review): the decrement uses M_SIZE(m) (the mbuf's
		 * full storage size) rather than the requested sz — this
		 * mirrors m_getm2(); confirm it is intended to over-count
		 * available space for small trailing segments.
		 */
		len -= M_SIZE(m);
		if (m_tail != NULL)
			m_tail->m_next = m;
		else
			m_chain = m;
		m_tail = m;
		mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */
	}
	return (m_chain);
}
/*
 * Creates the Netlink message storage zone, with
 * nl_setup_mbuf_storage() as the per-item constructor.
 * Called once at module load time.
 */
void
nl_init_msg_zone(void)
{
	nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
/*
 * Tears down the Netlink message storage zone.
 * Called once at module unload time.
 */
void
nl_destroy_msg_zone(void)
{
	uma_zdestroy(nlmsg_zone);
}
/* Storage constructor: set up @nw to hold @size bytes of messages. */
typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
/* Flush callback: consume @buf (@buflen bytes, @cnt messages); true on success. */
typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);

/*
 * Per-storage-type operation vector; instances live in nlmsg_writers[],
 * indexed by the NS_WRITER_TYPE_* constants.
 */
struct nlwriter_ops {
	nlwriter_op_init *init;
	nlwriter_op_write *write_socket;
	nlwriter_op_write *write_group;
	nlwriter_op_write *write_chain;
};
/*
* NS_WRITER_TYPE_BUF
* Writes message to a temporary memory buffer,
* flushing to the socket/group when buffer size limit is reached
*/
static bool
nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
nlmsg_get_buf(struct nl_writer *nw, u_int len, bool waitok)
{
int mflag = waitok ? M_WAITOK : M_NOWAIT;
nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
if (__predict_false(nw->_storage == NULL))
const int mflag = waitok ? M_WAITOK : M_NOWAIT;
MPASS(nw->buf == NULL);
NL_LOG(LOG_DEBUG3, "Setting up nw %p len %u %s", nw, len,
waitok ? "wait" : "nowait");
nw->buf = nl_buf_alloc(len, mflag);
if (__predict_false(nw->buf == NULL))
return (false);
nw->alloc_len = size;
nw->offset = 0;
nw->hdr = NULL;
nw->data = nw->_storage;
nw->writer_type = NS_WRITER_TYPE_BUF;
nw->malloc_flag = mflag;
nw->num_messages = 0;
nw->enomem = false;
return (true);
}
/*
 * Flush callback: NS_WRITER_TYPE_BUF -> socket.
 * Copies the malloc'ed buffer @buf (@datalen bytes, @cnt messages) into
 * a newly allocated mbuf chain and passes it to nl_send_one() for the
 * pcb stored in nw->arg.ptr.  Consumes @buf on every path.
 * Returns true on success (an empty buffer counts as success).
 */
static bool
nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);

	if (__predict_false(m == NULL)) {
		/* XXX: should we set sorcverr? */
		free(buf, M_NETLINK);
		return (false);
	}
	m_append(m, datalen, buf);
	free(buf, M_NETLINK);

	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;

	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
}
/*
 * Flush callback: NS_WRITER_TYPE_BUF -> group.
 * Copies the malloc'ed buffer @buf into an mbuf chain and multicasts it
 * to the group described by nw->arg.group via nl_send_group().
 * Consumes @buf on every path.  Returns true on success.
 */
static bool
nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
	    nw->arg.group.proto, nw->arg.group.id);
	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);

	if (__predict_false(m == NULL)) {
		free(buf, M_NETLINK);
		return (false);
	}

	/* m_append() returns 0 when it could not fit all of the data. */
	bool success = m_append(m, datalen, buf) != 0;

	free(buf, M_NETLINK);
	if (!success)
		return (false);
	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
	return (true);
}
/*
 * Flush callback: NS_WRITER_TYPE_BUF -> mbuf chain.
 * Appends @datalen bytes from the malloc'ed @buf to the caller-owned
 * chain anchored at *nw->arg.ptr, allocating the chain on first use.
 * Consumes @buf on every path.  Returns true on success.
 */
static bool
nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);

	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	if (*m0 == NULL) {
		/* First flush: start the chain. */
		struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);

		if (__predict_false(m == NULL)) {
			free(buf, M_NETLINK);
			return (false);
		}
		*m0 = m;
	}

	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
		free(buf, M_NETLINK);
		return (false);
	}
	free(buf, M_NETLINK);
	return (true);
}
/*
 * NS_WRITER_TYPE_MBUF
 * Writes message to the allocated mbuf,
 * flushing to socket/group when mbuf size limit is reached.
 * This is the most efficient mechanism as it avoids double-copying.
 *
 * Allocates a single mbuf suitable to store up to @size bytes of data.
 * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
 * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
 * Returns NULL on greater size or the allocation failure.
 */
static bool
nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
{
	int mflag = waitok ? M_WAITOK : M_NOWAIT;
	struct mbuf *m = nl_get_mbuf(size, mflag);

	if (__predict_false(m == NULL))
		return (false);
	/* The mbuf itself doubles as the writer's backing storage. */
	nw->alloc_len = M_TRAILINGSPACE(m);
	nw->offset = 0;
	nw->hdr = NULL;
	nw->_storage = (void *)m;
	nw->data = mtod(m, void *);
	nw->writer_type = NS_WRITER_TYPE_MBUF;
	nw->malloc_flag = mflag;
	nw->num_messages = 0;
	nw->enomem = false;
	memset(nw->data, 0, size);
	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
	    m, size, nw->alloc_len, nw->data);
	return (true);
}
/*
 * Flush callback: NS_WRITER_TYPE_MBUF -> socket.
 * The data already resides in the mbuf passed as @buf, so no copy is
 * needed: just record the final length and hand the mbuf (ownership
 * included) to nl_send_one().  Returns true on success.
 */
static bool
nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct mbuf *m = (struct mbuf *)buf;

	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
	if (__predict_false(datalen == 0)) {
		m_freem(m);
		return (true);
	}

	m->m_pkthdr.len = datalen;
	m->m_len = datalen;

	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;

	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
}
/*
 * Flush callback: NS_WRITER_TYPE_MBUF -> group.
 * Records the final length of the mbuf passed as @buf and multicasts it
 * to the group described by nw->arg.group.  Always succeeds unless the
 * buffer is empty (which is also treated as success after freeing).
 */
static bool
nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct mbuf *m = (struct mbuf *)buf;

	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
	    nw->arg.group.proto, nw->arg.group.id);
	if (__predict_false(datalen == 0)) {
		m_freem(m);
		return (true);
	}

	m->m_pkthdr.len = datalen;
	m->m_len = datalen;
	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
	return (true);
}
/*
 * Flush callback: NS_WRITER_TYPE_MBUF -> mbuf chain.
 * Links the filled mbuf @buf to the tail of the caller-owned chain
 * anchored at *nw->arg.ptr (or makes it the head), keeping the head's
 * packet-header length up to date.  Returns true on success.
 */
static bool
nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct mbuf *m_new = (struct mbuf *)buf;
	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);

	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
	if (__predict_false(datalen == 0)) {
		m_freem(m_new);
		return (true);
	}

	m_new->m_pkthdr.len = datalen;
	m_new->m_len = datalen;

	if (*m0 == NULL) {
		*m0 = m_new;
	} else {
		struct mbuf *m_last;

		/* Walk to the tail; chains here are expected to be short. */
		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
			;
		m_last->m_next = m_new;
		/* Total packet length lives in the head mbuf only. */
		(*m0)->m_pkthdr.len += datalen;
	}
	return (true);
}
/*
 * NS_WRITER_TYPE_LBUF
 * Writes message to the allocated memory buffer,
 * flushing to socket/group when mbuf size limit is reached.
 * Calls linux handler to rewrite messages before sending to the socket.
 */
static bool
nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
{
	int mflag = waitok ? M_WAITOK : M_NOWAIT;

	size = roundup2(size, sizeof(void *));

	/*
	 * One allocation laid out as:
	 *   [struct linear_buffer][size bytes message data][size +
	 *   SCRATCH_BUFFER_SIZE bytes translation area (lb->base)]
	 * The second area is used by the Linux translation hook at flush
	 * time.
	 */
	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);

	if (__predict_false(buf == NULL))
		return (false);

	/* Fill buffer header first */
	struct linear_buffer *lb = (struct linear_buffer *)buf;

	lb->base = &buf[sizeof(struct linear_buffer) + size];
	lb->size = size + SCRATCH_BUFFER_SIZE;

	nw->alloc_len = size;
	nw->offset = 0;
	nw->hdr = NULL;
	nw->_storage = buf;
	nw->data = (char *)(lb + 1);
	nw->malloc_flag = mflag;
	nw->writer_type = NS_WRITER_TYPE_LBUF;
	nw->num_messages = 0;
	nw->enomem = false;
	return (true);
}
/*
 * Flush callback: NS_WRITER_TYPE_LBUF -> socket.
 * Runs the accumulated messages through the Linux translation hook
 * (msgs_to_linux), which produces an mbuf in the Linux wire format,
 * then sends it via nl_send_one().  Consumes @buf on every path.
 * Fails when the Linux module is not loaded or translation fails.
 */
static bool
nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct linear_buffer *lb = (struct linear_buffer *)buf;
	char *data = (char *)(lb + 1);
	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);

	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = NULL;

	if (linux_netlink_p != NULL)
		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
	free(buf, M_NETLINK);

	if (__predict_false(m == NULL)) {
		/* XXX: should we set sorcverr? */
		return (false);
	}

	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;

	return (nl_send_one(m, nlp, cnt, io_flags));
}
/*
 * Flush callback: NS_WRITER_TYPE_LBUF -> group.
 * Unlike the socket variant, no Linux translation happens here: the raw
 * data is copied into an mbuf chain and multicast via nl_send_group().
 * Shouldn't be called (maybe except Linux code originating message).
 */
static bool
nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct linear_buffer *lb = (struct linear_buffer *)buf;
	char *data = (char *)(lb + 1);

	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);

	if (__predict_false(m == NULL)) {
		free(buf, M_NETLINK);
		return (false);
	}
	m_append(m, datalen, data);
	free(buf, M_NETLINK);
	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
	return (true);
}
/*
 * Dispatch table for the writer storage types; the order of entries
 * must match the NS_WRITER_TYPE_* constant values used to index it.
 */
static const struct nlwriter_ops nlmsg_writers[] = {
	/* NS_WRITER_TYPE_MBUF */
	{
		.init = nlmsg_get_ns_mbuf,
		.write_socket = nlmsg_write_socket_mbuf,
		.write_group = nlmsg_write_group_mbuf,
		.write_chain = nlmsg_write_chain_mbuf,
	},
	/* NS_WRITER_TYPE_BUF */
	{
		.init = nlmsg_get_ns_buf,
		.write_socket = nlmsg_write_socket_buf,
		.write_group = nlmsg_write_group_buf,
		.write_chain = nlmsg_write_chain_buf,
	},
	/* NS_WRITER_TYPE_LBUF */
	{
		.init = nlmsg_get_ns_lbuf,
		.write_socket = nlmsg_write_socket_lbuf,
		.write_group = nlmsg_write_group_lbuf,
		/*
		 * NOTE(review): .write_chain is intentionally absent (NULL).
		 * nlmsg_set_callback() would install NULL for a chain target
		 * with LBUF storage — presumably callers never request that
		 * combination; confirm.
		 */
	},
};
/*
 * Installs the flush callback matching the writer's storage type
 * (nw->writer_type indexes nlmsg_writers[]) and destination
 * (nw->writer_target).  Panics on an unknown target.
 */
static void
nlmsg_set_callback(struct nl_writer *nw)
{
	const struct nlwriter_ops *ops;

	ops = &nlmsg_writers[nw->writer_type];
	if (nw->writer_target == NS_WRITER_TARGET_SOCKET)
		nw->cb = ops->write_socket;
	else if (nw->writer_target == NS_WRITER_TARGET_GROUP)
		nw->cb = ops->write_group;
	else if (nw->writer_target == NS_WRITER_TARGET_CHAIN)
		nw->cb = ops->write_chain;
	else
		panic("not implemented");
}
/*
 * Initializes writer @nw with @size bytes of storage of the requested
 * NS_WRITER_TYPE_* @type by dispatching to that type's init routine.
 * Returns false when the underlying allocation fails.
 */
static bool
nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
{
	const struct nlwriter_ops *ops;

	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
	ops = &nlmsg_writers[type];
	return (ops->init(nw, size, waitok));
}
/*
 * Picks the storage type for a new writer and initializes it:
 * Linux sockets always use the translating LBUF storage; native
 * sockets use the mbuf-backed storage when the request fits in
 * NLMBUFSIZE and fall back to malloc-backed storage otherwise.
 */
static bool
nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
{
	int type;

	if (is_linux)
		type = NS_WRITER_TYPE_LBUF;
	else
		type = (size <= NLMBUFSIZE) ?
		    NS_WRITER_TYPE_MBUF : NS_WRITER_TYPE_BUF;
	return (nlmsg_get_buf_type(nw, size, type, waitok));
}
bool
_nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
{
if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
return (false);
nw->arg.ptr = (void *)nlp;
nw->writer_target = NS_WRITER_TARGET_SOCKET;
nlmsg_set_callback(nw);
return (true);
nw->nlp = nlp;
nw->cb = nl_send_one;
return (nlmsg_get_buf(nw, size, false));
}
bool
_nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
{
if (!nlmsg_get_buf(nw, size, false, false))
return (false);
nw->arg.group.proto = protocol;
nw->arg.group.id = group_id;
nw->writer_target = NS_WRITER_TARGET_GROUP;
nlmsg_set_callback(nw);
return (true);
}
nw->group.proto = protocol;
nw->group.id = group_id;
nw->cb = nl_send_group;
bool
_nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
{
if (!nlmsg_get_buf(nw, size, false, false))
return (false);
*pm = NULL;
nw->arg.ptr = (void *)pm;
nw->writer_target = NS_WRITER_TARGET_CHAIN;
nlmsg_set_callback(nw);
NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
return (true);
return (nlmsg_get_buf(nw, size, false));
}
void
@ -576,18 +96,18 @@ _nlmsg_flush(struct nl_writer *nw)
if (__predict_false(nw->hdr != NULL)) {
/* Last message has not been completed, skip it. */
int completed_len = (char *)nw->hdr - nw->data;
int completed_len = (char *)nw->hdr - nw->buf->data;
/* Send completed messages */
nw->offset -= nw->offset - completed_len;
nw->buf->datalen -= nw->buf->datalen - completed_len;
nw->hdr = NULL;
}
}
NL_LOG(LOG_DEBUG2, "OUT");
bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
nw->_storage = NULL;
bool result = nw->cb(nw);
nw->num_messages = 0;
if (!result) {
NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
NL_LOG(LOG_DEBUG, "nw %p flush with %p() failed", nw, nw->cb);
}
return (result);
@ -599,59 +119,61 @@ _nlmsg_flush(struct nl_writer *nw)
* Return true on success.
*/
bool
_nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
_nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len)
{
struct nl_writer ns_new = {};
int completed_len, new_len;
struct nl_buf *new;
u_int completed_len, new_len, last_len;
MPASS(nw->buf != NULL);
if (nw->enomem)
return (false);
NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
nw->offset, nw->alloc_len, required_len);
NL_LOG(LOG_DEBUG3, "no space at offset %u/%u (want %u), trying to "
"reclaim", nw->buf->datalen, nw->buf->buflen, required_len);
/* Calculated new buffer size and allocate it s*/
completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
/* Calculate new buffer size and allocate it. */
completed_len = (nw->hdr != NULL) ?
(char *)nw->hdr - nw->buf->data : nw->buf->datalen;
if (completed_len > 0 && required_len < NLMBUFSIZE) {
/* We already ran out of space, use the largest effective size */
new_len = max(nw->alloc_len, NLMBUFSIZE);
/* We already ran out of space, use largest effective size. */
new_len = max(nw->buf->buflen, NLMBUFSIZE);
} else {
if (nw->alloc_len < NLMBUFSIZE)
if (nw->buf->buflen < NLMBUFSIZE)
/* XXXGL: does this happen? */
new_len = NLMBUFSIZE;
else
new_len = nw->alloc_len * 2;
new_len = nw->buf->buflen * 2;
while (new_len < required_len)
new_len *= 2;
}
bool waitok = (nw->malloc_flag == M_WAITOK);
bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
new = nl_buf_alloc(new_len, nw->malloc_flag | M_ZERO);
if (__predict_false(new == NULL)) {
nw->enomem = true;
NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
return (false);
}
if (nw->ignore_limit)
nlmsg_ignore_limit(&ns_new);
/* Update callback data */
ns_new.writer_target = nw->writer_target;
nlmsg_set_callback(&ns_new);
ns_new.arg = nw->arg;
/* Copy last (unfinished) header to the new storage */
int last_len = nw->offset - completed_len;
/* Copy last (unfinished) header to the new storage. */
last_len = nw->buf->datalen - completed_len;
if (last_len > 0) {
memcpy(ns_new.data, nw->hdr, last_len);
ns_new.hdr = (struct nlmsghdr *)ns_new.data;
ns_new.offset = last_len;
memcpy(new->data, nw->hdr, last_len);
new->datalen = last_len;
}
NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
NL_LOG(LOG_DEBUG2, "completed: %u bytes, copied: %u bytes",
completed_len, last_len);
/* Flush completed headers & switch to the new nw */
nlmsg_flush(nw);
memcpy(nw, &ns_new, sizeof(struct nl_writer));
NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
if (completed_len > 0) {
nlmsg_flush(nw);
MPASS(nw->buf == NULL);
} else
nl_buf_free(nw->buf);
nw->buf = new;
nw->hdr = (last_len > 0) ? (struct nlmsghdr *)new->data : NULL;
NL_LOG(LOG_DEBUG2, "switched buffer: used %u/%u bytes",
new->datalen, new->buflen);
return (true);
}
@ -660,17 +182,20 @@ bool
_nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
uint16_t flags, uint32_t len)
{
struct nl_buf *nb = nw->buf;
struct nlmsghdr *hdr;
u_int required_len;
MPASS(nw->hdr == NULL);
int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
if (__predict_false(nb->datalen + required_len > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, required_len))
return (false);
nb = nw->buf;
}
hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
hdr = (struct nlmsghdr *)(&nb->data[nb->datalen]);
hdr->nlmsg_len = len;
hdr->nlmsg_type = type;
@ -679,7 +204,7 @@ _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
hdr->nlmsg_pid = portid;
nw->hdr = hdr;
nw->offset += sizeof(struct nlmsghdr);
nb->datalen += sizeof(struct nlmsghdr);
return (true);
}
@ -687,6 +212,8 @@ _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
bool
_nlmsg_end(struct nl_writer *nw)
{
struct nl_buf *nb = nw->buf;
MPASS(nw->hdr != NULL);
if (nw->enomem) {
@ -695,7 +222,7 @@ _nlmsg_end(struct nl_writer *nw)
return (false);
}
nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
nw->hdr->nlmsg_len = nb->data + nb->datalen - (char *)nw->hdr;
NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
@ -707,8 +234,10 @@ _nlmsg_end(struct nl_writer *nw)
void
_nlmsg_abort(struct nl_writer *nw)
{
struct nl_buf *nb = nw->buf;
if (nw->hdr != NULL) {
nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
nb->datalen = (char *)nw->hdr - nb->data;
nw->hdr = NULL;
}
}
@ -775,7 +304,7 @@ _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
/* Save operation result */
int *perror = nlmsg_reserve_object(nw, int);
NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
nw->offset, perror);
nw->buf->datalen, perror);
*perror = error;
nlmsg_end(nw);
nw->suppress_ack = true;
@ -787,40 +316,47 @@ _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
* KPI functions.
*/
int
u_int
nlattr_save_offset(const struct nl_writer *nw)
{
return (nw->offset - ((char *)nw->hdr - nw->data));
return (nw->buf->datalen - ((char *)nw->hdr - nw->buf->data));
}
void *
nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz)
{
sz = NETLINK_ALIGN(sz);
struct nl_buf *nb = nw->buf;
void *data;
if (__predict_false(nw->offset + sz > nw->alloc_len)) {
sz = NETLINK_ALIGN(sz);
if (__predict_false(nb->datalen + sz > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, sz))
return (NULL);
nb = nw->buf;
}
void *data_ptr = &nw->data[nw->offset];
nw->offset += sz;
bzero(data_ptr, sz);
data = &nb->data[nb->datalen];
bzero(data, sz);
nb->datalen += sz;
return (data_ptr);
return (data);
}
bool
nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data)
{
int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
struct nl_buf *nb = nw->buf;
struct nlattr *nla;
u_int required_len;
if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
if (__predict_false(nb->datalen + required_len > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, required_len))
return (false);
nb = nw->buf;
}
struct nlattr *nla = (struct nlattr *)(&nw->data[nw->offset]);
nla = (struct nlattr *)(&nb->data[nb->datalen]);
nla->nla_len = attr_len + sizeof(struct nlattr);
nla->nla_type = attr_type;
@ -831,7 +367,7 @@ nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data)
}
memcpy((nla + 1), data, attr_len);
}
nw->offset += required_len;
nb->datalen += required_len;
return (true);
}

View File

@ -37,60 +37,41 @@
* It is not meant to be included directly
*/
struct mbuf;
struct nl_buf;
struct nl_writer;
typedef bool nl_writer_cb(struct nl_writer *nw, void *buf, int buflen, int cnt);
typedef bool nl_writer_cb(struct nl_writer *nw);
struct nl_writer {
int alloc_len; /* allocated buffer length */
int offset; /* offset from the start of the buffer */
struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */
char *data; /* pointer to the contiguous storage */
void *_storage; /* Underlying storage pointer */
nl_writer_cb *cb; /* Callback to flush data */
struct nl_buf *buf; /* Underlying storage pointer */
struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */
nl_writer_cb *cb; /* Callback to flush data */
union {
void *ptr;
struct nlpcb *nlp;
struct {
uint16_t proto;
uint16_t id;
} group;
} arg;
int num_messages; /* Number of messages in the buffer */
int malloc_flag; /* M_WAITOK or M_NOWAIT */
uint8_t writer_type; /* NS_WRITER_TYPE_* */
uint8_t writer_target; /* NS_WRITER_TARGET_* */
bool ignore_limit; /* If true, ignores RCVBUF limit */
bool enomem; /* True if ENOMEM occured */
bool suppress_ack; /* If true, don't send NLMSG_ERR */
};
u_int num_messages; /* Number of messages in the buffer */
int malloc_flag; /* M_WAITOK or M_NOWAIT */
bool ignore_limit; /* If true, ignores RCVBUF limit */
bool enomem; /* True if ENOMEM occured */
bool suppress_ack; /* If true, don't send NLMSG_ERR */
};
#define NS_WRITER_TARGET_SOCKET 0
#define NS_WRITER_TARGET_GROUP 1
#define NS_WRITER_TARGET_CHAIN 2
#define NS_WRITER_TYPE_MBUF 0
#define NS_WRITER_TYPE_BUF 1
#define NS_WRITER_TYPE_LBUF 2
#define NS_WRITER_TYPE_MBUFC 3
#define NS_WRITER_TYPE_STUB 4
#define NLMSG_SMALL 128
#define NLMSG_LARGE 2048
/* Message and attribute writing */
struct nlpcb;
#if defined(NETLINK) || defined(NETLINK_MODULE)
/* Provide optimized calls to the functions inside the same linking unit */
bool _nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp);
bool _nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id);
bool _nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm);
bool _nlmsg_flush(struct nl_writer *nw);
void _nlmsg_ignore_limit(struct nl_writer *nw);
bool _nlmsg_refill_buffer(struct nl_writer *nw, int required_size);
bool _nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len);
bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
uint16_t flags, uint32_t len);
bool _nlmsg_end(struct nl_writer *nw);
@ -111,12 +92,6 @@ nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int g
return (_nlmsg_get_group_writer(nw, expected_size, proto, group_id));
}
static inline bool
nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm)
{
return (_nlmsg_get_chain_writer(nw, expected_size, pm));
}
static inline bool
nlmsg_flush(struct nl_writer *nw)
{
@ -186,8 +161,6 @@ nlmsg_reply(struct nl_writer *nw, const struct nlmsghdr *hdr, int payload_len)
hdr->nlmsg_flags, payload_len));
}
#define nlmsg_data(_hdr) ((void *)((_hdr) + 1))
/*
* KPI similar to mtodo():
* current (uncompleted) header is guaranteed to be contiguous,

View File

@ -181,7 +181,6 @@ const static struct nl_function_wrapper nl_module = {
.nlmsg_abort = _nlmsg_abort,
.nlmsg_get_unicast_writer = _nlmsg_get_unicast_writer,
.nlmsg_get_group_writer = _nlmsg_get_group_writer,
.nlmsg_get_chain_writer = _nlmsg_get_chain_writer,
.nlmsg_end_dump = _nlmsg_end_dump,
.nl_modify_ifp_generic = _nl_modify_ifp_generic,
.nl_store_ifp_cookie = _nl_store_ifp_cookie,
@ -219,7 +218,6 @@ netlink_modevent(module_t mod __unused, int what, void *priv __unused)
switch (what) {
case MOD_LOAD:
NL_LOG(LOG_DEBUG2, "Loading");
nl_init_msg_zone();
nl_osd_register();
#if !defined(NETLINK) && defined(NETLINK_MODULE)
nl_set_functions(&nl_module);
@ -235,7 +233,6 @@ netlink_modevent(module_t mod __unused, int what, void *priv __unused)
nl_set_functions(NULL);
#endif
nl_osd_unregister();
nl_destroy_msg_zone();
} else
ret = EBUSY;
break;

View File

@ -43,14 +43,9 @@
struct ucred;
struct nl_io_queue {
STAILQ_HEAD(, mbuf) head;
int length;
int hiwat;
};
struct nl_buf {
TAILQ_ENTRY(nl_buf) tailq;
struct mbuf *control;
u_int buflen;
u_int datalen;
u_int offset;
@ -72,7 +67,6 @@ struct nlpcb {
bool nl_linux; /* true if running under compat */
bool nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */
bool nl_need_thread_setup;
struct nl_io_queue tx_queue;
struct taskqueue *nl_taskqueue;
struct task nl_task;
struct ucred *nl_cred; /* Copy of nl_socket->so_cred */
@ -131,7 +125,7 @@ struct nl_proto_handler {
extern struct nl_proto_handler *nl_handlers;
/* netlink_domain.c */
void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id);
bool nl_send_group(struct nl_writer *);
void nl_osd_register(void);
void nl_osd_unregister(void);
void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp);
@ -139,22 +133,18 @@ void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp);
/* netlink_io.c */
#define NL_IOF_UNTRANSLATED 0x01
#define NL_IOF_IGNORE_LIMIT 0x02
bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int cnt, int io_flags);
bool nl_send_one(struct nl_writer *);
void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg,
struct nl_pstate *npt);
void nl_on_transmit(struct nlpcb *nlp);
void nl_init_io(struct nlpcb *nlp);
void nl_free_io(struct nlpcb *nlp);
void nl_taskqueue_handler(void *_arg, int pending);
void nl_schedule_taskqueue(struct nlpcb *nlp);
void nl_process_receive_locked(struct nlpcb *nlp);
void nl_set_source_metadata(struct mbuf *m, int num_messages);
void nl_add_msg_info(struct mbuf *m);
/* netlink_message_writer.c */
void nl_init_msg_zone(void);
void nl_destroy_msg_zone(void);
void nl_add_msg_info(struct nl_buf *nb);
struct nl_buf *nl_buf_alloc(size_t len, int mflag);
void nl_buf_free(struct nl_buf *nb);
/* netlink_generic.c */
struct genl_family {

View File

@ -556,9 +556,8 @@ dump_rtentry(struct rtentry *rt, void *_arg)
IF_DEBUG_LEVEL(LOG_DEBUG3) {
char rtbuf[INET6_ADDRSTRLEN + 5];
FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
"Dump %s, offset %u, error %d",
rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
wa->nw->offset, error);
"Dump %s, error %d",
rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error);
}
wa->error = error;
@ -578,7 +577,6 @@ dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
wa->count, wa->dumped);
NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
}
static int

View File

@ -4,19 +4,11 @@
from atf_python.ktest import BaseKernelTest
from atf_python.sys.netlink.attrs import NlAttrU32
M_NOWAIT = 1
M_WAITOK = 2
NS_WRITER_TYPE_MBUF = 0
NS_WRITER_TYPE_BUF = 1
NS_WRITER_TYPE_LBUF = 1
MHLEN = 160
MCLBYTES = 2048 # XXX: may differ on some archs?
MJUMPAGESIZE = mmap.PAGESIZE
MJUM9BYTES = 9 * 1024
MJUM16BYTES = 16 * 1024
NLMSG_SMALL = 128
NLMSG_LARGE = 2048
class TestNetlinkMessageWriter(BaseKernelTest):
KTEST_MODULE_NAME = "ktest_netlink_message_writer"
@ -28,52 +20,20 @@ class TestNetlinkMessageWriter(BaseKernelTest):
pytest.param(M_WAITOK, id="WAITOK"),
],
)
@pytest.mark.parametrize(
"writer_type",
[
pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"),
pytest.param(NS_WRITER_TYPE_BUF, id="BUF"),
],
)
@pytest.mark.parametrize(
"sz",
[
pytest.param([160, 160], id="MHLEN"),
pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"),
pytest.param([NLMSG_SMALL, NLMSG_SMALL], id="NLMSG_SMALL"),
pytest.param([NLMSG_LARGE, NLMSG_LARGE], id="NLMSG_LARGE"),
pytest.param([NLMSG_LARGE + 256, NLMSG_LARGE + 256], id="NLMSG_LARGE+256"),
],
)
def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags):
def test_nlbuf_writer_allocation(self, sz, malloc_flags):
"""override to parametrize"""
test_meta = [
NlAttrU32(1, sz[0]), # size
NlAttrU32(2, sz[1]), # expected_avail
NlAttrU32(4, writer_type),
NlAttrU32(5, malloc_flags),
]
self.runtest(test_meta)
@pytest.mark.parametrize(
"malloc_flags",
[
pytest.param(M_NOWAIT, id="NOWAIT"),
pytest.param(M_WAITOK, id="WAITOK"),
],
)
@pytest.mark.parametrize(
"sz",
[
pytest.param([160, 160, 1], id="MHLEN"),
pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"),
pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"),
pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"),
],
)
def test_mbuf_chain_allocation(self, sz, malloc_flags):
test_meta = [
NlAttrU32(1, sz[0]), # size
NlAttrU32(2, sz[1]), # expected_avail
NlAttrU32(3, sz[2]), # expected_count
NlAttrU32(5, malloc_flags),
NlAttrU32(3, malloc_flags),
]
self.runtest(test_meta)