mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-17 15:27:36 +00:00
update to the latest netmap snapshot.
This includes the following: - use separate memory regions for VALE ports - locking fixes - some simplifications in the NIC-specific routines - performance improvements for the VALE switch - some new features in the pkt-gen test program - documentation updates There are small API changes that require programs to be recompiled (NETMAP_API has been bumped so you will detect old binaries at runtime). In particular: - struct netmap_slot now is 16 bytes to support an extra pointer, which may save one data copy when using VALE ports or VMs; - the struct netmap_if has two extra fields; MFC after: 3 days
This commit is contained in:
parent
a09968c479
commit
ce3ee1e7c4
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=257529
@ -1,4 +1,4 @@
|
||||
.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
|
||||
.\" Copyright (c) 2011-2013 Matteo Landi, Luigi Rizzo, Universita` di Pisa
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
@ -21,14 +21,13 @@
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\"
|
||||
.\" This document is derived in part from the enet man page (enet.4)
|
||||
.\" distributed with 4.3BSD Unix.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\" $Id: netmap.4 11563 2012-08-02 08:59:12Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
|
||||
.\"
|
||||
.Dd September 23, 2013
|
||||
.Dd October 18, 2013
|
||||
.Dt NETMAP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -38,101 +37,230 @@
|
||||
.Cd device netmap
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
is a framework for fast and safe access to network devices
|
||||
(reaching 14.88 Mpps at less than 1 GHz).
|
||||
is a framework for extremely fast and efficient packet I/O
|
||||
(reaching 14.88 Mpps with a single core at less than 1 GHz)
|
||||
for both userspace and kernel clients.
|
||||
Userspace clients can use the netmap API
|
||||
to send and receive raw packets through physical interfaces
|
||||
or ports of the
|
||||
.Xr VALE 4
|
||||
switch.
|
||||
.Pp
|
||||
.Nm VALE
|
||||
is a very fast (reaching 20 Mpps per port)
|
||||
and modular software switch,
|
||||
implemented within the kernel, which can interconnect
|
||||
virtual ports, physical devices, and the native host stack.
|
||||
.Pp
|
||||
.Nm
|
||||
uses memory mapped buffers and metadata
|
||||
(buffer indexes and lengths) to communicate with the kernel,
|
||||
which is in charge of validating information through
|
||||
.Pa ioctl()
|
||||
and
|
||||
.Pa select()/poll().
|
||||
uses a memory mapped region to share packet buffers,
|
||||
descriptors and queues with the kernel.
|
||||
Simple
|
||||
.Pa ioctl()s
|
||||
are used to bind interfaces/ports to file descriptors and
|
||||
implement non-blocking I/O, whereas blocking I/O uses
|
||||
.Pa select()/poll() .
|
||||
.Nm
|
||||
can exploit the parallelism in multiqueue devices and
|
||||
multicore systems.
|
||||
.Pp
|
||||
For the best performance,
|
||||
.Nm
|
||||
requires explicit support in device drivers.
|
||||
For a list of supported devices, see the end of this manual page.
|
||||
.Sh OPERATION
|
||||
requires explicit support in device drivers;
|
||||
a generic emulation layer is available to implement the
|
||||
.Nm
|
||||
clients must first open the
|
||||
API on top of unmodified device drivers,
|
||||
at the price of reduced performance
|
||||
(but still better than what can be achieved with
|
||||
sockets or BPF/pcap).
|
||||
.Pp
|
||||
For a list of devices with native
|
||||
.Nm
|
||||
support, see the end of this manual page.
|
||||
.Pp
|
||||
.Sh OPERATION - THE NETMAP API
|
||||
.Nm
|
||||
clients must first
|
||||
.Pa open("/dev/netmap") ,
|
||||
and then issue an
|
||||
.Pa ioctl(...,NIOCREGIF,...)
|
||||
to bind the file descriptor to a network device.
|
||||
.Pp
|
||||
When a device is put in
|
||||
.Pa ioctl(fd, NIOCREGIF, (struct nmreq *)arg)
|
||||
to bind the file descriptor to a specific interface or port.
|
||||
.Nm
|
||||
mode, its data path is disconnected from the host stack.
|
||||
The processes owning the file descriptor
|
||||
can exchange packets with the device, or with the host stack,
|
||||
through an mmapped memory region that contains pre-allocated
|
||||
buffers and metadata.
|
||||
has multiple modes of operation controlled by the
|
||||
content of the
|
||||
.Pa struct nmreq
|
||||
passed to the
|
||||
.Pa ioctl() .
|
||||
In particular, the
|
||||
.Em nr_name
|
||||
field specifies whether the client operates on a physical network
|
||||
interface or on a port of a
|
||||
.Nm VALE
|
||||
switch, as indicated below. Additional fields in the
|
||||
.Pa struct nmreq
|
||||
control the details of operation.
|
||||
.Pp
|
||||
.Bl -tag -width XXXX
|
||||
.It Dv Interface name (e.g. 'em0', 'eth1', ... )
|
||||
The data path of the interface is disconnected from the host stack.
|
||||
Depending on additional arguments,
|
||||
the file descriptor is bound to the NIC (one or all queues),
|
||||
or to the host stack.
|
||||
.It Dv valeXXX:YYY (arbitrary XXX and YYY)
|
||||
The file descriptor is bound to port YYY of a VALE switch called XXX,
|
||||
where XXX and YYY are arbitrary alphanumeric strings.
|
||||
The string cannot exceed IFNAMSIZ characters, and YYY cannot
|
||||
match the name of any existing interface.
|
||||
.Pp
|
||||
The switch and the port are created if not existing.
|
||||
.It Dv valeXXX:ifname (ifname is an existing interface)
|
||||
Flags in the argument control whether the physical interface
|
||||
(and optionally the corresponding host stack endpoint)
|
||||
are connected or disconnected from the VALE switch named XXX.
|
||||
.Pp
|
||||
In this case the
|
||||
.Pa ioctl()
|
||||
is used only for configuring the VALE switch, typically through the
|
||||
.Nm vale-ctl
|
||||
command.
|
||||
The file descriptor cannot be used for I/O, and should be
|
||||
.Pa close()d
|
||||
after issuing the
|
||||
.Pa ioctl().
|
||||
.El
|
||||
.Pp
|
||||
The binding can be removed (and the interface returns to
|
||||
regular operation, or the virtual port destroyed) with a
|
||||
.Pa close()
|
||||
on the file descriptor.
|
||||
.Pp
|
||||
The processes owning the file descriptor can then
|
||||
.Pa mmap()
|
||||
the memory region that contains pre-allocated
|
||||
buffers, descriptors and queues, and use them to
|
||||
read/write raw packets.
|
||||
Non blocking I/O is done with special
|
||||
.Pa ioctl()'s ,
|
||||
whereas the file descriptor can be passed to
|
||||
.Pa select()/poll()
|
||||
to be notified about incoming packet or available transmit buffers.
|
||||
.Ss Data structures
|
||||
All data structures for all devices in
|
||||
.Ss DATA STRUCTURES
|
||||
The data structures in the mmapped memory are described below
|
||||
(see
|
||||
.Xr sys/net/netmap.h
|
||||
for reference).
|
||||
All physical devices operating in
|
||||
.Nm
|
||||
mode are in a memory
|
||||
region shared by the kernel and all processes
|
||||
who open
|
||||
mode use the same memory region,
|
||||
shared by the kernel and all processes who own
|
||||
.Pa /dev/netmap
|
||||
descriptors bound to those devices
|
||||
(NOTE: visibility may be restricted in future implementations).
|
||||
Virtual ports instead use separate memory regions,
|
||||
shared only with the kernel.
|
||||
.Pp
|
||||
All references between the shared data structures
|
||||
are relative (offsets or indexes). Some macros help converting
|
||||
them into actual pointers.
|
||||
.Pp
|
||||
The data structures in shared memory are the following:
|
||||
.Bl -tag -width XXX
|
||||
.It Dv struct netmap_if (one per interface)
|
||||
indicates the number of rings supported by an interface, their
|
||||
sizes, and the offsets of the
|
||||
.Pa netmap_rings
|
||||
associated to the interface.
|
||||
The offset of a
|
||||
.Pp
|
||||
.Pa struct netmap_if
|
||||
in the shared memory region is indicated by the
|
||||
is at offset
|
||||
.Pa nr_offset
|
||||
in the shared memory region is indicated by the
|
||||
field in the structure returned by the
|
||||
.Pa NIOCREGIF
|
||||
(see below).
|
||||
.Bd -literal
|
||||
struct netmap_if {
|
||||
char ni_name[IFNAMSIZ]; /* name of the interface. */
|
||||
const u_int ni_num_queues; /* number of hw ring pairs */
|
||||
const ssize_t ring_ofs[]; /* offset of tx and rx rings */
|
||||
char ni_name[IFNAMSIZ]; /* name of the interface. */
|
||||
const u_int ni_version; /* API version */
|
||||
const u_int ni_rx_rings; /* number of rx ring pairs */
|
||||
const u_int ni_tx_rings; /* if 0, same as ni_rx_rings */
|
||||
const ssize_t ring_ofs[]; /* offset of tx and rx rings */
|
||||
};
|
||||
.Ed
|
||||
.It Dv struct netmap_ring (one per ring)
|
||||
contains the index of the current read or write slot (cur),
|
||||
the number of slots available for reception or transmission (avail),
|
||||
Contains the positions in the transmit and receive rings to
|
||||
synchronize the kernel and the application,
|
||||
and an array of
|
||||
.Pa slots
|
||||
describing the buffers.
|
||||
There is one ring pair for each of the N hardware ring pairs
|
||||
supported by the card (numbered 0..N-1), plus
|
||||
one ring pair (numbered N) for packets from/to the host stack.
|
||||
'reserved' is used in receive rings to tell the kernel the
|
||||
number of slots after 'cur' that are still in use
|
||||
indicates how many slots starting from 'cur'
|
||||
the
|
||||
.Pp
|
||||
Each physical interface has one
|
||||
.Pa netmap_ring
|
||||
for each hardware transmit and receive ring,
|
||||
plus one extra transmit and one receive structure
|
||||
that connect to the host stack.
|
||||
.Bd -literal
|
||||
struct netmap_ring {
|
||||
const ssize_t buf_ofs;
|
||||
const uint32_t num_slots; /* number of slots in the ring. */
|
||||
uint32_t avail; /* number of usable slots */
|
||||
uint32_t cur; /* 'current' index for the user side */
|
||||
uint32_t reserved; /* not refilled before current */
|
||||
const ssize_t buf_ofs; /* see details */
|
||||
const uint32_t num_slots; /* number of slots in the ring */
|
||||
uint32_t avail; /* number of usable slots */
|
||||
uint32_t cur; /* 'current' read/write index */
|
||||
uint32_t reserved; /* not refilled before current */
|
||||
|
||||
const uint16_t nr_buf_size;
|
||||
uint16_t flags;
|
||||
struct netmap_slot slot[0]; /* array of slots. */
|
||||
uint16_t flags;
|
||||
#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
|
||||
#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
|
||||
#define NR_RX_TSTMP 0x0008 /* set rx timestamp in slots */
|
||||
struct timeval ts;
|
||||
struct netmap_slot slot[0]; /* array of slots */
|
||||
}
|
||||
.Ed
|
||||
.Pp
|
||||
In transmit rings, after a system call 'cur' indicates
|
||||
the first slot that can be used for transmissions,
|
||||
and 'avail' reports how many of them are available.
|
||||
Before the next netmap-related system call on the file
|
||||
descriptor, the application should fill buffers and
|
||||
slots with data, and update 'cur' and 'avail'
|
||||
accordingly, as shown in the figure below:
|
||||
.Bd -literal
|
||||
|
||||
cur
|
||||
|----- avail ---| (after syscall)
|
||||
v
|
||||
TX [*****aaaaaaaaaaaaaaaaa**]
|
||||
TX [*****TTTTTaaaaaaaaaaaa**]
|
||||
^
|
||||
|-- avail --| (before syscall)
|
||||
cur
|
||||
.Ed
|
||||
|
||||
In receive rings, after a system call 'cur' indicates
|
||||
the first slot that contains a valid packet,
|
||||
and 'avail' reports how many of them are available.
|
||||
Before the next netmap-related system call on the file
|
||||
descriptor, the application can process buffers and
|
||||
release them to the kernel updating
|
||||
'cur' and 'avail' accordingly, as shown in the figure below.
|
||||
Receive rings have an additional field called 'reserved'
|
||||
to indicate how many buffers before 'cur' are still
|
||||
under processing and cannot be released.
|
||||
.Bd -literal
|
||||
cur
|
||||
|-res-|-- avail --| (after syscall)
|
||||
v
|
||||
RX [**rrrrrrRRRRRRRRRRRR******]
|
||||
RX [**...........rrrrRRR******]
|
||||
|res|--|<avail (before syscall)
|
||||
^
|
||||
cur
|
||||
|
||||
.Ed
|
||||
.It Dv struct netmap_slot (one per packet)
|
||||
contains the metadata for a packet: a buffer index (buf_idx),
|
||||
a buffer length (len), and some flags.
|
||||
contains the metadata for a packet:
|
||||
.Bd -literal
|
||||
struct netmap_slot {
|
||||
uint32_t buf_idx; /* buffer index */
|
||||
@ -142,23 +270,94 @@ struct netmap_slot {
|
||||
#define NS_REPORT 0x0002 /* tell hw to report results
|
||||
* e.g. by generating an interrupt
|
||||
*/
|
||||
#define NS_FORWARD 0x0004 /* pass packet to the other endpoint
|
||||
* (host stack or device)
|
||||
*/
|
||||
#define NS_NO_LEARN 0x0008
|
||||
#define NS_INDIRECT 0x0010
|
||||
#define NS_MOREFRAG 0x0020
|
||||
#define NS_PORT_SHIFT 8
|
||||
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
|
||||
#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
|
||||
uint64_t ptr; /* buffer address (indirect buffers) */
|
||||
};
|
||||
.Ed
|
||||
The flags control how the buffer associated to the slot
|
||||
should be managed.
|
||||
.It Dv packet buffers
|
||||
are fixed size (approximately 2k) buffers allocated by the kernel
|
||||
are normally fixed size (2 Kbyte) buffers allocated by the kernel
|
||||
that contain packet data. Buffer addresses are computed through
|
||||
macros.
|
||||
.El
|
||||
.Pp
|
||||
.Bl -tag -width XXX
|
||||
Some macros support the access to objects in the shared memory
|
||||
region. In particular:
|
||||
.Bd -literal
|
||||
struct netmap_if *nifp;
|
||||
struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
|
||||
struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
|
||||
int i = txring->slot[txring->cur].buf_idx;
|
||||
char *buf = NETMAP_BUF(txring, i);
|
||||
.Ed
|
||||
region. In particular,
|
||||
.It NETMAP_TXRING(nifp, i)
|
||||
.It NETMAP_RXRING(nifp, i)
|
||||
return the address of the i-th transmit and receive ring,
|
||||
respectively, whereas
|
||||
.It NETMAP_BUF(ring, buf_idx)
|
||||
returns the address of the buffer with index buf_idx
|
||||
(which can be part of any ring for the given interface).
|
||||
.El
|
||||
.Pp
|
||||
Normally, buffers are associated to slots when interfaces are bound,
|
||||
and one packet is fully contained in a single buffer.
|
||||
Clients can however modify the mapping using the
|
||||
following flags:
|
||||
.Ss FLAGS
|
||||
.Bl -tag -width XXX
|
||||
.It NS_BUF_CHANGED
|
||||
indicates that the buf_idx in the slot has changed.
|
||||
This can be useful if the client wants to implement
|
||||
some form of zero-copy forwarding (e.g. by passing buffers
|
||||
from an input interface to an output interface), or
|
||||
needs to process packets out of order.
|
||||
.Pp
|
||||
The flag MUST be used whenever the buffer index is changed.
|
||||
.It NS_REPORT
|
||||
indicates that we want to be woken up when this buffer
|
||||
has been transmitted. This reduces performance but ensures
|
||||
a prompt notification when a buffer has been sent.
|
||||
Normally,
|
||||
.Nm
|
||||
notifies transmit completions in batches, hence signals
|
||||
can be delayed indefinitely. However, we need such notifications
|
||||
before closing a descriptor.
|
||||
.It NS_FORWARD
|
||||
When the device is open in 'transparent' mode,
|
||||
the client can mark slots in receive rings with this flag.
|
||||
For all marked slots, marked packets are forwarded to
|
||||
the other endpoint at the next system call, thus restoring
|
||||
(in a selective way) the connection between the NIC and the
|
||||
host stack.
|
||||
.It NS_NO_LEARN
|
||||
tells the forwarding code that the SRC MAC address for this
|
||||
packet should not be used in the learning bridge
|
||||
.It NS_INDIRECT
|
||||
indicates that the packet's payload is not in the netmap
|
||||
supplied buffer, but in a user-supplied buffer whose
|
||||
user virtual address is in the 'ptr' field of the slot.
|
||||
The size can reach 65535 bytes.
|
||||
.Em This is only supported on the transmit ring of virtual ports
|
||||
.It NS_MOREFRAG
|
||||
indicates that the packet continues with subsequent buffers;
|
||||
the last buffer in a packet must have the flag clear.
|
||||
The maximum length of a chain is 64 buffers.
|
||||
.Em This is only supported on virtual ports
|
||||
.It ns_ctr
|
||||
on receive rings, contains the number of remaining buffers
|
||||
in a packet, including this one.
|
||||
Slots with a value greater than 1 also have NS_MOREFRAG set.
|
||||
The length refers to the individual buffer, there is no
|
||||
field for the total length
|
||||
XXX maybe put it in the ptr field ?
|
||||
.Pp
|
||||
On transmit rings, if NS_DST is set, it is passed to the lookup
|
||||
function, which can use it e.g. as the index of the destination
|
||||
port instead of doing an address lookup.
|
||||
.El
|
||||
.Sh IOCTLS
|
||||
.Nm
|
||||
supports some ioctl() to synchronize the state of the rings
|
||||
@ -166,13 +365,13 @@ between the kernel and the user processes, plus some
|
||||
to query and configure the interface.
|
||||
The former do not require any argument, whereas the latter
|
||||
use a
|
||||
.Pa struct netmap_req
|
||||
.Pa struct nmreq
|
||||
defined as follows:
|
||||
.Bd -literal
|
||||
struct nmreq {
|
||||
char nr_name[IFNAMSIZ];
|
||||
uint32_t nr_version; /* API version */
|
||||
#define NETMAP_API 3 /* current version */
|
||||
#define NETMAP_API 4 /* current version */
|
||||
uint32_t nr_offset; /* nifp offset in the shared region */
|
||||
uint32_t nr_memsize; /* size of the shared region */
|
||||
uint32_t nr_tx_slots; /* slots in tx rings */
|
||||
@ -184,8 +383,14 @@ struct nmreq {
|
||||
#define NETMAP_SW_RING 0x2000 /* we process the sw ring */
|
||||
#define NETMAP_NO_TX_POLL 0x1000 /* no gratuitous txsync on poll */
|
||||
#define NETMAP_RING_MASK 0xfff /* the actual ring number */
|
||||
uint16_t spare1;
|
||||
uint32_t spare2[4];
|
||||
uint16_t nr_cmd;
|
||||
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
|
||||
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
|
||||
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
|
||||
#define NETMAP_BDG_LIST 4 /* get bridge's info */
|
||||
uint16_t nr_arg1;
|
||||
uint16_t nr_arg2;
|
||||
uint32_t spare2[3];
|
||||
};
|
||||
|
||||
.Ed
|
||||
@ -200,15 +405,27 @@ command codes below are defined in
|
||||
and are:
|
||||
.Bl -tag -width XXXX
|
||||
.It Dv NIOCGINFO
|
||||
returns information about the interface named in nr_name.
|
||||
On return, nr_memsize indicates the size of the shared netmap
|
||||
memory region (this is device-independent),
|
||||
nr_tx_slots and nr_rx_slots indicate how many buffers are in a
|
||||
transmit and receive ring,
|
||||
nr_tx_rings and nr_rx_rings indicate the number of transmit
|
||||
and receive rings supported by the hardware.
|
||||
returns EINVAL if the named device does not support netmap.
|
||||
Otherwise, it returns 0 and (advisory) information
|
||||
about the interface.
|
||||
Note that all the information below can change before the
|
||||
interface is actually put in netmap mode.
|
||||
.Pp
|
||||
If the device does not support netmap, the ioctl returns EINVAL.
|
||||
.Pa nr_memsize
|
||||
indicates the size of the netmap
|
||||
memory region. Physical devices all share the same memory region,
|
||||
whereas VALE ports may have independent regions for each port.
|
||||
These sizes can be set through system-wide sysctl variables.
|
||||
.Pa nr_tx_slots, nr_rx_slots
|
||||
indicate the size of transmit and receive rings.
|
||||
.Pa nr_tx_rings, nr_rx_rings
|
||||
indicate the number of transmit
|
||||
and receive rings.
|
||||
Both ring number and sizes may be configured at runtime
|
||||
using interface-specific functions (e.g.
|
||||
.Pa sysctl
|
||||
or
|
||||
.Pa ethtool .
|
||||
.It Dv NIOCREGIF
|
||||
puts the interface named in nr_name into netmap mode, disconnecting
|
||||
it from the host stack, and/or defines which rings are controlled
|
||||
@ -243,8 +460,11 @@ or the send queue is full.
|
||||
.Pa NIOCREGIF
|
||||
can be used multiple times to change the association of a
|
||||
file descriptor to a ring pair, always within the same device.
|
||||
.It Dv NIOCUNREGIF
|
||||
brings an interface back to normal mode.
|
||||
.Pp
|
||||
When registering a virtual interface that is dynamically created to a
|
||||
.Xr vale 4
|
||||
switch, we can specify the desired number of rings (1 by default,
|
||||
and currently up to 16) on it using nr_tx_rings and nr_rx_rings fields.
|
||||
.It Dv NIOCTXSYNC
|
||||
tells the hardware of new packets to transmit, and updates the
|
||||
number of slots available for transmission.
|
||||
@ -255,10 +475,20 @@ packets.
|
||||
.Sh SYSTEM CALLS
|
||||
.Nm
|
||||
uses
|
||||
.Nm select
|
||||
.Xr select 2
|
||||
and
|
||||
.Nm poll
|
||||
to wake up processes when significant events occur.
|
||||
.Xr poll 2
|
||||
to wake up processes when significant events occur, and
|
||||
.Xr mmap 2
|
||||
to map memory.
|
||||
.Pp
|
||||
Applications may need to create threads and bind them to
|
||||
specific cores to improve performance, using standard
|
||||
OS primitives, see
|
||||
.Xr pthread 3 .
|
||||
In particular,
|
||||
.Xr pthread_setaffinity_np 3
|
||||
may be of use.
|
||||
.Sh EXAMPLES
|
||||
The following code implements a traffic generator
|
||||
.Pp
|
||||
@ -272,10 +502,10 @@ struct nmreq nmr;
|
||||
fd = open("/dev/netmap", O_RDWR);
|
||||
bzero(&nmr, sizeof(nmr));
|
||||
strcpy(nmr.nr_name, "ix0");
|
||||
nmr.nr_version = NETMAP_API;
|
||||
ioctl(fd, NIOCREG, &nmr);
|
||||
nmr.nm_version = NETMAP_API;
|
||||
ioctl(fd, NIOCREGIF, &nmr);
|
||||
p = mmap(0, nmr.nr_memsize, fd);
|
||||
nifp = NETMAP_IF(p, nmr.offset);
|
||||
nifp = NETMAP_IF(p, nmr.nr_offset);
|
||||
ring = NETMAP_TXRING(nifp, 0);
|
||||
fds.fd = fd;
|
||||
fds.events = POLLOUT;
|
||||
@ -312,13 +542,17 @@ Usenix ATC'12, June 2012, Boston
|
||||
.An -nosplit
|
||||
The
|
||||
.Nm
|
||||
framework has been designed and implemented at the
|
||||
framework has been originally designed and implemented at the
|
||||
Universita` di Pisa in 2011 by
|
||||
.An Luigi Rizzo ,
|
||||
with help from
|
||||
and further extended with help from
|
||||
.An Matteo Landi ,
|
||||
.An Gaetano Catalli ,
|
||||
.An Giuseppe Lettieri .
|
||||
.An Giuseppe Lettieri ,
|
||||
.An Vincenzo Maffione .
|
||||
.Pp
|
||||
.Nm
|
||||
has been funded by the European Commission within FP7 Project CHANGE (257422).
|
||||
and
|
||||
.Nm VALE
|
||||
have been funded by the European Commission within FP7 Projects
|
||||
CHANGE (257422) and OPENLAB (287581).
|
||||
|
@ -1881,6 +1881,8 @@ dev/nand/nfc_if.m optional nand
|
||||
dev/ncv/ncr53c500.c optional ncv
|
||||
dev/ncv/ncr53c500_pccard.c optional ncv pccard
|
||||
dev/netmap/netmap.c optional netmap
|
||||
dev/netmap/netmap_mem2.c optional netmap
|
||||
# compile-with "${NORMAL_C} -Wconversion -Wextra"
|
||||
dev/nge/if_nge.c optional nge
|
||||
dev/nxge/if_nxge.c optional nxge \
|
||||
compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
|
||||
|
@ -3836,8 +3836,7 @@ em_txeof(struct tx_ring *txr)
|
||||
|
||||
EM_TX_LOCK_ASSERT(txr);
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_tx_irq(ifp, txr->me |
|
||||
(NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
|
||||
if (netmap_tx_irq(ifp, txr->me))
|
||||
return;
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
@ -4101,7 +4100,7 @@ em_setup_receive_ring(struct rx_ring *rxr)
|
||||
sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
|
||||
bzero((void *)rxr->rx_base, rsize);
|
||||
#ifdef DEV_NETMAP
|
||||
slot = netmap_reset(na, NR_RX, 0, 0);
|
||||
slot = netmap_reset(na, NR_RX, rxr->me, 0);
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -4433,8 +4432,10 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
|
||||
EM_RX_LOCK(rxr);
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
|
||||
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
|
||||
EM_RX_UNLOCK(rxr);
|
||||
return (FALSE);
|
||||
}
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
for (i = rxr->next_to_check, processed = 0; count != 0;) {
|
||||
|
@ -3962,8 +3962,7 @@ igb_txeof(struct tx_ring *txr)
|
||||
mtx_assert(&txr->tx_mtx, MA_OWNED);
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_tx_irq(ifp, txr->me |
|
||||
(NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
|
||||
if (netmap_tx_irq(ifp, txr->me))
|
||||
return (FALSE);
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
@ -4829,8 +4828,10 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
|
||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
|
||||
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
|
||||
IGB_RX_UNLOCK(rxr);
|
||||
return (FALSE);
|
||||
}
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
/* Main clean loop */
|
||||
|
@ -2986,7 +2986,7 @@ lem_txeof(struct adapter *adapter)
|
||||
EM_TX_LOCK_ASSERT(adapter);
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
|
||||
if (netmap_tx_irq(ifp, 0))
|
||||
return;
|
||||
#endif /* DEV_NETMAP */
|
||||
if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
|
||||
@ -3455,8 +3455,10 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
|
||||
BUS_DMASYNC_POSTREAD);
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_rx_irq(ifp, 0 | NETMAP_LOCKED_ENTER, &rx_sent))
|
||||
if (netmap_rx_irq(ifp, 0, &rx_sent)) {
|
||||
EM_RX_UNLOCK(adapter);
|
||||
return (FALSE);
|
||||
}
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
|
||||
|
@ -265,6 +265,13 @@
|
||||
#define PICOSECS_PER_TICK 20833
|
||||
#define TSYNC_PORT 319 /* UDP port for the protocol */
|
||||
|
||||
#ifdef NIC_PARAVIRT
|
||||
#define E1000_PARA_SUBDEV 0x1101 /* special id */
|
||||
#define E1000_CSBAL 0x02830 /* csb phys. addr. low */
|
||||
#define E1000_CSBAH 0x02834 /* csb phys. addr. hi */
|
||||
#include <net/paravirt.h>
|
||||
#endif /* NIC_PARAVIRT */
|
||||
|
||||
/*
|
||||
* Bus dma allocation structure used by
|
||||
* e1000_dma_malloc and e1000_dma_free.
|
||||
@ -437,6 +444,26 @@ struct adapter {
|
||||
boolean_t pcix_82544;
|
||||
boolean_t in_detach;
|
||||
|
||||
#ifdef NIC_SEND_COMBINING
|
||||
/* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */
|
||||
#define MIT_PENDING_INT 0x10000 /* pending interrupt */
|
||||
#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */
|
||||
uint32_t shadow_tdt;
|
||||
uint32_t sc_enable;
|
||||
#endif /* NIC_SEND_COMBINING */
|
||||
#ifdef BATCH_DISPATCH
|
||||
uint32_t batch_enable;
|
||||
#endif /* BATCH_DISPATCH */
|
||||
|
||||
#ifdef NIC_PARAVIRT
|
||||
struct em_dma_alloc csb_mem; /* phys address */
|
||||
struct paravirt_csb *csb; /* virtual addr */
|
||||
uint32_t rx_retries; /* optimize rx loop */
|
||||
uint32_t tdt_csb_count;// XXX stat
|
||||
uint32_t tdt_reg_count;// XXX stat
|
||||
uint32_t tdt_int_count;// XXX stat
|
||||
uint32_t guest_need_kick_count;// XXX stat
|
||||
#endif /* NIC_PARAVIRT */
|
||||
|
||||
struct e1000_hw_stats stats;
|
||||
};
|
||||
|
@ -3621,16 +3621,11 @@ ixgbe_txeof(struct tx_ring *txr)
|
||||
* means the user thread should not be woken up);
|
||||
* - the driver ignores tx interrupts unless netmap_mitigate=0
|
||||
* or the slot has the DD bit set.
|
||||
*
|
||||
* When the driver has separate locks, we need to
|
||||
* release and re-acquire txlock to avoid deadlocks.
|
||||
* XXX see if we can find a better way.
|
||||
*/
|
||||
if (!netmap_mitigate ||
|
||||
(kring->nr_kflags < kring->nkr_num_slots &&
|
||||
txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
|
||||
netmap_tx_irq(ifp, txr->me |
|
||||
(NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
|
||||
netmap_tx_irq(ifp, txr->me);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -4422,8 +4417,10 @@ ixgbe_rxeof(struct ix_queue *que)
|
||||
|
||||
#ifdef DEV_NETMAP
|
||||
/* Same as the txeof routine: wakeup clients on intr. */
|
||||
if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
|
||||
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
|
||||
IXGBE_RX_UNLOCK(rxr);
|
||||
return (FALSE);
|
||||
}
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
for (i = rxr->next_to_check; count != 0;) {
|
||||
|
@ -43,35 +43,6 @@ static void em_netmap_block_tasks(struct adapter *);
|
||||
static void em_netmap_unblock_tasks(struct adapter *);
|
||||
|
||||
|
||||
static void
|
||||
em_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
|
||||
ASSERT(queueid < adapter->num_queues);
|
||||
switch (what) {
|
||||
case NETMAP_CORE_LOCK:
|
||||
EM_CORE_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_CORE_UNLOCK:
|
||||
EM_CORE_UNLOCK(adapter);
|
||||
break;
|
||||
case NETMAP_TX_LOCK:
|
||||
EM_TX_LOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_TX_UNLOCK:
|
||||
EM_TX_UNLOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_LOCK:
|
||||
EM_RX_LOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_UNLOCK:
|
||||
EM_RX_UNLOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// XXX do we need to block/unblock the tasks ?
|
||||
static void
|
||||
em_netmap_block_tasks(struct adapter *adapter)
|
||||
@ -137,7 +108,7 @@ em_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
ifp->if_capenable |= IFCAP_NETMAP;
|
||||
|
||||
na->if_transmit = ifp->if_transmit;
|
||||
ifp->if_transmit = netmap_start;
|
||||
ifp->if_transmit = netmap_transmit;
|
||||
|
||||
em_init_locked(adapter);
|
||||
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
|
||||
@ -160,7 +131,7 @@ em_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
* Reconcile kernel and user view of the transmit ring.
|
||||
*/
|
||||
static int
|
||||
em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
|
||||
@ -176,8 +147,6 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
EM_TX_LOCK(txr);
|
||||
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD);
|
||||
|
||||
@ -202,8 +171,6 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
u_int len = slot->len;
|
||||
|
||||
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
|
||||
if (do_lock)
|
||||
EM_TX_UNLOCK(txr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -252,8 +219,6 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* update avail to what the kernel knows */
|
||||
ring->avail = kring->nr_hwavail;
|
||||
|
||||
if (do_lock)
|
||||
EM_TX_UNLOCK(txr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -262,7 +227,7 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* Reconcile kernel and user view of the receive ring.
|
||||
*/
|
||||
static int
|
||||
em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
|
||||
@ -270,16 +235,13 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
struct netmap_kring *kring = &na->rx_rings[ring_nr];
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
u_int j, l, n, lim = kring->nkr_num_slots - 1;
|
||||
int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
|
||||
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
|
||||
u_int k = ring->cur, resvd = ring->reserved;
|
||||
|
||||
k = ring->cur;
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
EM_RX_LOCK(rxr);
|
||||
|
||||
/* XXX check sync modes */
|
||||
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
|
||||
@ -334,8 +296,6 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
void *addr = PNMB(slot, &paddr);
|
||||
|
||||
if (addr == netmap_buffer_base) { /* bad buf */
|
||||
if (do_lock)
|
||||
EM_RX_UNLOCK(rxr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -364,8 +324,6 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
}
|
||||
/* tell userspace that there are new packets */
|
||||
ring->avail = kring->nr_hwavail - resvd;
|
||||
if (do_lock)
|
||||
EM_RX_UNLOCK(rxr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -378,12 +336,11 @@ em_netmap_attach(struct adapter *adapter)
|
||||
bzero(&na, sizeof(na));
|
||||
|
||||
na.ifp = adapter->ifp;
|
||||
na.separate_locks = 1;
|
||||
na.na_flags = NAF_BDG_MAYSLEEP;
|
||||
na.num_tx_desc = adapter->num_tx_desc;
|
||||
na.num_rx_desc = adapter->num_rx_desc;
|
||||
na.nm_txsync = em_netmap_txsync;
|
||||
na.nm_rxsync = em_netmap_rxsync;
|
||||
na.nm_lock = em_netmap_lock_wrapper;
|
||||
na.nm_register = em_netmap_reg;
|
||||
netmap_attach(&na, adapter->num_queues);
|
||||
}
|
||||
|
@ -38,38 +38,6 @@
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
|
||||
|
||||
/*
|
||||
* wrapper to export locks to the generic code
|
||||
*/
|
||||
static void
|
||||
igb_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
|
||||
ASSERT(queueid < adapter->num_queues);
|
||||
switch (what) {
|
||||
case NETMAP_CORE_LOCK:
|
||||
IGB_CORE_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_CORE_UNLOCK:
|
||||
IGB_CORE_UNLOCK(adapter);
|
||||
break;
|
||||
case NETMAP_TX_LOCK:
|
||||
IGB_TX_LOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_TX_UNLOCK:
|
||||
IGB_TX_UNLOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_LOCK:
|
||||
IGB_RX_LOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_UNLOCK:
|
||||
IGB_RX_UNLOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* register-unregister routine
|
||||
*/
|
||||
@ -92,7 +60,7 @@ igb_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
ifp->if_capenable |= IFCAP_NETMAP;
|
||||
|
||||
na->if_transmit = ifp->if_transmit;
|
||||
ifp->if_transmit = netmap_start;
|
||||
ifp->if_transmit = netmap_transmit;
|
||||
|
||||
igb_init_locked(adapter);
|
||||
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
|
||||
@ -114,7 +82,7 @@ igb_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
* Reconcile kernel and user view of the transmit ring.
|
||||
*/
|
||||
static int
|
||||
igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
|
||||
@ -130,8 +98,6 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
IGB_TX_LOCK(txr);
|
||||
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD);
|
||||
|
||||
@ -153,6 +119,13 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* curr is the current slot in the nic ring */
|
||||
union e1000_adv_tx_desc *curr =
|
||||
(union e1000_adv_tx_desc *)&txr->tx_base[l];
|
||||
#ifndef IGB_MEDIA_RESET
|
||||
/* at the same time as IGB_MEDIA_RESET was defined, the
|
||||
* tx buffer descriptor was renamed, so use this to revert
|
||||
* back to the old name.
|
||||
*/
|
||||
#define igb_tx_buf igb_tx_buffer
|
||||
#endif
|
||||
struct igb_tx_buf *txbuf = &txr->tx_buffers[l];
|
||||
int flags = ((slot->flags & NS_REPORT) ||
|
||||
j == 0 || j == report_frequency) ?
|
||||
@ -162,8 +135,6 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
u_int len = slot->len;
|
||||
|
||||
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
|
||||
if (do_lock)
|
||||
IGB_TX_UNLOCK(txr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -223,8 +194,6 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* update avail to what the kernel knows */
|
||||
ring->avail = kring->nr_hwavail;
|
||||
|
||||
if (do_lock)
|
||||
IGB_TX_UNLOCK(txr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -233,7 +202,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* Reconcile kernel and user view of the receive ring.
|
||||
*/
|
||||
static int
|
||||
igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
|
||||
@ -241,16 +210,13 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
struct netmap_kring *kring = &na->rx_rings[ring_nr];
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
u_int j, l, n, lim = kring->nkr_num_slots - 1;
|
||||
int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
|
||||
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
|
||||
u_int k = ring->cur, resvd = ring->reserved;
|
||||
|
||||
k = ring->cur;
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
IGB_RX_LOCK(rxr);
|
||||
|
||||
/* XXX check sync modes */
|
||||
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
|
||||
@ -303,8 +269,6 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
void *addr = PNMB(slot, &paddr);
|
||||
|
||||
if (addr == netmap_buffer_base) { /* bad buf */
|
||||
if (do_lock)
|
||||
IGB_RX_UNLOCK(rxr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -332,8 +296,6 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
}
|
||||
/* tell userspace that there are new packets */
|
||||
ring->avail = kring->nr_hwavail - resvd;
|
||||
if (do_lock)
|
||||
IGB_RX_UNLOCK(rxr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -346,12 +308,11 @@ igb_netmap_attach(struct adapter *adapter)
|
||||
bzero(&na, sizeof(na));
|
||||
|
||||
na.ifp = adapter->ifp;
|
||||
na.separate_locks = 1;
|
||||
na.na_flags = NAF_BDG_MAYSLEEP;
|
||||
na.num_tx_desc = adapter->num_tx_desc;
|
||||
na.num_rx_desc = adapter->num_rx_desc;
|
||||
na.nm_txsync = igb_netmap_txsync;
|
||||
na.nm_rxsync = igb_netmap_rxsync;
|
||||
na.nm_lock = igb_netmap_lock_wrapper;
|
||||
na.nm_register = igb_netmap_reg;
|
||||
netmap_attach(&na, adapter->num_queues);
|
||||
}
|
||||
|
@ -39,35 +39,6 @@
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
|
||||
|
||||
static void
|
||||
lem_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int ringid)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
|
||||
/* only one ring here so ignore the ringid */
|
||||
switch (what) {
|
||||
case NETMAP_CORE_LOCK:
|
||||
EM_CORE_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_CORE_UNLOCK:
|
||||
EM_CORE_UNLOCK(adapter);
|
||||
break;
|
||||
case NETMAP_TX_LOCK:
|
||||
EM_TX_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_TX_UNLOCK:
|
||||
EM_TX_UNLOCK(adapter);
|
||||
break;
|
||||
case NETMAP_RX_LOCK:
|
||||
EM_RX_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_RX_UNLOCK:
|
||||
EM_RX_UNLOCK(adapter);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Register/unregister
|
||||
*/
|
||||
@ -81,6 +52,8 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
if (na == NULL)
|
||||
return EINVAL;
|
||||
|
||||
EM_CORE_LOCK(adapter);
|
||||
|
||||
lem_disable_intr(adapter);
|
||||
|
||||
/* Tell the stack that the interface is no longer active */
|
||||
@ -95,7 +68,7 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
ifp->if_capenable |= IFCAP_NETMAP;
|
||||
|
||||
na->if_transmit = ifp->if_transmit;
|
||||
ifp->if_transmit = netmap_start;
|
||||
ifp->if_transmit = netmap_transmit;
|
||||
|
||||
lem_init_locked(adapter);
|
||||
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
|
||||
@ -114,6 +87,8 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
taskqueue_unblock(adapter->tq); // XXX do we need this ?
|
||||
#endif /* !EM_LEGCY_IRQ */
|
||||
|
||||
EM_CORE_UNLOCK(adapter);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -122,7 +97,7 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
* Reconcile kernel and user view of the transmit ring.
|
||||
*/
|
||||
static int
|
||||
lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct netmap_adapter *na = NA(ifp);
|
||||
@ -133,13 +108,16 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* generate an interrupt approximately every half ring */
|
||||
int report_frequency = kring->nkr_num_slots >> 1;
|
||||
|
||||
ND("%s: hwofs %d, hwcur %d hwavail %d lease %d cur %d avail %d",
|
||||
ifp->if_xname,
|
||||
kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
|
||||
kring->nkr_hwlease,
|
||||
ring->cur, ring->avail);
|
||||
/* take a copy of ring->cur now, and never read it again */
|
||||
k = ring->cur;
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
EM_TX_LOCK(adapter);
|
||||
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD);
|
||||
/*
|
||||
@ -147,6 +125,8 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* netmap ring, l is the corresponding index in the NIC ring.
|
||||
*/
|
||||
j = kring->nr_hwcur;
|
||||
if (netmap_verbose > 255)
|
||||
RD(5, "device %s send %d->%d", ifp->if_xname, j, k);
|
||||
if (j != k) { /* we have new packets to send */
|
||||
l = netmap_idx_k2n(kring, j);
|
||||
for (n = 0; j != k; n++) {
|
||||
@ -163,13 +143,12 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
u_int len = slot->len;
|
||||
|
||||
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
|
||||
if (do_lock)
|
||||
EM_TX_UNLOCK(adapter);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
ND("slot %d NIC %d %s", j, l, nm_dump_buf(addr, len, 128, NULL));
|
||||
|
||||
slot->flags &= ~NS_REPORT;
|
||||
if (slot->flags & NS_BUF_CHANGED) {
|
||||
if (1 || slot->flags & NS_BUF_CHANGED) {
|
||||
/* buffer has changed, reload map */
|
||||
netmap_reload_map(adapter->txtag, txbuf->map, addr);
|
||||
curr->buffer_addr = htole64(paddr);
|
||||
@ -180,11 +159,13 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
htole32( adapter->txd_cmd | len |
|
||||
(E1000_TXD_CMD_EOP | flags) );
|
||||
|
||||
ND("len %d kring %d nic %d", len, j, l);
|
||||
bus_dmamap_sync(adapter->txtag, txbuf->map,
|
||||
BUS_DMASYNC_PREWRITE);
|
||||
j = (j == lim) ? 0 : j + 1;
|
||||
l = (l == lim) ? 0 : l + 1;
|
||||
}
|
||||
ND("sent %d packets from %d, TDT now %d", n, kring->nr_hwcur, l);
|
||||
kring->nr_hwcur = k; /* the saved ring->cur */
|
||||
kring->nr_hwavail -= n;
|
||||
|
||||
@ -199,6 +180,7 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
|
||||
/* record completed transmissions using TDH */
|
||||
l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
|
||||
ND("tdh is now %d", l);
|
||||
if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
|
||||
D("bad TDH %d", l);
|
||||
l -= kring->nkr_num_slots;
|
||||
@ -208,6 +190,9 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* some tx completed, increment hwavail. */
|
||||
if (delta < 0)
|
||||
delta += kring->nkr_num_slots;
|
||||
if (netmap_verbose > 255)
|
||||
RD(5, "%s tx recover %d bufs",
|
||||
ifp->if_xname, delta);
|
||||
adapter->next_tx_to_clean = l;
|
||||
kring->nr_hwavail += delta;
|
||||
}
|
||||
@ -215,8 +200,6 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* update avail to what the kernel knows */
|
||||
ring->avail = kring->nr_hwavail;
|
||||
|
||||
if (do_lock)
|
||||
EM_TX_UNLOCK(adapter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -225,21 +208,19 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* Reconcile kernel and user view of the receive ring.
|
||||
*/
|
||||
static int
|
||||
lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct netmap_adapter *na = NA(ifp);
|
||||
struct netmap_kring *kring = &na->rx_rings[ring_nr];
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
int j, l, n, lim = kring->nkr_num_slots - 1;
|
||||
int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
|
||||
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
|
||||
u_int k = ring->cur, resvd = ring->reserved;
|
||||
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
EM_RX_LOCK(adapter);
|
||||
|
||||
/* XXX check sync modes */
|
||||
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
|
||||
@ -251,6 +232,10 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
*/
|
||||
l = adapter->next_rx_desc_to_check;
|
||||
j = netmap_idx_n2k(kring, l);
|
||||
ND("%s: next NIC %d kring %d (ofs %d), hwcur %d hwavail %d cur %d avail %d",
|
||||
ifp->if_xname,
|
||||
l, j, kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
|
||||
ring->cur, ring->avail);
|
||||
if (netmap_no_pendintr || force_update) {
|
||||
uint16_t slot_flags = kring->nkr_slot_flags;
|
||||
|
||||
@ -266,6 +251,8 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
D("bogus pkt size at %d", j);
|
||||
len = 0;
|
||||
}
|
||||
ND("\n%s", nm_dump_buf(NMB(&ring->slot[j]),
|
||||
len, 128, NULL));
|
||||
ring->slot[j].len = len;
|
||||
ring->slot[j].flags = slot_flags;
|
||||
bus_dmamap_sync(adapter->rxtag,
|
||||
@ -300,8 +287,6 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
void *addr = PNMB(slot, &paddr);
|
||||
|
||||
if (addr == netmap_buffer_base) { /* bad buf */
|
||||
if (do_lock)
|
||||
EM_RX_UNLOCK(adapter);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -332,8 +317,6 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
}
|
||||
/* tell userspace that there are new packets */
|
||||
ring->avail = kring->nr_hwavail - resvd;
|
||||
if (do_lock)
|
||||
EM_RX_UNLOCK(adapter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -346,12 +329,11 @@ lem_netmap_attach(struct adapter *adapter)
|
||||
bzero(&na, sizeof(na));
|
||||
|
||||
na.ifp = adapter->ifp;
|
||||
na.separate_locks = 1;
|
||||
na.na_flags = NAF_BDG_MAYSLEEP;
|
||||
na.num_tx_desc = adapter->num_tx_desc;
|
||||
na.num_rx_desc = adapter->num_rx_desc;
|
||||
na.nm_txsync = lem_netmap_txsync;
|
||||
na.nm_rxsync = lem_netmap_rxsync;
|
||||
na.nm_lock = lem_netmap_lock_wrapper;
|
||||
na.nm_register = lem_netmap_reg;
|
||||
netmap_attach(&na, 1);
|
||||
}
|
||||
|
@ -38,33 +38,6 @@
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
|
||||
|
||||
/*
|
||||
* wrapper to export locks to the generic code
|
||||
* We should not use the tx/rx locks
|
||||
*/
|
||||
static void
|
||||
re_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
|
||||
{
|
||||
struct rl_softc *adapter = ifp->if_softc;
|
||||
|
||||
switch (what) {
|
||||
case NETMAP_CORE_LOCK:
|
||||
RL_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_CORE_UNLOCK:
|
||||
RL_UNLOCK(adapter);
|
||||
break;
|
||||
|
||||
case NETMAP_TX_LOCK:
|
||||
case NETMAP_RX_LOCK:
|
||||
case NETMAP_TX_UNLOCK:
|
||||
case NETMAP_RX_UNLOCK:
|
||||
D("invalid lock call %d, no tx/rx locks here", what);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* support for netmap register/unregisted. We are already under core lock.
|
||||
* only called on the first register or the last unregister.
|
||||
@ -88,7 +61,7 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
|
||||
/* save if_transmit to restore it later */
|
||||
na->if_transmit = ifp->if_transmit;
|
||||
ifp->if_transmit = netmap_start;
|
||||
ifp->if_transmit = netmap_transmit;
|
||||
|
||||
re_init_locked(adapter);
|
||||
|
||||
@ -111,7 +84,7 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
* Reconcile kernel and user view of the transmit ring.
|
||||
*/
|
||||
static int
|
||||
re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct rl_softc *sc = ifp->if_softc;
|
||||
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
|
||||
@ -124,9 +97,6 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
RL_LOCK(sc);
|
||||
|
||||
/* Sync the TX descriptor list */
|
||||
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
|
||||
sc->rl_ldata.rl_tx_list_map,
|
||||
@ -164,8 +134,6 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
int len = slot->len;
|
||||
|
||||
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
|
||||
if (do_lock)
|
||||
RL_UNLOCK(sc);
|
||||
// XXX what about prodidx ?
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
@ -200,8 +168,6 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* start ? */
|
||||
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
|
||||
}
|
||||
if (do_lock)
|
||||
RL_UNLOCK(sc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -210,7 +176,7 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* Reconcile kernel and user view of the receive ring.
|
||||
*/
|
||||
static int
|
||||
re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct rl_softc *sc = ifp->if_softc;
|
||||
struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
|
||||
@ -218,15 +184,13 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
struct netmap_kring *kring = &na->rx_rings[ring_nr];
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
int j, l, n, lim = kring->nkr_num_slots - 1;
|
||||
int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
|
||||
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
|
||||
u_int k = ring->cur, resvd = ring->reserved;
|
||||
|
||||
k = ring->cur;
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
RL_LOCK(sc);
|
||||
/* XXX check sync modes */
|
||||
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
|
||||
sc->rl_ldata.rl_rx_list_map,
|
||||
@ -291,8 +255,6 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
void *addr = PNMB(slot, &paddr);
|
||||
|
||||
if (addr == netmap_buffer_base) { /* bad buf */
|
||||
if (do_lock)
|
||||
RL_UNLOCK(sc);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -323,8 +285,6 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
}
|
||||
/* tell userspace that there are new packets */
|
||||
ring->avail = kring->nr_hwavail - resvd;
|
||||
if (do_lock)
|
||||
RL_UNLOCK(sc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -411,12 +371,11 @@ re_netmap_attach(struct rl_softc *sc)
|
||||
bzero(&na, sizeof(na));
|
||||
|
||||
na.ifp = sc->rl_ifp;
|
||||
na.separate_locks = 0;
|
||||
na.na_flags = NAF_BDG_MAYSLEEP;
|
||||
na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt;
|
||||
na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt;
|
||||
na.nm_txsync = re_netmap_txsync;
|
||||
na.nm_rxsync = re_netmap_rxsync;
|
||||
na.nm_lock = re_netmap_lock_wrapper;
|
||||
na.nm_register = re_netmap_reg;
|
||||
netmap_attach(&na, 1);
|
||||
}
|
||||
|
@ -72,37 +72,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
|
||||
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
|
||||
CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
|
||||
|
||||
/*
|
||||
* wrapper to export locks to the generic netmap code.
|
||||
*/
|
||||
static void
|
||||
ixgbe_netmap_lock_wrapper(struct ifnet *_a, int what, u_int queueid)
|
||||
{
|
||||
struct adapter *adapter = _a->if_softc;
|
||||
|
||||
ASSERT(queueid < adapter->num_queues);
|
||||
switch (what) {
|
||||
case NETMAP_CORE_LOCK:
|
||||
IXGBE_CORE_LOCK(adapter);
|
||||
break;
|
||||
case NETMAP_CORE_UNLOCK:
|
||||
IXGBE_CORE_UNLOCK(adapter);
|
||||
break;
|
||||
case NETMAP_TX_LOCK:
|
||||
IXGBE_TX_LOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_TX_UNLOCK:
|
||||
IXGBE_TX_UNLOCK(&adapter->tx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_LOCK:
|
||||
IXGBE_RX_LOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
case NETMAP_RX_UNLOCK:
|
||||
IXGBE_RX_UNLOCK(&adapter->rx_rings[queueid]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
set_crcstrip(struct ixgbe_hw *hw, int onoff)
|
||||
@ -155,6 +124,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
if (na == NULL)
|
||||
return EINVAL; /* no netmap support here */
|
||||
|
||||
IXGBE_CORE_LOCK(adapter);
|
||||
ixgbe_disable_intr(adapter);
|
||||
|
||||
/* Tell the stack that the interface is no longer active */
|
||||
@ -166,7 +136,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
|
||||
/* save if_transmit and replace with our routine */
|
||||
na->if_transmit = ifp->if_transmit;
|
||||
ifp->if_transmit = netmap_start;
|
||||
ifp->if_transmit = netmap_transmit;
|
||||
|
||||
/*
|
||||
* reinitialize the adapter, now with netmap flag set,
|
||||
@ -186,6 +156,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
ixgbe_init_locked(adapter); /* also enables intr */
|
||||
}
|
||||
set_crcstrip(&adapter->hw, onoff);
|
||||
IXGBE_CORE_UNLOCK(adapter);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -213,12 +184,11 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
|
||||
*
|
||||
* ring->avail is never used, only checked for bogus values.
|
||||
*
|
||||
* do_lock is set iff the function is called from the ioctl handler.
|
||||
* In this case, grab a lock around the body, and also reclaim transmitted
|
||||
* I flags & FORCE_RECLAIM, reclaim transmitted
|
||||
* buffers irrespective of interrupt mitigation.
|
||||
*/
|
||||
static int
|
||||
ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
|
||||
@ -237,8 +207,6 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
if (do_lock)
|
||||
IXGBE_TX_LOCK(txr);
|
||||
|
||||
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD);
|
||||
@ -303,8 +271,6 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
*/
|
||||
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
|
||||
ring_reset:
|
||||
if (do_lock)
|
||||
IXGBE_TX_UNLOCK(txr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -347,7 +313,7 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* In all cases kring->nr_kflags indicates which slot will be
|
||||
* checked upon a tx interrupt (nkr_num_slots means none).
|
||||
*/
|
||||
if (do_lock) {
|
||||
if (flags & NAF_FORCE_RECLAIM) {
|
||||
j = 1; /* forced reclaim, ignore interrupts */
|
||||
kring->nr_kflags = kring->nkr_num_slots;
|
||||
} else if (kring->nr_hwavail > 0) {
|
||||
@ -422,8 +388,6 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* update avail to what the kernel knows */
|
||||
ring->avail = kring->nr_hwavail;
|
||||
|
||||
if (do_lock)
|
||||
IXGBE_TX_UNLOCK(txr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -442,10 +406,11 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
* from nr_hwavail, make the descriptors available for the next reads,
|
||||
* and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
|
||||
*
|
||||
* do_lock has a special meaning: please refer to txsync.
|
||||
* If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
|
||||
* of whether or not we received an interrupt.
|
||||
*/
|
||||
static int
|
||||
ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
|
||||
{
|
||||
struct adapter *adapter = ifp->if_softc;
|
||||
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
|
||||
@ -453,14 +418,12 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
struct netmap_kring *kring = &na->rx_rings[ring_nr];
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
u_int j, l, n, lim = kring->nkr_num_slots - 1;
|
||||
int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
|
||||
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
|
||||
u_int k = ring->cur, resvd = ring->reserved;
|
||||
|
||||
if (k > lim)
|
||||
return netmap_ring_reinit(kring);
|
||||
|
||||
if (do_lock)
|
||||
IXGBE_RX_LOCK(rxr);
|
||||
/* XXX check sync modes */
|
||||
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
|
||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
|
||||
@ -571,13 +534,9 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
|
||||
/* tell userspace that there are new packets */
|
||||
ring->avail = kring->nr_hwavail - resvd;
|
||||
|
||||
if (do_lock)
|
||||
IXGBE_RX_UNLOCK(rxr);
|
||||
return 0;
|
||||
|
||||
ring_reset:
|
||||
if (do_lock)
|
||||
IXGBE_RX_UNLOCK(rxr);
|
||||
return netmap_ring_reinit(kring);
|
||||
}
|
||||
|
||||
@ -597,12 +556,11 @@ ixgbe_netmap_attach(struct adapter *adapter)
|
||||
bzero(&na, sizeof(na));
|
||||
|
||||
na.ifp = adapter->ifp;
|
||||
na.separate_locks = 1; /* this card has separate rx/tx locks */
|
||||
na.na_flags = NAF_BDG_MAYSLEEP;
|
||||
na.num_tx_desc = adapter->num_tx_desc;
|
||||
na.num_rx_desc = adapter->num_rx_desc;
|
||||
na.nm_txsync = ixgbe_netmap_txsync;
|
||||
na.nm_rxsync = ixgbe_netmap_rxsync;
|
||||
na.nm_lock = ixgbe_netmap_lock_wrapper;
|
||||
na.nm_register = ixgbe_netmap_reg;
|
||||
netmap_attach(&na, adapter->num_queues);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -35,26 +35,28 @@
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#define likely(x) __builtin_expect((long)!!(x), 1L)
|
||||
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
|
||||
|
||||
#define NM_LOCK_T struct mtx
|
||||
#define NM_RWLOCK_T struct rwlock
|
||||
#define NM_SELINFO_T struct selinfo
|
||||
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
|
||||
#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
|
||||
|
||||
#define NM_ATOMIC_T volatile int
|
||||
|
||||
#elif defined (linux)
|
||||
|
||||
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
|
||||
#define NM_RWLOCK_T safe_spinlock_t // see bsd_glue.h
|
||||
#define NM_SELINFO_T wait_queue_head_t
|
||||
#define MBUF_LEN(m) ((m)->len)
|
||||
#define NM_SEND_UP(ifp, m) netif_rx(m)
|
||||
|
||||
#define NM_ATOMIC_T volatile long unsigned int
|
||||
|
||||
#ifndef DEV_NETMAP
|
||||
#define DEV_NETMAP
|
||||
#endif
|
||||
#endif /* DEV_NETMAP */
|
||||
|
||||
/*
|
||||
* IFCAP_NETMAP goes into net_device's priv_flags (if_capenable).
|
||||
@ -111,6 +113,8 @@ struct nm_bdg_fwd;
|
||||
struct nm_bridge;
|
||||
struct netmap_priv_d;
|
||||
|
||||
const char *nm_dump_buf(char *p, int len, int lim, char *dst);
|
||||
|
||||
/*
|
||||
* private, kernel view of a ring. Keeps track of the status of
|
||||
* a ring across system calls.
|
||||
@ -128,26 +132,120 @@ struct netmap_priv_d;
|
||||
* the next empty buffer as known by the hardware (next_to_check or so).
|
||||
* TX rings: hwcur + hwofs coincides with next_to_send
|
||||
*
|
||||
* Clients cannot issue concurrent syscall on a ring. The system
|
||||
* detects this and reports an error using two flags,
|
||||
* NKR_WBUSY and NKR_RBUSY
|
||||
* For received packets, slot->flags is set to nkr_slot_flags
|
||||
* so we can provide a proper initial value (e.g. set NS_FORWARD
|
||||
* when operating in 'transparent' mode).
|
||||
*
|
||||
* The following fields are used to implement lock-free copy of packets
|
||||
* from input to output ports in VALE switch:
|
||||
* nkr_hwlease buffer after the last one being copied.
|
||||
* A writer in nm_bdg_flush reserves N buffers
|
||||
* from nr_hwlease, advances it, then does the
|
||||
* copy outside the lock.
|
||||
* In RX rings (used for VALE ports),
|
||||
* nkr_hwcur + nkr_hwavail <= nkr_hwlease < nkr_hwcur+N-1
|
||||
* In TX rings (used for NIC or host stack ports)
|
||||
* nkr_hwcur <= nkr_hwlease < nkr_hwcur+ nkr_hwavail
|
||||
* nkr_leases array of nkr_num_slots where writers can report
|
||||
* completion of their block. NR_NOSLOT (~0) indicates
|
||||
* that the writer has not finished yet
|
||||
* nkr_lease_idx index of next free slot in nr_leases, to be assigned
|
||||
*
|
||||
* The kring is manipulated by txsync/rxsync and generic netmap function.
|
||||
* q_lock is used to arbitrate access to the kring from within the netmap
|
||||
* code, and this and other protections guarantee that there is never
|
||||
* more than 1 concurrent call to txsync or rxsync. So we are free
|
||||
* to manipulate the kring from within txsync/rxsync without any extra
|
||||
* locks.
|
||||
*/
|
||||
struct netmap_kring {
|
||||
struct netmap_ring *ring;
|
||||
u_int nr_hwcur;
|
||||
int nr_hwavail;
|
||||
u_int nr_kflags; /* private driver flags */
|
||||
uint32_t nr_hwcur;
|
||||
uint32_t nr_hwavail;
|
||||
uint32_t nr_kflags; /* private driver flags */
|
||||
#define NKR_PENDINTR 0x1 // Pending interrupt.
|
||||
u_int nkr_num_slots;
|
||||
uint32_t nkr_num_slots;
|
||||
int32_t nkr_hwofs; /* offset between NIC and netmap ring */
|
||||
|
||||
uint16_t nkr_slot_flags; /* initial value for flags */
|
||||
int nkr_hwofs; /* offset between NIC and netmap ring */
|
||||
struct netmap_adapter *na;
|
||||
struct nm_bdg_fwd *nkr_ft;
|
||||
uint32_t *nkr_leases;
|
||||
#define NR_NOSLOT ((uint32_t)~0)
|
||||
uint32_t nkr_hwlease;
|
||||
uint32_t nkr_lease_idx;
|
||||
|
||||
NM_SELINFO_T si; /* poll/select wait queue */
|
||||
NM_LOCK_T q_lock; /* used if no device lock available */
|
||||
NM_LOCK_T q_lock; /* protects kring and ring. */
|
||||
NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
|
||||
|
||||
volatile int nkr_stopped;
|
||||
} __attribute__((__aligned__(64)));
|
||||
|
||||
|
||||
/* return the next index, with wraparound */
|
||||
static inline uint32_t
|
||||
nm_next(uint32_t i, uint32_t lim)
|
||||
{
|
||||
return unlikely (i == lim) ? 0 : i + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* Here is the layout for the Rx and Tx rings.
|
||||
|
||||
RxRING TxRING
|
||||
|
||||
+-----------------+ +-----------------+
|
||||
| | | |
|
||||
|XXX free slot XXX| |XXX free slot XXX|
|
||||
+-----------------+ +-----------------+
|
||||
| |<-hwcur | |<-hwcur
|
||||
| reserved h | | (ready |
|
||||
+----------- w -+ | to be |
|
||||
cur->| a | | sent) h |
|
||||
| v | +---------- w |
|
||||
| a | cur->| (being a |
|
||||
| i | | prepared) v |
|
||||
| avail l | | a |
|
||||
+-----------------+ + a ------ i +
|
||||
| | ... | v l |<-hwlease
|
||||
| (being | ... | a | ...
|
||||
| prepared) | ... | i | ...
|
||||
+-----------------+ ... | l | ...
|
||||
| |<-hwlease +-----------------+
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
+-----------------+ +-----------------+
|
||||
|
||||
* The cur/avail (user view) and hwcur/hwavail (kernel view)
|
||||
* are used in the normal operation of the card.
|
||||
*
|
||||
* When a ring is the output of a switch port (Rx ring for
|
||||
* a VALE port, Tx ring for the host stack or NIC), slots
|
||||
* are reserved in blocks through 'hwlease' which points
|
||||
* to the next unused slot.
|
||||
* On an Rx ring, hwlease is always after hwavail,
|
||||
* and completions cause avail to advance.
|
||||
* On a Tx ring, hwlease is always between cur and hwavail,
|
||||
* and completions cause cur to advance.
|
||||
*
|
||||
* nm_kr_space() returns the maximum number of slots that
|
||||
* can be assigned.
|
||||
* nm_kr_lease() reserves the required number of buffers,
|
||||
* advances nkr_hwlease and also returns an entry in
|
||||
* a circular array where completions should be reported.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* This struct extends the 'struct adapter' (or
|
||||
* equivalent) device descriptor. It contains all fields needed to
|
||||
@ -167,6 +265,13 @@ struct netmap_adapter {
|
||||
* useful during initialization
|
||||
*/
|
||||
#define NAF_SW_ONLY 2 /* forward packets only to sw adapter */
|
||||
#define NAF_BDG_MAYSLEEP 4 /* the bridge is allowed to sleep when
|
||||
* forwarding packets coming from this
|
||||
* interface
|
||||
*/
|
||||
#define NAF_MEM_OWNER 8 /* the adapter is responsible for the
|
||||
* deallocation of the memory allocator
|
||||
*/
|
||||
int refcount; /* number of user-space descriptors using this
|
||||
interface, which is equal to the number of
|
||||
struct netmap_if objs in the mapped region. */
|
||||
@ -179,9 +284,6 @@ struct netmap_adapter {
|
||||
int na_single; /* threads attached to a single hw queue */
|
||||
int na_multi; /* threads attached to multiple hw queues */
|
||||
|
||||
int separate_locks; /* set if the interface suports different
|
||||
locks for rx, tx and core. */
|
||||
|
||||
u_int num_rx_rings; /* number of adapter receive rings */
|
||||
u_int num_tx_rings; /* number of adapter transmit rings */
|
||||
|
||||
@ -210,9 +312,11 @@ struct netmap_adapter {
|
||||
NM_LOCK_T core_lock; /* used if no device lock available */
|
||||
|
||||
int (*nm_register)(struct ifnet *, int onoff);
|
||||
void (*nm_lock)(struct ifnet *, int what, u_int ringid);
|
||||
int (*nm_txsync)(struct ifnet *, u_int ring, int lock);
|
||||
int (*nm_rxsync)(struct ifnet *, u_int ring, int lock);
|
||||
|
||||
int (*nm_txsync)(struct ifnet *, u_int ring, int flags);
|
||||
int (*nm_rxsync)(struct ifnet *, u_int ring, int flags);
|
||||
#define NAF_FORCE_READ 1
|
||||
#define NAF_FORCE_RECLAIM 2
|
||||
/* return configuration information */
|
||||
int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd,
|
||||
u_int *rxr, u_int *rxd);
|
||||
@ -236,12 +340,105 @@ struct netmap_adapter {
|
||||
* This is only done when physical interfaces are attached to a bridge.
|
||||
*/
|
||||
struct netmap_priv_d *na_kpriv;
|
||||
|
||||
/* memory allocator */
|
||||
struct netmap_mem_d *nm_mem;
|
||||
#ifdef linux
|
||||
struct net_device_ops nm_ndo;
|
||||
#endif /* linux */
|
||||
};
|
||||
|
||||
/*
|
||||
* Available space in the ring.
|
||||
*/
|
||||
static inline uint32_t
|
||||
nm_kr_space(struct netmap_kring *k, int is_rx)
|
||||
{
|
||||
int space;
|
||||
|
||||
if (is_rx) {
|
||||
int busy = k->nkr_hwlease - k->nr_hwcur;
|
||||
if (busy < 0)
|
||||
busy += k->nkr_num_slots;
|
||||
space = k->nkr_num_slots - 1 - busy;
|
||||
} else {
|
||||
space = k->nr_hwcur + k->nr_hwavail - k->nkr_hwlease;
|
||||
if (space < 0)
|
||||
space += k->nkr_num_slots;
|
||||
}
|
||||
#if 0
|
||||
// sanity check
|
||||
if (k->nkr_hwlease >= k->nkr_num_slots ||
|
||||
k->nr_hwcur >= k->nkr_num_slots ||
|
||||
k->nr_hwavail >= k->nkr_num_slots ||
|
||||
busy < 0 ||
|
||||
busy >= k->nkr_num_slots) {
|
||||
D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
|
||||
k->nkr_lease_idx, k->nkr_num_slots);
|
||||
}
|
||||
#endif
|
||||
return space;
|
||||
}
|
||||
|
||||
|
||||
/* return update position */
|
||||
static inline uint32_t
|
||||
nm_kr_rxpos(struct netmap_kring *k)
|
||||
{
|
||||
uint32_t pos = k->nr_hwcur + k->nr_hwavail;
|
||||
if (pos >= k->nkr_num_slots)
|
||||
pos -= k->nkr_num_slots;
|
||||
#if 0
|
||||
if (pos >= k->nkr_num_slots ||
|
||||
k->nkr_hwlease >= k->nkr_num_slots ||
|
||||
k->nr_hwcur >= k->nkr_num_slots ||
|
||||
k->nr_hwavail >= k->nkr_num_slots ||
|
||||
k->nkr_lease_idx >= k->nkr_num_slots) {
|
||||
D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
|
||||
k->nkr_lease_idx, k->nkr_num_slots);
|
||||
}
|
||||
#endif
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
/* make a lease on the kring for N positions. return the
|
||||
* lease index
|
||||
*/
|
||||
static inline uint32_t
|
||||
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
|
||||
{
|
||||
uint32_t lim = k->nkr_num_slots - 1;
|
||||
uint32_t lease_idx = k->nkr_lease_idx;
|
||||
|
||||
k->nkr_leases[lease_idx] = NR_NOSLOT;
|
||||
k->nkr_lease_idx = nm_next(lease_idx, lim);
|
||||
|
||||
if (n > nm_kr_space(k, is_rx)) {
|
||||
D("invalid request for %d slots", n);
|
||||
panic("x");
|
||||
}
|
||||
/* XXX verify that there are n slots */
|
||||
k->nkr_hwlease += n;
|
||||
if (k->nkr_hwlease > lim)
|
||||
k->nkr_hwlease -= lim + 1;
|
||||
|
||||
if (k->nkr_hwlease >= k->nkr_num_slots ||
|
||||
k->nr_hwcur >= k->nkr_num_slots ||
|
||||
k->nr_hwavail >= k->nkr_num_slots ||
|
||||
k->nkr_lease_idx >= k->nkr_num_slots) {
|
||||
D("invalid kring %s, cur %d avail %d lease %d lease_idx %d lim %d",
|
||||
k->na->ifp->if_xname,
|
||||
k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
|
||||
k->nkr_lease_idx, k->nkr_num_slots);
|
||||
}
|
||||
return lease_idx;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* XXX NETMAP_DELETING() is unused
|
||||
*
|
||||
* The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
|
||||
* and refcount gives the status of the interface, namely:
|
||||
*
|
||||
@ -256,25 +453,6 @@ struct netmap_adapter {
|
||||
#define NETMAP_DELETING(_na) ( ((_na)->refcount == 0) && \
|
||||
( (_na)->ifp->if_capenable & IFCAP_NETMAP) )
|
||||
|
||||
/*
|
||||
* parameters for (*nm_lock)(adapter, what, index)
|
||||
*/
|
||||
enum {
|
||||
NETMAP_NO_LOCK = 0,
|
||||
NETMAP_CORE_LOCK, NETMAP_CORE_UNLOCK,
|
||||
NETMAP_TX_LOCK, NETMAP_TX_UNLOCK,
|
||||
NETMAP_RX_LOCK, NETMAP_RX_UNLOCK,
|
||||
#ifdef __FreeBSD__
|
||||
#define NETMAP_REG_LOCK NETMAP_CORE_LOCK
|
||||
#define NETMAP_REG_UNLOCK NETMAP_CORE_UNLOCK
|
||||
#else
|
||||
NETMAP_REG_LOCK, NETMAP_REG_UNLOCK
|
||||
#endif
|
||||
};
|
||||
|
||||
/* How to handle locking support in netmap_rx_irq/netmap_tx_irq */
|
||||
#define NETMAP_LOCKED_ENTER 0x10000000 /* already locked on enter */
|
||||
#define NETMAP_LOCKED_EXIT 0x20000000 /* keep locked on exit */
|
||||
|
||||
/*
|
||||
* The following are support routines used by individual drivers to
|
||||
@ -285,7 +463,7 @@ enum {
|
||||
*
|
||||
* netmap_detach() frees the memory allocated by netmap_attach().
|
||||
*
|
||||
* netmap_start() replaces the if_transmit routine of the interface,
|
||||
* netmap_transmit() replaces the if_transmit routine of the interface,
|
||||
* and is used to intercept packets coming from the stack.
|
||||
*
|
||||
* netmap_load_map/netmap_reload_map are helper routines to set/reset
|
||||
@ -294,14 +472,16 @@ enum {
|
||||
* netmap_reset() is a helper routine to be called in the driver
|
||||
* when reinitializing a ring.
|
||||
*/
|
||||
int netmap_attach(struct netmap_adapter *, int);
|
||||
int netmap_attach(struct netmap_adapter *, u_int);
|
||||
void netmap_detach(struct ifnet *);
|
||||
int netmap_start(struct ifnet *, struct mbuf *);
|
||||
int netmap_transmit(struct ifnet *, struct mbuf *);
|
||||
enum txrx { NR_RX = 0, NR_TX = 1 };
|
||||
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
|
||||
enum txrx tx, int n, u_int new_cur);
|
||||
enum txrx tx, u_int n, u_int new_cur);
|
||||
int netmap_ring_reinit(struct netmap_kring *);
|
||||
|
||||
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
|
||||
|
||||
/*
|
||||
* The following bridge-related interfaces are used by other kernel modules
|
||||
* In the version that only supports unicast or broadcast, the lookup
|
||||
@ -451,6 +631,7 @@ netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
|
||||
|
||||
#endif /* linux */
|
||||
|
||||
|
||||
/*
|
||||
* functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
|
||||
*/
|
||||
@ -515,7 +696,15 @@ PNMB(struct netmap_slot *slot, uint64_t *pp)
|
||||
}
|
||||
|
||||
/* default functions to handle rx/tx interrupts */
|
||||
int netmap_rx_irq(struct ifnet *, int, int *);
|
||||
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
|
||||
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
MALLOC_DECLARE(M_NETMAP);
|
||||
#endif /* __FreeBSD__ */
|
||||
|
||||
|
||||
void netmap_disable_all_rings(struct ifnet *);
|
||||
void netmap_enable_all_rings(struct ifnet *);
|
||||
|
||||
#endif /* _NET_NETMAP_KERN_H_ */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2134,8 +2134,7 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp)
|
||||
|
||||
ifp = sc->rl_ifp;
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_rx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT),
|
||||
&rx_npkts))
|
||||
if (netmap_rx_irq(ifp, 0, &rx_npkts))
|
||||
return 0;
|
||||
#endif /* DEV_NETMAP */
|
||||
if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
|
||||
@ -2380,7 +2379,7 @@ re_txeof(struct rl_softc *sc)
|
||||
|
||||
ifp = sc->rl_ifp;
|
||||
#ifdef DEV_NETMAP
|
||||
if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
|
||||
if (netmap_tx_irq(ifp, 0))
|
||||
return;
|
||||
#endif /* DEV_NETMAP */
|
||||
/* Invalidate the TX descriptor list */
|
||||
|
265
sys/net/netmap.h
265
sys/net/netmap.h
@ -38,6 +38,8 @@
|
||||
* Detailed info on netmap is available with "man netmap" or at
|
||||
*
|
||||
* http://info.iet.unipi.it/~luigi/netmap/
|
||||
*
|
||||
* This API is also used to communicate with the VALE software switch
|
||||
*/
|
||||
|
||||
#ifndef _NET_NETMAP_H_
|
||||
@ -46,106 +48,95 @@
|
||||
/*
|
||||
* --- Netmap data structures ---
|
||||
*
|
||||
* The data structures used by netmap are shown below. Those in
|
||||
* capital letters are in an mmapp()ed area shared with userspace,
|
||||
* while others are private to the kernel.
|
||||
* Shared structures do not contain pointers but only memory
|
||||
* offsets, so that addressing is portable between kernel and userspace.
|
||||
* The userspace data structures used by netmap are shown below.
|
||||
* They are allocated by the kernel and mmap()ed by userspace threads.
|
||||
* Pointers are implemented as memory offsets or indexes,
|
||||
* so that they can be easily dereferenced in kernel and userspace.
|
||||
|
||||
KERNEL (opaque, obviously)
|
||||
|
||||
softc
|
||||
+----------------+
|
||||
| standard fields|
|
||||
| if_pspare[0] ----------+
|
||||
+----------------+ |
|
||||
|
|
||||
+----------------+<------+
|
||||
|(netmap_adapter)|
|
||||
| | netmap_kring
|
||||
| tx_rings *--------------------------------->+---------------+
|
||||
| | netmap_kring | ring *---------.
|
||||
| rx_rings *--------->+---------------+ | nr_hwcur | |
|
||||
+----------------+ | ring *--------. | nr_hwavail | V
|
||||
| nr_hwcur | | | selinfo | |
|
||||
| nr_hwavail | | +---------------+ .
|
||||
| selinfo | | | ... | .
|
||||
+---------------+ | |(ntx+1 entries)|
|
||||
| .... | | | |
|
||||
|(nrx+1 entries)| | +---------------+
|
||||
| | |
|
||||
KERNEL +---------------+ |
|
||||
|
|
||||
====================================================================
|
||||
|
|
||||
USERSPACE | NETMAP_RING
|
||||
+---->+-------------+
|
||||
/ | cur |
|
||||
NETMAP_IF (nifp, one per file desc.) / | avail |
|
||||
+---------------+ / | buf_ofs |
|
||||
| ni_tx_rings | / +=============+
|
||||
| ni_rx_rings | / | buf_idx | slot[0]
|
||||
| | / | len, flags |
|
||||
| | / +-------------+
|
||||
+===============+ / | buf_idx | slot[1]
|
||||
| txring_ofs[0] | (rel.to nifp)--' | len, flags |
|
||||
| txring_ofs[1] | +-------------+
|
||||
(num_rings+1 entries) (nr_num_slots entries)
|
||||
| txring_ofs[n] | | buf_idx | slot[n-1]
|
||||
+---------------+ | len, flags |
|
||||
| rxring_ofs[0] | +-------------+
|
||||
USERSPACE | struct netmap_ring
|
||||
+---->+--------------+
|
||||
/ | cur |
|
||||
struct netmap_if (nifp, 1 per fd) / | avail |
|
||||
+---------------+ / | buf_ofs |
|
||||
| ni_tx_rings | / +==============+
|
||||
| ni_rx_rings | / | buf_idx, len | slot[0]
|
||||
| | / | flags, ptr |
|
||||
| | / +--------------+
|
||||
+===============+ / | buf_idx, len | slot[1]
|
||||
| txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
|
||||
| txring_ofs[1] | +--------------+
|
||||
(ni_tx_rings+1 entries) (num_slots entries)
|
||||
| txring_ofs[t] | | buf_idx, len | slot[n-1]
|
||||
+---------------+ | flags, ptr |
|
||||
| rxring_ofs[0] | +--------------+
|
||||
| rxring_ofs[1] |
|
||||
(num_rings+1 entries)
|
||||
| txring_ofs[n] |
|
||||
(ni_rx_rings+1 entries)
|
||||
| rxring_ofs[r] |
|
||||
+---------------+
|
||||
|
||||
* The private descriptor ('softc' or 'adapter') of each interface
|
||||
* is extended with a "struct netmap_adapter" containing netmap-related
|
||||
* info (see description in dev/netmap/netmap_kernel.h.
|
||||
* Among other things, tx_rings and rx_rings point to the arrays of
|
||||
* "struct netmap_kring" which in turn reache the various
|
||||
* "struct netmap_ring", shared with userspace.
|
||||
|
||||
* The NETMAP_RING is the userspace-visible replica of the NIC ring.
|
||||
* Each slot has the index of a buffer, its length and some flags.
|
||||
* For each "interface" (NIC, host stack, VALE switch port) attached to a
|
||||
* file descriptor, the mmap()ed region contains a (logically readonly)
|
||||
* struct netmap_if pointing to struct netmap_ring's.
|
||||
* There is one netmap_ring per physical NIC ring, plus one tx/rx ring
|
||||
* pair attached to the host stack (this pair is unused for VALE ports).
|
||||
*
|
||||
* All physical/host stack ports share the same memory region,
|
||||
* so that zero-copy can be implemented between them.
|
||||
* VALE switch ports instead have separate memory regions.
|
||||
*
|
||||
* The netmap_ring is the userspace-visible replica of the NIC ring.
|
||||
* Each slot has the index of a buffer (MTU-sized and residing in the
|
||||
* mmapped region), its length and some flags. An extra 64-bit pointer
|
||||
* is provided for user-supplied buffers in the tx path.
|
||||
*
|
||||
* In user space, the buffer address is computed as
|
||||
* (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
|
||||
* In the kernel, buffers do not necessarily need to be contiguous,
|
||||
* and the virtual and physical addresses are derived through
|
||||
* a lookup table.
|
||||
*/
|
||||
|
||||
/*
|
||||
* struct netmap_slot is a buffer descriptor
|
||||
*
|
||||
* struct netmap_slot:
|
||||
* buf_idx the index of the buffer associated to the slot.
|
||||
* len the length of the payload
|
||||
* flags control operation on the slot, as defined below
|
||||
*
|
||||
* buf_idx is the index of the buffer associated to the slot.
|
||||
* len is the length of the payload
|
||||
* NS_BUF_CHANGED must be set whenever userspace wants
|
||||
* to change buf_idx (it might be necessary to
|
||||
* reprogram the NIC slot)
|
||||
* reprogram the NIC)
|
||||
*
|
||||
* NS_REPORT must be set if we want the NIC to generate an interrupt
|
||||
* when this slot is used. Leaving it to 0 improves
|
||||
* performance.
|
||||
*
|
||||
* NS_FORWARD if set on a receive ring, and the device is in
|
||||
* transparent mode, buffers released with the flag set
|
||||
* will be forwarded to the 'other' side (host stack
|
||||
* or NIC, respectively) on the next select() or ioctl()
|
||||
*
|
||||
* The following will be supported from NETMAP_API = 5
|
||||
* NS_NO_LEARN on a VALE switch, do not 'learn' the source port for
|
||||
* this packet.
|
||||
* NS_INDIRECT the netmap buffer contains a 64-bit pointer to
|
||||
* the actual userspace buffer. This may be useful
|
||||
* to reduce copies in a VM environment.
|
||||
*
|
||||
* NS_INDIRECT (tx rings only) data is in a userspace buffer pointed
|
||||
* by the ptr field in the slot.
|
||||
*
|
||||
* NS_MOREFRAG Part of a multi-segment frame. The last (or only)
|
||||
* segment must not have this flag.
|
||||
* Only supported on VALE ports.
|
||||
*
|
||||
* NS_PORT_MASK the high 8 bits of the flag, if not zero, indicate the
|
||||
* destination port for the VALE switch, overriding
|
||||
* the lookup table.
|
||||
*/
|
||||
|
||||
struct netmap_slot {
|
||||
uint32_t buf_idx; /* buffer index */
|
||||
uint16_t len; /* packet length, to be copied to/from the hw ring */
|
||||
uint16_t flags; /* buf changed, etc. */
|
||||
#define NS_BUF_CHANGED 0x0001 /* must resync the map, buffer changed */
|
||||
uint32_t buf_idx; /* buffer index */
|
||||
uint16_t len; /* packet length */
|
||||
uint16_t flags; /* buf changed, etc. */
|
||||
#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
|
||||
#define NS_REPORT 0x0002 /* ask the hardware to report results
|
||||
* e.g. by generating an interrupt
|
||||
*/
|
||||
@ -157,62 +148,61 @@ struct netmap_slot {
|
||||
#define NS_MOREFRAG 0x0020
|
||||
#define NS_PORT_SHIFT 8
|
||||
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
|
||||
/*
|
||||
* in rx rings, the high 8 bits
|
||||
* are the number of fragments.
|
||||
*/
|
||||
#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
|
||||
uint64_t ptr; /* pointer for indirect buffers */
|
||||
};
|
||||
|
||||
/*
|
||||
* struct netmap_ring
|
||||
*
|
||||
* Netmap representation of a TX or RX ring (also known as "queue").
|
||||
* This is a queue implemented as a fixed-size circular array.
|
||||
* At the software level, two fields are important: avail and cur.
|
||||
*
|
||||
* In TX rings:
|
||||
* avail indicates the number of slots available for transmission.
|
||||
* It is updated by the kernel after every netmap system call.
|
||||
* It MUST BE decremented by the application when it appends a
|
||||
* packet.
|
||||
*
|
||||
* avail tells how many slots are available for transmission.
|
||||
* It is updated by the kernel in each netmap system call.
|
||||
* It MUST BE decremented by the user when it
|
||||
* adds a new packet to send.
|
||||
*
|
||||
* cur indicates the slot to use for the next packet
|
||||
* to send (i.e. the "tail" of the queue).
|
||||
* It MUST BE incremented by the application before
|
||||
* It MUST BE incremented by the user before
|
||||
* netmap system calls to reflect the number of newly
|
||||
* sent packets.
|
||||
* It is checked by the kernel on netmap system calls
|
||||
* (normally unmodified by the kernel unless invalid).
|
||||
*
|
||||
* The kernel side of netmap uses two additional fields in its own
|
||||
* private ring structure, netmap_kring:
|
||||
* nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
|
||||
* nr_hwavail is the number of slots known as available by the
|
||||
* hardware. It is updated on an INTR (inc by the
|
||||
* number of packets sent) and on a NIOCTXSYNC
|
||||
* (decrease by nr_cur - nr_hwcur)
|
||||
* A special case, nr_hwavail is -1 if the transmit
|
||||
* side is idle (no pending transmits).
|
||||
*
|
||||
* In RX rings:
|
||||
*
|
||||
* avail is the number of packets available (possibly 0).
|
||||
* It MUST BE decremented by the application when it consumes
|
||||
* a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
|
||||
* It is updated by the kernel in each netmap system call.
|
||||
* It MUST BE decremented by the user when it
|
||||
* consumes a packet.
|
||||
*
|
||||
* cur indicates the first slot that contains a packet not
|
||||
* processed yet (the "head" of the queue).
|
||||
* It MUST BE incremented by the software when it consumes
|
||||
* yet processed (the "head" of the queue).
|
||||
* It MUST BE incremented by the user when it consumes
|
||||
* a packet.
|
||||
*
|
||||
* reserved indicates the number of buffers before 'cur'
|
||||
* that the application has still in use. Normally 0,
|
||||
* it MUST BE incremented by the application when it
|
||||
* that the user has not released yet. Normally 0,
|
||||
* it MUST BE incremented by the user when it
|
||||
* does not return the buffer immediately, and decremented
|
||||
* when the buffer is finally freed.
|
||||
*
|
||||
* The kernel side of netmap uses two additional fields in the kring:
|
||||
* nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
|
||||
* nr_hwavail is the number of packets available. It is updated
|
||||
* on INTR (inc by the number of new packets arrived)
|
||||
* and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
|
||||
*
|
||||
* DATA OWNERSHIP/LOCKING:
|
||||
* The netmap_ring is owned by the user program and it is only
|
||||
* accessed or modified in the upper half of the kernel during
|
||||
* a system call.
|
||||
*
|
||||
* The netmap_kring is only modified by the upper half of the kernel.
|
||||
* The netmap_ring, all slots, and buffers in the range
|
||||
* [reserved-cur , cur+avail[ are owned by the user program,
|
||||
* and the kernel only touches them in the same thread context
|
||||
* during a system call.
|
||||
* Other buffers are reserved for use by the NIC's DMA engines.
|
||||
*
|
||||
* FLAGS
|
||||
* NR_TIMESTAMP updates the 'ts' field on each syscall. This is
|
||||
@ -228,7 +218,7 @@ struct netmap_slot {
|
||||
*/
|
||||
struct netmap_ring {
|
||||
/*
|
||||
* nr_buf_base_ofs is meant to be used through macros.
|
||||
* buf_ofs is meant to be used through macros.
|
||||
* It contains the offset of the buffer region from this
|
||||
* descriptor.
|
||||
*/
|
||||
@ -253,23 +243,29 @@ struct netmap_ring {
|
||||
|
||||
/*
|
||||
* Netmap representation of an interface and its queue(s).
|
||||
* This is initialized by the kernel when binding a file
|
||||
* descriptor to a port, and should be considered as readonly
|
||||
* by user programs. The kernel never uses it.
|
||||
*
|
||||
* There is one netmap_if for each file descriptor on which we want
|
||||
* to select/poll. We assume that on each interface has the same number
|
||||
* of receive and transmit queues.
|
||||
* to select/poll.
|
||||
* select/poll operates on one or all pairs depending on the value of
|
||||
* nmr_queueid passed on the ioctl.
|
||||
*/
|
||||
struct netmap_if {
|
||||
char ni_name[IFNAMSIZ]; /* name of the interface. */
|
||||
const u_int ni_version; /* API version, currently unused */
|
||||
const u_int ni_rx_rings; /* number of rx rings */
|
||||
const u_int ni_tx_rings; /* if zero, same as ni_rx_rings */
|
||||
const uint32_t ni_version; /* API version, currently unused */
|
||||
const uint32_t ni_flags; /* properties */
|
||||
#define NI_PRIV_MEM 0x1 /* private memory region */
|
||||
|
||||
const uint32_t ni_rx_rings; /* number of rx rings */
|
||||
const uint32_t ni_tx_rings; /* number of tx rings */
|
||||
/*
|
||||
* The following array contains the offset of each netmap ring
|
||||
* from this structure. The first ni_tx_queues+1 entries refer
|
||||
* to the tx rings, the next ni_rx_queues+1 refer to the rx rings
|
||||
* from this structure. The first ni_tx_rings+1 entries refer
|
||||
* to the tx rings, the next ni_rx_rings+1 refer to the rx rings
|
||||
* (the last entry in each block refers to the host stack rings).
|
||||
* The area is filled up by the kernel on NIOCREG,
|
||||
* The area is filled up by the kernel on NIOCREGIF,
|
||||
* and then only read by userspace code.
|
||||
*/
|
||||
const ssize_t ring_ofs[0];
|
||||
@ -282,23 +278,47 @@ struct netmap_if {
|
||||
* NIOCGINFO takes a struct ifreq, the interface name is the input,
|
||||
* the outputs are number of queues and number of descriptor
|
||||
* for each queue (useful to set number of threads etc.).
|
||||
* The info returned is only advisory and may change before
|
||||
* the interface is bound to a file descriptor.
|
||||
*
|
||||
* NIOCREGIF takes an interface name within a struct ifreq,
|
||||
* and activates netmap mode on the interface (if possible).
|
||||
*
|
||||
* For vale ports, starting with NETMAP_API = 5,
|
||||
* nr_tx_rings and nr_rx_rings specify how many software rings
|
||||
* are created (0 means 1).
|
||||
* nr_name is the name of the interface
|
||||
*
|
||||
* NIOCREGIF is also used to attach a NIC to a VALE switch.
|
||||
* In this case the name is vale*:ifname, and "nr_cmd"
|
||||
* is set to 'NETMAP_BDG_ATTACH' or 'NETMAP_BDG_DETACH'.
|
||||
* nr_ringid specifies which rings should be attached, 0 means all,
|
||||
* NETMAP_HW_RING + n means only the n-th ring.
|
||||
* The process can terminate after the interface has been attached.
|
||||
* nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings
|
||||
* indicate the configuration of the port on return.
|
||||
*
|
||||
* NIOCUNREGIF unregisters the interface associated to the fd.
|
||||
* this is deprecated and will go away.
|
||||
* On input, non-zero values for nr_tx_rings, nr_tx_slots and the
|
||||
* rx counterparts may be used to reconfigure the port according
|
||||
* to the requested values, but this is not guaranteed.
|
||||
* The actual values are returned on completion of the ioctl().
|
||||
*
|
||||
* nr_ringid
|
||||
* indicates how rings should be bound to the file descriptors.
|
||||
* The default (0) means all physical rings of a NIC are bound.
|
||||
* NETMAP_HW_RING plus a ring number lets you bind just
|
||||
* a single ring pair.
|
||||
* NETMAP_SW_RING binds only the host tx/rx rings
|
||||
* NETMAP_NO_TX_POLL prevents select()/poll() from pushing
|
||||
* out packets on the tx ring unless POLLOUT is specified.
|
||||
*
|
||||
* NETMAP_PRIV_MEM is a return value used to indicate that
|
||||
* this ring is in a private memory region hence buffer
|
||||
* swapping cannot be used
|
||||
*
|
||||
* nr_cmd is used to configure NICs attached to a VALE switch,
|
||||
* or to dump the configuration of a VALE switch.
|
||||
*
|
||||
* nr_cmd = NETMAP_BDG_ATTACH and nr_name = vale*:ifname
|
||||
* attaches the NIC to the switch, with nr_ringid specifying
|
||||
* which rings to use
|
||||
*
|
||||
* nr_cmd = NETMAP_BDG_DETACH and nr_name = vale*:ifname
|
||||
* disconnects a previously attached NIC
|
||||
*
|
||||
* nr_cmd = NETMAP_BDG_LIST is used to list the configuration
|
||||
* of VALE switches, with additional arguments.
|
||||
*
|
||||
* NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
|
||||
* whose identity is set in NIOCREGIF through nr_ringid
|
||||
@ -312,7 +332,7 @@ struct netmap_if {
|
||||
struct nmreq {
|
||||
char nr_name[IFNAMSIZ];
|
||||
uint32_t nr_version; /* API version */
|
||||
#define NETMAP_API 4 /* current version */
|
||||
#define NETMAP_API 5 /* current version */
|
||||
uint32_t nr_offset; /* nifp offset in the shared region */
|
||||
uint32_t nr_memsize; /* size of the shared region */
|
||||
uint32_t nr_tx_slots; /* slots in tx rings */
|
||||
@ -320,6 +340,7 @@ struct nmreq {
|
||||
uint16_t nr_tx_rings; /* number of tx rings */
|
||||
uint16_t nr_rx_rings; /* number of rx rings */
|
||||
uint16_t nr_ringid; /* ring(s) we care about */
|
||||
#define NETMAP_PRIV_MEM 0x8000 /* rings use private memory */
|
||||
#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
|
||||
#define NETMAP_SW_RING 0x2000 /* process the sw ring */
|
||||
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
|
||||
@ -343,7 +364,7 @@ struct nmreq {
|
||||
*/
|
||||
#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
|
||||
#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
|
||||
#define NIOCUNREGIF _IO('i', 147) /* interface unregister */
|
||||
#define NIOCUNREGIF _IO('i', 147) /* deprecated. Was interface unregister */
|
||||
#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
|
||||
#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
|
||||
#endif /* !NIOCREGIF */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2012 Luigi Rizzo. All rights reserved.
|
||||
* Copyright (C) 2012-2013 Luigi Rizzo. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -130,20 +130,14 @@ netmap_open(struct my_ring *me, int ringid, int promisc)
|
||||
req.nr_version = NETMAP_API;
|
||||
strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
|
||||
req.nr_ringid = ringid;
|
||||
err = ioctl(fd, NIOCGINFO, &req);
|
||||
if (err) {
|
||||
D("cannot get info on %s, errno %d ver %d",
|
||||
me->ifname, errno, req.nr_version);
|
||||
goto error;
|
||||
}
|
||||
me->memsize = l = req.nr_memsize;
|
||||
if (verbose)
|
||||
D("memsize is %d MB", l>>20);
|
||||
err = ioctl(fd, NIOCREGIF, &req);
|
||||
if (err) {
|
||||
D("Unable to register %s", me->ifname);
|
||||
goto error;
|
||||
}
|
||||
me->memsize = l = req.nr_memsize;
|
||||
if (verbose)
|
||||
D("memsize is %d MB", l>>20);
|
||||
|
||||
if (me->mem == NULL) {
|
||||
me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
|
||||
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -25,7 +25,7 @@
|
||||
|
||||
/*
|
||||
* $FreeBSD$
|
||||
* $Id$
|
||||
* $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $
|
||||
*
|
||||
* Example program to show how to build a multithreaded packet
|
||||
* source/sink using the netmap device.
|
||||
@ -40,7 +40,10 @@
|
||||
|
||||
#include <ctype.h> // isprint()
|
||||
|
||||
const char *default_payload="netmap pkt-gen payload\n"
|
||||
const char *default_payload="netmap pkt-gen DIRECT payload\n"
|
||||
"http://info.iet.unipi.it/~luigi/netmap/ ";
|
||||
|
||||
const char *indirect_payload="netmap pkt-gen indirect payload\n"
|
||||
"http://info.iet.unipi.it/~luigi/netmap/ ";
|
||||
|
||||
int time_second; // support for RD() debugging macro
|
||||
@ -58,8 +61,8 @@ struct pkt {
|
||||
|
||||
struct ip_range {
|
||||
char *name;
|
||||
struct in_addr start, end, cur;
|
||||
uint16_t port0, port1, cur_p;
|
||||
uint32_t start, end; /* same as struct in_addr */
|
||||
uint16_t port0, port1;
|
||||
};
|
||||
|
||||
struct mac_range {
|
||||
@ -80,6 +83,7 @@ struct glob_arg {
|
||||
int burst;
|
||||
int forever;
|
||||
int npackets; /* total packets to send */
|
||||
int frags; /* fragments per packet */
|
||||
int nthreads;
|
||||
int cpus;
|
||||
int options; /* testing */
|
||||
@ -103,6 +107,8 @@ struct glob_arg {
|
||||
void *mmap_addr;
|
||||
int mmap_size;
|
||||
char *ifname;
|
||||
char *nmr_config;
|
||||
int dummy_send;
|
||||
};
|
||||
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
|
||||
|
||||
@ -137,45 +143,58 @@ struct targ {
|
||||
static void
|
||||
extract_ip_range(struct ip_range *r)
|
||||
{
|
||||
char *p_lo, *p_hi;
|
||||
char buf1[16]; // one ip address
|
||||
char *ap, *pp;
|
||||
struct in_addr a;
|
||||
|
||||
D("extract IP range from %s", r->name);
|
||||
p_lo = index(r->name, ':'); /* do we have ports ? */
|
||||
if (p_lo) {
|
||||
D(" found ports at %s", p_lo);
|
||||
*p_lo++ = '\0';
|
||||
p_hi = index(p_lo, '-');
|
||||
if (p_hi)
|
||||
*p_hi++ = '\0';
|
||||
else
|
||||
p_hi = p_lo;
|
||||
r->port0 = strtol(p_lo, NULL, 0);
|
||||
r->port1 = strtol(p_hi, NULL, 0);
|
||||
if (r->port1 < r->port0) {
|
||||
r->cur_p = r->port0;
|
||||
r->port0 = r->port1;
|
||||
r->port1 = r->cur_p;
|
||||
r->port0 = r->port1 = 0;
|
||||
r->start = r->end = 0;
|
||||
|
||||
/* the first - splits start/end of range */
|
||||
ap = index(r->name, '-'); /* do we have ports ? */
|
||||
if (ap) {
|
||||
*ap++ = '\0';
|
||||
}
|
||||
/* grab the initial values (mandatory) */
|
||||
pp = index(r->name, ':');
|
||||
if (pp) {
|
||||
*pp++ = '\0';
|
||||
r->port0 = r->port1 = strtol(pp, NULL, 0);
|
||||
};
|
||||
inet_aton(r->name, &a);
|
||||
r->start = r->end = ntohl(a.s_addr);
|
||||
if (ap) {
|
||||
pp = index(ap, ':');
|
||||
if (pp) {
|
||||
*pp++ = '\0';
|
||||
if (*pp)
|
||||
r->port1 = strtol(pp, NULL, 0);
|
||||
}
|
||||
if (*ap) {
|
||||
inet_aton(ap, &a);
|
||||
r->end = ntohl(a.s_addr);
|
||||
}
|
||||
r->cur_p = r->port0;
|
||||
D("ports are %d to %d", r->port0, r->port1);
|
||||
}
|
||||
p_hi = index(r->name, '-'); /* do we have upper ip ? */
|
||||
if (p_hi) {
|
||||
*p_hi++ = '\0';
|
||||
} else
|
||||
p_hi = r->name;
|
||||
inet_aton(r->name, &r->start);
|
||||
inet_aton(p_hi, &r->end);
|
||||
if (r->start.s_addr > r->end.s_addr) {
|
||||
r->cur = r->start;
|
||||
if (r->port0 > r->port1) {
|
||||
uint16_t tmp = r->port0;
|
||||
r->port0 = r->port1;
|
||||
r->port1 = tmp;
|
||||
}
|
||||
if (r->start > r->end) {
|
||||
uint32_t tmp = r->start;
|
||||
r->start = r->end;
|
||||
r->end = r->cur;
|
||||
r->end = tmp;
|
||||
}
|
||||
{
|
||||
struct in_addr a;
|
||||
char buf1[16]; // one ip address
|
||||
|
||||
a.s_addr = htonl(r->end);
|
||||
strncpy(buf1, inet_ntoa(a), sizeof(buf1));
|
||||
a.s_addr = htonl(r->start);
|
||||
D("range is %s:%d to %s:%d",
|
||||
inet_ntoa(a), r->port0, buf1, r->port1);
|
||||
}
|
||||
r->cur = r->start;
|
||||
strncpy(buf1, inet_ntoa(r->end), sizeof(buf1));
|
||||
D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0,
|
||||
buf1, r->port1);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -255,6 +274,53 @@ system_ncpus(void)
|
||||
#endif /* __APPLE__ */
|
||||
|
||||
|
||||
/*
|
||||
* parse the vale configuration in conf and put it in nmr.
|
||||
* The configuration may consist of 0 to 4 numbers separated
|
||||
* by commas: #tx-slots,#rx-slots,#tx-rinzgs,#rx-rings.
|
||||
* Missing numbers or zeroes stand for default values.
|
||||
* As an additional convenience, if exactly one number
|
||||
* is specified, then this is assigned to bot #tx-slots and #rx-slots.
|
||||
* If there is no 4th number, then the 3rd is assigned to bot #tx-rings
|
||||
* and #rx-rings.
|
||||
*/
|
||||
void parse_nmr_config(const char* conf, struct nmreq *nmr)
|
||||
{
|
||||
char *w, *tok;
|
||||
int i, v;
|
||||
|
||||
nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
|
||||
nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
|
||||
if (conf == NULL || ! *conf)
|
||||
return;
|
||||
w = strdup(conf);
|
||||
for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
|
||||
v = atoi(tok);
|
||||
switch (i) {
|
||||
case 0:
|
||||
nmr->nr_tx_slots = nmr->nr_rx_slots = v;
|
||||
break;
|
||||
case 1:
|
||||
nmr->nr_rx_slots = v;
|
||||
break;
|
||||
case 2:
|
||||
nmr->nr_tx_rings = nmr->nr_rx_rings = v;
|
||||
break;
|
||||
case 3:
|
||||
nmr->nr_rx_rings = v;
|
||||
break;
|
||||
default:
|
||||
D("ignored config: %s", tok);
|
||||
break;
|
||||
}
|
||||
}
|
||||
D("txr %d txd %d rxr %d rxd %d",
|
||||
nmr->nr_tx_rings, nmr->nr_tx_slots,
|
||||
nmr->nr_rx_rings, nmr->nr_rx_slots);
|
||||
free(w);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* locate the src mac address for our interface, put it
|
||||
* into the user-supplied buffer. return 0 if ok, -1 on error.
|
||||
@ -361,7 +427,9 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
|
||||
|
||||
/* get the length in ASCII of the length of the packet. */
|
||||
|
||||
printf("ring %p cur %5d len %5d buf %p\n", ring, cur, len, p);
|
||||
printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
|
||||
ring, cur, ring->slot[cur].buf_idx,
|
||||
ring->slot[cur].flags, len);
|
||||
/* hexdump routine */
|
||||
for (i = 0; i < len; ) {
|
||||
memset(buf, sizeof(buf), ' ');
|
||||
@ -389,6 +457,54 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
|
||||
#define uh_sum check
|
||||
#endif /* linux */
|
||||
|
||||
/*
|
||||
* increment the addressed in the packet,
|
||||
* starting from the least significant field.
|
||||
* DST_IP DST_PORT SRC_IP SRC_PORT
|
||||
*/
|
||||
static void
|
||||
update_addresses(struct pkt *pkt, struct glob_arg *g)
|
||||
{
|
||||
uint32_t a;
|
||||
uint16_t p;
|
||||
struct ip *ip = &pkt->ip;
|
||||
struct udphdr *udp = &pkt->udp;
|
||||
|
||||
p = ntohs(udp->uh_sport);
|
||||
if (p < g->src_ip.port1) { /* just inc, no wrap */
|
||||
udp->uh_sport = htons(p + 1);
|
||||
return;
|
||||
}
|
||||
udp->uh_sport = htons(g->src_ip.port0);
|
||||
|
||||
a = ntohl(ip->ip_src.s_addr);
|
||||
if (a < g->src_ip.end) { /* just inc, no wrap */
|
||||
ip->ip_src.s_addr = htonl(a + 1);
|
||||
return;
|
||||
}
|
||||
ip->ip_src.s_addr = htonl(g->src_ip.start);
|
||||
|
||||
udp->uh_sport = htons(g->src_ip.port0);
|
||||
p = ntohs(udp->uh_dport);
|
||||
if (p < g->dst_ip.port1) { /* just inc, no wrap */
|
||||
udp->uh_dport = htons(p + 1);
|
||||
return;
|
||||
}
|
||||
udp->uh_dport = htons(g->dst_ip.port0);
|
||||
|
||||
a = ntohl(ip->ip_dst.s_addr);
|
||||
if (a < g->dst_ip.end) { /* just inc, no wrap */
|
||||
ip->ip_dst.s_addr = htonl(a + 1);
|
||||
return;
|
||||
}
|
||||
ip->ip_dst.s_addr = htonl(g->dst_ip.start);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize one packet and prepare for the next one.
|
||||
* The copy could be done better instead of repeating it each time.
|
||||
*/
|
||||
static void
|
||||
initialize_packet(struct targ *targ)
|
||||
{
|
||||
@ -398,9 +514,10 @@ initialize_packet(struct targ *targ)
|
||||
struct udphdr *udp;
|
||||
uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
|
||||
const char *payload = targ->g->options & OPT_INDIRECT ?
|
||||
"XXXXXXXXXXXXXXXXXXXXXX" : default_payload;
|
||||
indirect_payload : default_payload;
|
||||
int i, l, l0 = strlen(payload);
|
||||
|
||||
/* create a nice NUL-terminated string */
|
||||
for (i = 0; i < paylen;) {
|
||||
l = min(l0, paylen - i);
|
||||
bcopy(payload, pkt->body + i, l);
|
||||
@ -409,6 +526,7 @@ initialize_packet(struct targ *targ)
|
||||
pkt->body[i-1] = '\0';
|
||||
ip = &pkt->ip;
|
||||
|
||||
/* prepare the headers */
|
||||
ip->ip_v = IPVERSION;
|
||||
ip->ip_hl = 5;
|
||||
ip->ip_id = 0;
|
||||
@ -418,22 +536,14 @@ initialize_packet(struct targ *targ)
|
||||
ip->ip_off = htons(IP_DF); /* Don't fragment */
|
||||
ip->ip_ttl = IPDEFTTL;
|
||||
ip->ip_p = IPPROTO_UDP;
|
||||
ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr;
|
||||
if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr)
|
||||
targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr;
|
||||
ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr;
|
||||
if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr)
|
||||
targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr;
|
||||
ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
|
||||
ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
|
||||
ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
|
||||
|
||||
|
||||
udp = &pkt->udp;
|
||||
udp->uh_sport = htons(targ->g->src_ip.cur_p);
|
||||
if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1)
|
||||
targ->g->src_ip.cur_p = targ->g->src_ip.port0;
|
||||
udp->uh_dport = htons(targ->g->dst_ip.cur_p);
|
||||
if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1)
|
||||
targ->g->dst_ip.cur_p = targ->g->dst_ip.port0;
|
||||
udp->uh_sport = htons(targ->g->src_ip.port0);
|
||||
udp->uh_dport = htons(targ->g->dst_ip.port0);
|
||||
udp->uh_ulen = htons(paylen);
|
||||
/* Magic: taken from sbin/dhclient/packet.c */
|
||||
udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
|
||||
@ -461,13 +571,18 @@ initialize_packet(struct targ *targ)
|
||||
*/
|
||||
static int
|
||||
send_packets(struct netmap_ring *ring, struct pkt *pkt,
|
||||
int size, u_int count, int options)
|
||||
struct glob_arg *g, u_int count, int options, u_int nfrags)
|
||||
{
|
||||
u_int sent, cur = ring->cur;
|
||||
int fcnt;
|
||||
int size = g->pkt_size;
|
||||
|
||||
if (ring->avail < count)
|
||||
count = ring->avail;
|
||||
|
||||
if (count < nfrags) {
|
||||
D("truncating packet, no room for frags %d %d",
|
||||
count, nfrags);
|
||||
}
|
||||
#if 0
|
||||
if (options & (OPT_COPY | OPT_PREFETCH) ) {
|
||||
for (sent = 0; sent < count; sent++) {
|
||||
@ -480,25 +595,36 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
|
||||
cur = ring->cur;
|
||||
}
|
||||
#endif
|
||||
for (sent = 0; sent < count; sent++) {
|
||||
for (fcnt = nfrags, sent = 0; sent < count; sent++) {
|
||||
struct netmap_slot *slot = &ring->slot[cur];
|
||||
char *p = NETMAP_BUF(ring, slot->buf_idx);
|
||||
|
||||
slot->flags = 0;
|
||||
if (options & OPT_DUMP)
|
||||
dump_payload(p, size, ring, cur);
|
||||
if (options & OPT_INDIRECT) {
|
||||
slot->flags |= NS_INDIRECT;
|
||||
*((struct pkt **)(void *)p) = pkt;
|
||||
} else if (options & OPT_COPY)
|
||||
slot->ptr = (uint64_t)pkt;
|
||||
} else if (options & OPT_COPY) {
|
||||
pkt_copy(pkt, p, size);
|
||||
else if (options & OPT_MEMCPY)
|
||||
if (fcnt == 1)
|
||||
update_addresses(pkt, g);
|
||||
} else if (options & OPT_MEMCPY) {
|
||||
memcpy(p, pkt, size);
|
||||
else if (options & OPT_PREFETCH)
|
||||
if (fcnt == 1)
|
||||
update_addresses(pkt, g);
|
||||
} else if (options & OPT_PREFETCH) {
|
||||
prefetch(p);
|
||||
}
|
||||
if (options & OPT_DUMP)
|
||||
dump_payload(p, size, ring, cur);
|
||||
slot->len = size;
|
||||
if (sent == count - 1)
|
||||
if (--fcnt > 0)
|
||||
slot->flags |= NS_MOREFRAG;
|
||||
else
|
||||
fcnt = nfrags;
|
||||
if (sent == count - 1) {
|
||||
slot->flags &= ~NS_MOREFRAG;
|
||||
slot->flags |= NS_REPORT;
|
||||
}
|
||||
cur = NETMAP_RING_NEXT(ring, cur);
|
||||
}
|
||||
ring->avail -= sent;
|
||||
@ -801,6 +927,7 @@ sender_body(void *data)
|
||||
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
|
||||
if (pcap_inject(p, pkt, size) != -1)
|
||||
sent++;
|
||||
update_addresses(pkt, targ->g);
|
||||
if (i > 10000) {
|
||||
targ->count = sent;
|
||||
i = 0;
|
||||
@ -814,6 +941,7 @@ sender_body(void *data)
|
||||
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
|
||||
if (write(targ->g->main_fd, pkt, size) != -1)
|
||||
sent++;
|
||||
update_addresses(pkt, targ->g);
|
||||
if (i > 10000) {
|
||||
targ->count = sent;
|
||||
i = 0;
|
||||
@ -821,6 +949,8 @@ sender_body(void *data)
|
||||
}
|
||||
} else {
|
||||
int tosend = 0;
|
||||
int frags = targ->g->frags;
|
||||
|
||||
while (!targ->cancel && (n == 0 || sent < n)) {
|
||||
|
||||
if (rate_limit && tosend <= 0) {
|
||||
@ -855,11 +985,20 @@ sender_body(void *data)
|
||||
txring = NETMAP_TXRING(nifp, i);
|
||||
if (txring->avail == 0)
|
||||
continue;
|
||||
m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
|
||||
limit, options);
|
||||
if (frags > 1)
|
||||
limit = ((limit + frags - 1) / frags) * frags;
|
||||
|
||||
m = send_packets(txring, &targ->pkt, targ->g,
|
||||
limit, options, frags);
|
||||
ND("limit %d avail %d frags %d m %d",
|
||||
limit, txring->avail, frags, m);
|
||||
sent += m;
|
||||
tosend -= m;
|
||||
targ->count = sent;
|
||||
if (rate_limit) {
|
||||
tosend -= m;
|
||||
if (tosend <= 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* flush any remaining packets */
|
||||
@ -909,7 +1048,6 @@ receive_packets(struct netmap_ring *ring, u_int limit, int dump)
|
||||
struct netmap_slot *slot = &ring->slot[cur];
|
||||
char *p = NETMAP_BUF(ring, slot->buf_idx);
|
||||
|
||||
slot->flags = OPT_INDIRECT; // XXX
|
||||
if (dump)
|
||||
dump_payload(p, slot->len, ring, cur);
|
||||
|
||||
@ -1063,18 +1201,20 @@ usage(void)
|
||||
"\t-n count number of iterations (can be 0)\n"
|
||||
"\t-t pkts_to_send also forces tx mode\n"
|
||||
"\t-r pkts_to_receive also forces rx mode\n"
|
||||
"\t-l pkts_size in bytes excluding CRC\n"
|
||||
"\t-d dst-ip end with %%n to sweep n addresses\n"
|
||||
"\t-s src-ip end with %%n to sweep n addresses\n"
|
||||
"\t-D dst-mac end with %%n to sweep n addresses\n"
|
||||
"\t-S src-mac end with %%n to sweep n addresses\n"
|
||||
"\t-l pkt_size in bytes excluding CRC\n"
|
||||
"\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
|
||||
"\t-s src_ip[:port[-src_ip:port]] single or range\n"
|
||||
"\t-D dst-mac\n"
|
||||
"\t-S src-mac\n"
|
||||
"\t-a cpu_id use setaffinity\n"
|
||||
"\t-b burst size testing, mostly\n"
|
||||
"\t-c cores cores to use\n"
|
||||
"\t-p threads processes/threads to use\n"
|
||||
"\t-T report_ms milliseconds between reports\n"
|
||||
"\t-P use libpcap instead of netmap\n"
|
||||
"\t-P use libpcap instead of netmap\n"
|
||||
"\t-w wait_for_link_time in seconds\n"
|
||||
"\t-R rate in packets per second\n"
|
||||
"\t-X dump payload\n"
|
||||
"",
|
||||
cmd);
|
||||
|
||||
@ -1112,6 +1252,7 @@ start_threads(struct glob_arg *g)
|
||||
strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
|
||||
tifreq.nr_version = NETMAP_API;
|
||||
tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
|
||||
parse_nmr_config(g->nmr_config, &tifreq);
|
||||
|
||||
/*
|
||||
* if we are acting as a receiver only, do not touch the transmit ring.
|
||||
@ -1126,8 +1267,10 @@ start_threads(struct glob_arg *g)
|
||||
D("Unable to register %s", g->ifname);
|
||||
continue;
|
||||
}
|
||||
D("memsize is %d MB", tifreq.nr_memsize >> 20);
|
||||
targs[i].nmr = tifreq;
|
||||
targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
|
||||
D("nifp flags 0x%x", targs[i].nifp->ni_flags);
|
||||
/* start threads. */
|
||||
targs[i].qfirst = (g->nthreads > 1) ? i : 0;
|
||||
targs[i].qlast = (g->nthreads > 1) ? i+1 :
|
||||
@ -1343,9 +1486,11 @@ main(int arc, char **argv)
|
||||
g.cpus = 1;
|
||||
g.forever = 1;
|
||||
g.tx_rate = 0;
|
||||
g.frags = 1;
|
||||
g.nmr_config = "";
|
||||
|
||||
while ( (ch = getopt(arc, argv,
|
||||
"a:f:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:X")) != -1) {
|
||||
"a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:XC:")) != -1) {
|
||||
struct sf *fn;
|
||||
|
||||
switch(ch) {
|
||||
@ -1358,6 +1503,15 @@ main(int arc, char **argv)
|
||||
g.npackets = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
i = atoi(optarg);
|
||||
if (i < 1 || i > 63) {
|
||||
D("invalid frags %d [1..63], ignore", i);
|
||||
break;
|
||||
}
|
||||
g.frags = i;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
for (fn = func; fn->key; fn++) {
|
||||
if (!strcmp(fn->key, optarg))
|
||||
@ -1383,6 +1537,8 @@ main(int arc, char **argv)
|
||||
g.dev_type = DEV_TAP;
|
||||
else
|
||||
g.dev_type = DEV_NETMAP;
|
||||
if (!strcmp(g.ifname, "null"))
|
||||
g.dummy_send = 1;
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
@ -1454,6 +1610,9 @@ main(int arc, char **argv)
|
||||
break;
|
||||
case 'X':
|
||||
g.options |= OPT_DUMP;
|
||||
break;
|
||||
case 'C':
|
||||
g.nmr_config = strdup(optarg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1507,6 +1666,8 @@ main(int arc, char **argv)
|
||||
D("cannot open pcap on %s", g.ifname);
|
||||
usage();
|
||||
}
|
||||
} else if (g.dummy_send) {
|
||||
D("using a dummy send routine");
|
||||
} else {
|
||||
bzero(&nmr, sizeof(nmr));
|
||||
nmr.nr_version = NETMAP_API;
|
||||
@ -1523,20 +1684,36 @@ main(int arc, char **argv)
|
||||
if (g.main_fd == -1) {
|
||||
D("Unable to open /dev/netmap");
|
||||
// fail later
|
||||
} else {
|
||||
if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
|
||||
D("Unable to get if info without name");
|
||||
} else {
|
||||
D("map size is %d Kb", nmr.nr_memsize >> 10);
|
||||
}
|
||||
bzero(&nmr, sizeof(nmr));
|
||||
nmr.nr_version = NETMAP_API;
|
||||
strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
|
||||
if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
|
||||
D("Unable to get if info for %s", g.ifname);
|
||||
}
|
||||
devqueues = nmr.nr_rx_rings;
|
||||
}
|
||||
/*
|
||||
* Register the interface on the netmap device: from now on,
|
||||
* we can operate on the network interface without any
|
||||
* interference from the legacy network stack.
|
||||
*
|
||||
* We decide to put the first interface registration here to
|
||||
* give time to cards that take a long time to reset the PHY.
|
||||
*/
|
||||
bzero(&nmr, sizeof(nmr));
|
||||
nmr.nr_version = NETMAP_API;
|
||||
strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
|
||||
nmr.nr_version = NETMAP_API;
|
||||
parse_nmr_config(g.nmr_config, &nmr);
|
||||
if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
|
||||
D("Unable to register interface %s", g.ifname);
|
||||
//continue, fail later
|
||||
}
|
||||
ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname,
|
||||
nmr.nr_tx_rings, nmr.nr_tx_slots,
|
||||
nmr.nr_rx_rings, nmr.nr_rx_slots);
|
||||
//if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
|
||||
// D("Unable to get if info without name");
|
||||
//} else {
|
||||
// D("map size is %d Kb", nmr.nr_memsize >> 10);
|
||||
//}
|
||||
if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
|
||||
D("Unable to get if info for %s", g.ifname);
|
||||
}
|
||||
devqueues = nmr.nr_rx_rings;
|
||||
|
||||
/* validate provided nthreads. */
|
||||
if (g.nthreads < 1 || g.nthreads > devqueues) {
|
||||
@ -1559,19 +1736,6 @@ main(int arc, char **argv)
|
||||
// continue, fail later
|
||||
}
|
||||
|
||||
/*
|
||||
* Register the interface on the netmap device: from now on,
|
||||
* we can operate on the network interface without any
|
||||
* interference from the legacy network stack.
|
||||
*
|
||||
* We decide to put the first interface registration here to
|
||||
* give time to cards that take a long time to reset the PHY.
|
||||
*/
|
||||
nmr.nr_version = NETMAP_API;
|
||||
if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
|
||||
D("Unable to register interface %s", g.ifname);
|
||||
//continue, fail later
|
||||
}
|
||||
|
||||
|
||||
/* Print some debug information. */
|
||||
@ -1595,6 +1759,7 @@ main(int arc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (g.options) {
|
||||
D("--- SPECIAL OPTIONS:%s%s%s%s%s\n",
|
||||
g.options & OPT_PREFETCH ? " prefetch" : "",
|
||||
@ -1603,23 +1768,24 @@ main(int arc, char **argv)
|
||||
g.options & OPT_INDIRECT ? " indirect" : "",
|
||||
g.options & OPT_COPY ? " copy" : "");
|
||||
}
|
||||
|
||||
if (g.tx_rate == 0) {
|
||||
g.tx_period.tv_sec = 0;
|
||||
g.tx_period.tv_nsec = 0;
|
||||
} else if (g.tx_rate == 1) {
|
||||
g.tx_period.tv_sec = 1;
|
||||
g.tx_period.tv_nsec = 0;
|
||||
} else {
|
||||
g.tx_period.tv_sec = 0;
|
||||
|
||||
g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
|
||||
if (g.tx_rate > 0) {
|
||||
/* try to have at least something every second,
|
||||
* reducing the burst size to 0.5s worth of data
|
||||
* (but no less than one full set of fragments)
|
||||
*/
|
||||
if (g.burst > g.tx_rate/2)
|
||||
g.burst = g.tx_rate/2;
|
||||
if (g.burst < g.frags)
|
||||
g.burst = g.frags;
|
||||
g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst;
|
||||
if (g.tx_period.tv_nsec > 1000000000) {
|
||||
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
|
||||
g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
|
||||
}
|
||||
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
|
||||
g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
|
||||
}
|
||||
D("Sending %d packets every %d.%09d ns",
|
||||
g.burst, (int)g.tx_period.tv_sec, (int)g.tx_period.tv_nsec);
|
||||
if (g.td_body == sender_body)
|
||||
D("Sending %d packets every %ld.%09ld s",
|
||||
g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
|
||||
/* Wait for PHY reset. */
|
||||
D("Wait %d secs for phy reset", wait_link);
|
||||
sleep(wait_link);
|
||||
|
Loading…
Reference in New Issue
Block a user