Important update for the igb driver:
- Port the change made in em so that the actual number of unrefreshed descriptors is used as the basis, in rxeof on the way out, for deciding whether more refresh is needed. NOTE: there is a difference in the ring setup in igb; this is not accidental, it is necessitated by hardware behavior. When you reset the newer adapters they will not let you write RDH; it is ALWAYS set to 0. Thus the way em does it is not possible.
- Change the sysctl handling of flow control: the setting now takes effect dynamically when the variable changes, rather than requiring a reset.
- Change the eee sysctl naming; validation found the old one unintuitive :)
- Last but not least, some important performance tweaks in the TX path. I found that UDP behavior could be drastically hindered or improved with just small changes in the start loop. What I have here is what testing has shown to be the best overall. It is interesting to note that changing the clean threshold to start at a full half of the ring made a BIG difference in performance. I hope that this will prove to be advantageous for most workloads.

MFC in a week.
parent 4c821a3978
commit cf696f26c9

Notes:
    svn2git
    2020-12-20 02:59:44 +00:00
    svn path=/head/; revision=220375
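
The heart of the first bullet is counting how many RX descriptors the hardware has consumed but the driver has not yet refreshed. Below is a minimal standalone sketch of that ring arithmetic, mirroring the igb_rx_unrefreshed() inline this diff adds to if_igb.h; the ring size and index values are illustrative, not taken from the driver.

/* Sketch of the unrefreshed-descriptor count, after igb_rx_unrefreshed(). */
#include <stdio.h>

#define NUM_RX_DESC	16	/* illustrative ring size */

static unsigned
rx_unrefreshed(unsigned next_to_check, unsigned next_to_refresh)
{
	if (next_to_check > next_to_refresh)
		return (next_to_check - next_to_refresh - 1);
	/* Wrapped case: account for the ring modulo. */
	return ((NUM_RX_DESC + next_to_check) - next_to_refresh - 1);
}

int
main(void)
{
	/* Freshly set-up ring (check = 0, refresh = N - 1): nothing pending. */
	printf("%u\n", rx_unrefreshed(0, NUM_RX_DESC - 1));	/* prints 0 */
	/* After rxeof has consumed 4 descriptors: 4 slots await refresh. */
	printf("%u\n", rx_unrefreshed(4, NUM_RX_DESC - 1));	/* prints 4 */
	return (0);
}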
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2010, Intel Corporation
+  Copyright (c) 2001-2011, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -99,7 +99,7 @@ int igb_display_debug_stats = 0;
 /*********************************************************************
  *  Driver version:
  *********************************************************************/
-char igb_driver_version[] = "version - 2.1.7";
+char igb_driver_version[] = "version - 2.2.3";
 
 
 /*********************************************************************
@@ -262,6 +262,7 @@ static void igb_handle_link(void *context, int pending);
 
 static void	igb_set_sysctl_value(struct adapter *, const char *,
 		    const char *, int *, int);
+static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
 
 #ifdef DEVICE_POLLING
 static poll_handler_t igb_poll;
@@ -350,8 +351,8 @@ static int igb_fc_setting = e1000_fc_full;
 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
 
 /* Energy Efficient Ethernet - default to off */
-static int igb_eee_setting = FALSE;
-TUNABLE_INT("hw.igb.ee_setting", &igb_eee_setting);
+static int igb_eee_disabled = TRUE;
+TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
 
 /*
 ** DMA Coalescing, only for i350 - default to off,
@@ -445,6 +446,11 @@ igb_attach(device_t dev)
 	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
 	    &igb_enable_aim, 1, "Interrupt Moderation");
 
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
+	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
+
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
@@ -471,11 +477,6 @@ igb_attach(device_t dev)
 	    "max number of rx packets to process", &adapter->rx_process_limit,
 	    igb_rx_process_limit);
 
-	/* Sysctl for setting the interface flow control */
-	igb_set_sysctl_value(adapter, "flow_control",
-	    "configure flow control",
-	    &adapter->fc_setting, igb_fc_setting);
-
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must not exceed hardware maximum, and must be multiple
@@ -552,10 +553,10 @@ igb_attach(device_t dev)
 		igb_set_sysctl_value(adapter, "dma_coalesce",
 		    "configure dma coalesce",
 		    &adapter->dma_coalesce, igb_dma_coalesce);
-		igb_set_sysctl_value(adapter, "eee_control",
+		igb_set_sysctl_value(adapter, "eee_disabled",
 		    "enable Energy Efficient Ethernet",
 		    &adapter->hw.dev_spec._82575.eee_disable,
-		    igb_eee_setting);
+		    igb_eee_disabled);
 		e1000_set_eee_i350(&adapter->hw);
 	}
 
@@ -822,11 +823,12 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
 	if (!adapter->link_active)
 		return;
 
-	/* Call cleanup if number of TX descriptors low */
-	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
-		igb_txeof(txr);
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
-		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
+		/* Cleanup if TX descriptors are low */
+		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+			igb_txeof(txr);
+
+		if (txr->tx_avail <= IGB_MAX_SCATTER) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
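A note on the reworked start loop above: descriptor cleanup now runs inside the while loop, so every packet queued gets a chance to reclaim completed descriptors, and the stall check compares tx_avail against IGB_MAX_SCATTER (the maximum number of DMA segments a single packet may occupy) rather than the old IGB_TX_OP_THRESHOLD, so the queue is only marked OACTIVE once even a maximally fragmented frame could no longer fit.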
@@ -932,13 +934,6 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
 
 	/* Process the queue */
 	while (next != NULL) {
-		/* Call cleanup if number of TX descriptors low */
-		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
-			igb_txeof(txr);
-		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
-			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-			break;
-		}
 		if ((err = igb_xmit(txr, &next)) != 0) {
 			if (next != NULL)
 				err = drbr_enqueue(ifp, txr->br, next);
@@ -949,6 +944,12 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
+		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+			igb_txeof(txr);
+		if (txr->tx_avail <= IGB_MAX_SCATTER) {
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
 		next = drbr_dequeue(ifp, txr->br);
 	}
 	if (enq > 0) {
@@ -1266,10 +1267,13 @@ igb_init_locked(struct adapter *adapter)
 	else
 #endif /* DEVICE_POLLING */
 	{
-	igb_enable_intr(adapter);
-	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
+		igb_enable_intr(adapter);
+		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
 	}
 
+	/* Set Energy Efficient Ethernet */
+	e1000_set_eee_i350(&adapter->hw);
+
 	/* Don't reset the phy next time init gets called */
 	adapter->hw.phy.reset_disable = TRUE;
 }
@@ -1458,10 +1462,6 @@ igb_msix_que(void *arg)
 	more_tx = igb_txeof(txr);
 	IGB_TX_UNLOCK(txr);
 
-	/* If RX ring is depleted do refresh first */
-	if (rxr->next_to_check == rxr->next_to_refresh)
-		igb_refresh_mbufs(rxr, rxr->next_to_check);
-
 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
 
 	if (igb_enable_aim == FALSE)
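With rxeof now deciding on its way out whether more refresh is needed (see the igb_rxeof and igb_rx_unrefreshed() changes further down), the special-case refresh of a depleted ring here in igb_msix_que() becomes redundant and is dropped.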
@@ -2670,14 +2670,6 @@ igb_reset(struct adapter *adapter)
 	fc->pause_time = IGB_FC_PAUSE_TIME;
 	fc->send_xon = TRUE;
 
-	/* Set Flow control, use the tunable location if sane */
-	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
-		fc->requested_mode = adapter->fc_setting;
-	else
-		fc->requested_mode = e1000_fc_none;
-
-	fc->current_mode = fc->requested_mode;
-
 	/* Issue a global reset */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
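The requested flow-control mode is no longer re-derived from the tunable on every reset; after this change it is owned by the new flow_control sysctl handler added at the end of this file, which is what allows the setting to change dynamically.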
@@ -2864,7 +2856,7 @@ igb_dma_malloc(struct adapter *adapter, bus_size_t size,
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
-	    BUS_DMA_NOWAIT, &dma->dma_map);
+	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
@@ -3631,19 +3623,17 @@ igb_txeof(struct tx_ring *txr)
 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
 	 * to tell the stack that it is OK to send packets.
 	 */
-	if (txr->tx_avail > IGB_TX_OP_THRESHOLD)
+	if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
-	/* All clean, turn off the watchdog */
-	if (txr->tx_avail == adapter->num_tx_desc) {
-		txr->queue_status = IGB_QUEUE_IDLE;
-		return (FALSE);
-	}
-
+		/* All clean, turn off the watchdog */
+		if (txr->tx_avail == adapter->num_tx_desc) {
+			txr->queue_status = IGB_QUEUE_IDLE;
+			return (FALSE);
+		}
+	}
 	return (TRUE);
 }
 
 
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
@@ -3830,13 +3820,11 @@ igb_allocate_receive_buffers(struct rx_ring *rxr)
 static void
 igb_free_receive_ring(struct rx_ring *rxr)
 {
-	struct adapter		*adapter;
+	struct adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rxbuf;
-	int			i;
 
-	adapter = rxr->adapter;
-	i = rxr->next_to_check;
-	while (i != rxr->next_to_refresh) {
+	for (int i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
@@ -3854,12 +3842,7 @@ igb_free_receive_ring(struct rx_ring *rxr)
 		}
 		rxbuf->m_head = NULL;
 		rxbuf->m_pack = NULL;
-
-		if (++i == adapter->num_rx_desc)
-			i = 0;
 	}
-	rxr->next_to_check = 0;
-	rxr->next_to_refresh = 0;
 }
 
 
@@ -3877,33 +3860,32 @@ igb_setup_receive_ring(struct rx_ring *rxr)
 	struct igb_rx_buf	*rxbuf;
 	bus_dma_segment_t	pseg[1], hseg[1];
 	struct lro_ctrl		*lro = &rxr->lro;
-	int			i, j, nsegs, error = 0;
+	int			rsize, nsegs, error = 0;
 
 	adapter = rxr->adapter;
 	dev = adapter->dev;
 	ifp = adapter->ifp;
 
 	/* Clear the ring contents */
 	IGB_RX_LOCK(rxr);
-	/* Invalidate all descriptors */
-	for (i = 0; i < adapter->num_rx_desc; i++) {
-		union e1000_adv_rx_desc* cur;
-		cur = &rxr->rx_base[i];
-		cur->wb.upper.status_error = 0;
-	}
+	rsize = roundup2(adapter->num_rx_desc *
+	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
+	bzero((void *)rxr->rx_base, rsize);
 
 	/*
 	** Free current RX buffer structures and their mbufs
 	*/
 	igb_free_receive_ring(rxr);
 
 	/* Configure for header split? */
 	if (igb_header_split)
 		rxr->hdr_split = TRUE;
 
-	/* Get our indices */
-	i = j = rxr->next_to_refresh;
-	if (++j == adapter->num_rx_desc)
-		j = 0;
 	/* Now replenish the ring mbufs */
-	while (j != rxr->next_to_check) {
+	for (int j = 0; j < adapter->num_rx_desc; ++j) {
 		struct mbuf	*mh, *mp;
 
-		rxbuf = &rxr->rx_buffers[i];
+		rxbuf = &rxr->rx_buffers[j];
 		if (rxr->hdr_split == FALSE)
 			goto skip_head;
 
@@ -3926,7 +3908,7 @@ igb_setup_receive_ring(struct rx_ring *rxr)
 		bus_dmamap_sync(rxr->htag,
 		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
-		rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
+		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
 
 skip_head:
 		/* Now the payload cluster */
@@ -3947,16 +3929,12 @@ igb_setup_receive_ring(struct rx_ring *rxr)
 		bus_dmamap_sync(rxr->ptag,
 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
-		rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
-
-		/* Setup for next loop */
-		i = j;
-		if (++j == adapter->num_rx_desc)
-			j = 0;
+		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
 	}
 
 	/* Setup our descriptor indices */
-	rxr->next_to_refresh = i;
+	rxr->next_to_check = 0;
+	rxr->next_to_refresh = adapter->num_rx_desc - 1;
 	rxr->lro_enabled = FALSE;
 	rxr->rx_split_packets = 0;
 	rxr->rx_bytes = 0;
@@ -3989,12 +3967,12 @@ igb_setup_receive_ring(struct rx_ring *rxr)
 	return (0);
 
 fail:
-	rxr->next_to_refresh = i;
 	igb_free_receive_ring(rxr);
 	IGB_RX_UNLOCK(rxr);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
@@ -4528,7 +4506,7 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
 	}
 
 	/* Catch any remainders */
-	if (processed != 0 || i == rxr->next_to_refresh)
+	if (igb_rx_unrefreshed(rxr))
 		igb_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
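This is the em change the commit message refers to: instead of refreshing only when this pass processed packets or the pointers happen to meet, rxeof now refreshes on the way out whenever any descriptors remain unrefreshed, based on the actual count.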
@@ -5552,3 +5530,38 @@ igb_set_sysctl_value(struct adapter *adapter, const char *name,
 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
 }
 
+/*
+** Set flow control using sysctl:
+** Flow control values:
+**	0 - off
+**	1 - rx pause
+**	2 - tx pause
+**	3 - full
+*/
+static int
+igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct adapter *adapter;
+
+	error = sysctl_handle_int(oidp, &igb_fc_setting, 0, req);
+
+	if (error)
+		return (error);
+
+	adapter = (struct adapter *) arg1;
+	switch (igb_fc_setting) {
+		case e1000_fc_rx_pause:
+		case e1000_fc_tx_pause:
+		case e1000_fc_full:
+			adapter->hw.fc.requested_mode = igb_fc_setting;
+			break;
+		case e1000_fc_none:
+		default:
+			adapter->hw.fc.requested_mode = e1000_fc_none;
+	}
+
+	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
+	e1000_force_mac_fc(&adapter->hw);
+	return error;
+}
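With this handler in place the mode can be flipped at runtime, for example with something like sysctl dev.igb.0.flow_control=3 (unit number illustrative); e1000_force_mac_fc() pushes the new mode straight to the MAC, so no interface reset is required.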
diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2010, Intel Corporation
+  Copyright (c) 2001-2011, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -132,10 +132,9 @@
 
 /*
  * This parameter controls when the driver calls the routine to reclaim
- * transmit descriptors.
+ * transmit descriptors. Cleaning earlier seems a win.
  */
-#define IGB_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 8)
-#define IGB_TX_OP_THRESHOLD		(adapter->num_tx_desc / 32)
+#define IGB_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 2)
 
 /*
  * This parameter controls whether or not autonegotation is enabled.
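In concrete terms, assuming the driver's default 1024-descriptor TX ring: cleanup used to be attempted only once tx_avail fell to 128 (1024/8), with the queue marked full below 32 (1024/32); now cleanup starts at 512 free descriptors (1024/2), the half-ring threshold the commit message credits with the big performance difference, and the separate OP threshold is dropped in favor of the IGB_MAX_SCATTER checks in the start routines.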
@@ -400,7 +399,6 @@ struct adapter {
 	u16		link_speed;
 	u16		link_duplex;
 	u32		smartspeed;
-	u32		fc_setting;
 	u32		dma_coalesce;
 
 	/* Interface queues */
@@ -483,6 +481,21 @@ struct igb_rx_buf {
 	bus_dmamap_t	pmap;	/* bus_dma map for packet */
 };
 
+/*
+** Find the number of unrefreshed RX descriptors
+*/
+static inline u16
+igb_rx_unrefreshed(struct rx_ring *rxr)
+{
+	struct adapter	*adapter = rxr->adapter;
+
+	if (rxr->next_to_check > rxr->next_to_refresh)
+		return (rxr->next_to_check - rxr->next_to_refresh - 1);
+	else
+		return ((adapter->num_rx_desc + rxr->next_to_check) -
+		    rxr->next_to_refresh - 1);
+}
+
 #define	IGB_CORE_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF)
 #define	IGB_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
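Note the minus one in both branches of igb_rx_unrefreshed(): one descriptor of slack is always kept between next_to_refresh and next_to_check. That is also why igb_setup_receive_ring() pins a freshly initialized ring at next_to_check = 0 and next_to_refresh = num_rx_desc - 1: the count is then zero, and because a reset forces RDH to 0 on the newer adapters, starting the ring at index 0 is the only layout the hardware will honor.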