1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-10-18 02:19:39 +00:00

ena: Add reset reason for corrupted TX cdescs

TX completion descriptors may sometimes contain errors due
to corruption. Upon identifying such a case, the driver will
trigger a reset with an explicit reset reason
ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED.

Approved by: cperciva (mentor)
MFC after: 2 weeks
Sponsored by: Amazon, Inc.
This commit is contained in:
Osama Abboud 2024-08-07 06:24:19 +00:00 committed by Osama Abboud
parent 89ce3f6314
commit 3872721846
5 changed files with 55 additions and 41 deletions

View File

@ -560,6 +560,32 @@ ena_free_rx_dma_tag(struct ena_adapter *adapter)
return (ret);
}
int
validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id, int tx_req_id_rc)
{
struct ena_adapter *adapter = tx_ring->adapter;
enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
if (unlikely(tx_req_id_rc != 0)) {
if (tx_req_id_rc == ENA_COM_FAULT) {
reset_reason = ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED;
ena_log(adapter->pdev, ERR,
"TX descriptor malformed. req_id %hu qid %hu\n",
req_id, tx_ring->qid);
} else if (tx_req_id_rc == ENA_COM_INVAL) {
ena_log_nm(adapter->pdev, WARN,
"Invalid req_id %hu in qid %hu\n",
req_id, tx_ring->qid);
counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
}
ena_trigger_reset(adapter, reset_reason);
return (EFAULT);
}
return (0);
}
static void
ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
{

View File

@ -389,6 +389,7 @@ struct ena_stats_dev {
counter_u64_t bad_rx_desc_num;
counter_u64_t invalid_state;
counter_u64_t missing_intr;
counter_u64_t tx_desc_malformed;
};
struct ena_hw_stats {
@ -548,6 +549,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TOO_MANY_RX_DESCS, bad_rx_desc_num),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_DRIVER_INVALID_STATE, invalid_state),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_INTERRUPT, missing_intr),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed),
};
int ena_up(struct ena_adapter *adapter);
@ -562,6 +564,7 @@ int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
int ena_update_base_cpu(struct ena_adapter *adapter, int new_num);
int ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num);
int validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id, int tx_req_id_rc);
static inline int
ena_mbuf_count(struct mbuf *mbuf)
{

View File

@ -200,29 +200,22 @@ ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
uint16_t *req_id)
{
struct ena_adapter *adapter = tx_ring->adapter;
int rc;
int rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
if (rc == ENA_COM_TRY_AGAIN)
if (unlikely(rc == ENA_COM_TRY_AGAIN))
return (EAGAIN);
if (unlikely(rc != 0)) {
ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n",
rc = validate_tx_req_id(tx_ring, *req_id, rc);
if (unlikely(tx_ring->tx_buffer_info[*req_id].mbuf == NULL)) {
ena_log(adapter->pdev, ERR,
"tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
*req_id, tx_ring->qid);
counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
goto err;
ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
rc = EFAULT;
}
if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL)
return (0);
ena_log(adapter->pdev, ERR,
"tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
*req_id, tx_ring->qid);
err:
ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
return (EFAULT);
return (rc);
}
/**

View File

@ -71,7 +71,6 @@ static void ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *,
struct ena_tx_buffer *);
static void ena_netmap_tx_cleanup(struct ena_netmap_ctx *);
static uint16_t ena_netmap_tx_clean_one(struct ena_netmap_ctx *, uint16_t);
static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
static int ena_netmap_rx_frames(struct ena_netmap_ctx *);
static int ena_netmap_rx_frame(struct ena_netmap_ctx *);
static int ena_netmap_rx_load_desc(struct ena_netmap_ctx *, uint16_t, int *);
@ -795,25 +794,33 @@ ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *ctx,
static void
ena_netmap_tx_cleanup(struct ena_netmap_ctx *ctx)
{
struct ena_ring *tx_ring = ctx->ring;
int rc;
uint16_t req_id;
uint16_t total_tx_descs = 0;
ctx->nm_i = ctx->kring->nr_hwtail;
ctx->nt = ctx->ring->next_to_clean;
ctx->nt = tx_ring->next_to_clean;
/* Reclaim buffers for completed transmissions */
while (ena_com_tx_comp_req_id_get(ctx->io_cq, &req_id) >= 0) {
if (validate_tx_req_id(ctx->ring, req_id) != 0)
do {
rc = ena_com_tx_comp_req_id_get(ctx->io_cq, &req_id);
if(unlikely(rc == ENA_COM_TRY_AGAIN))
break;
rc = validate_tx_req_id(tx_ring, req_id, rc);
if(unlikely(rc != 0))
break;
total_tx_descs += ena_netmap_tx_clean_one(ctx, req_id);
}
} while (1);
ctx->kring->nr_hwtail = ctx->nm_i;
if (total_tx_descs > 0) {
/* acknowledge completion of sent packets */
ctx->ring->next_to_clean = ctx->nt;
ena_com_comp_ack(ctx->ring->ena_com_io_sq, total_tx_descs);
tx_ring->next_to_clean = ctx->nt;
ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
}
}
@ -856,23 +863,6 @@ ena_netmap_tx_clean_one(struct ena_netmap_ctx *ctx, uint16_t req_id)
return tx_info->tx_descs;
}
static inline int
validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
{
struct ena_adapter *adapter = tx_ring->adapter;
if (likely(req_id < tx_ring->ring_size))
return (0);
ena_log_nm(adapter->pdev, WARN, "Invalid req_id %hu in qid %hu\n",
req_id, tx_ring->qid);
counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
return (EFAULT);
}
static int
ena_netmap_rxsync(struct netmap_kring *kring, int flags)
{

View File

@ -294,6 +294,8 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
&dev_stats->invalid_state, "Driver invalid state count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD,
&dev_stats->missing_intr, "Missing interrupt count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD,
&dev_stats->tx_desc_malformed, "TX descriptors malformed count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
&dev_stats->total_resets, "Total resets count");