From 538ff57b75c5bf64f178ffedc1690665b1f82a47 Mon Sep 17 00:00:00 2001 From: Andrew Gallatin Date: Mon, 15 Apr 2019 17:14:50 +0000 Subject: [PATCH] mlx5en: Enable new pfil(9) KPI ethernet filtering hooks This allows efficient filtering at packet ingress on mlx5en. Note that the packets are filtered (and potentially dropped) *before* the driver has committed to (re)allocating an mbuf for the packet. Dropped packets are treated essentially the same as an error. Nothing is allocated, and the existing buffer is recycled. This allows us to drop malicious packets at close to line rate with very little CPU use. Reviewed by: hselasky, slavash, kib Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D19063 --- sys/dev/mlx5/mlx5_en/en.h | 2 ++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 13 +++++++++ sys/dev/mlx5/mlx5_en/mlx5_en_rx.c | 43 +++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index e90acdde1105..5dd41a6663d7 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -838,6 +839,7 @@ struct mlx5e_priv { struct mlx5e_clbr_point clbr_points[2]; u_int clbr_gen; + struct pfil_head *pfil; struct mlx5e_channel channel[]; }; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 47c209ace586..e79facb9d331 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3664,6 +3664,7 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; + struct pfil_head_args pa; int err; int i; u32 eth_proto_cap; @@ -3898,6 +3899,12 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT); mlx5e_reset_calibration_callout(priv); + pa.pa_version = PFIL_VERSION; + pa.pa_flags = PFIL_IN; + pa.pa_type = PFIL_TYPE_ETHERNET; + pa.pa_headname = ifp->if_xname; + priv->pfil = pfil_head_register(&pa); + return (priv); #ifdef RATELIMIT @@ -3974,6 +3981,12 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) pause("W", hz); } + /* deregister pfil */ + if (priv->pfil != NULL) { + pfil_head_unregister(priv->pfil); + priv->pfil = NULL; + } + /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index 079c5ddb11a7..ab0bedc01b05 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -430,15 +430,18 @@ mlx5e_decompress_cqes(struct mlx5e_cq *cq) static int mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) { - int i; + struct pfil_head *pfil; + int i, rv; + CURVNET_SET_QUIET(rq->ifp->if_vnet); + pfil = rq->channel->priv->pfil; for (i = 0; i < budget; i++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct mbuf *mb; __be16 wqe_counter_be; u16 wqe_counter; - u32 byte_cnt; + u32 byte_cnt, seglen; cqe = mlx5e_get_cqe(&rq->cq); if (!cqe) @@ -462,6 +465,39 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) rq->stats.wqe_err++; goto wq_ll_pop; } + if (pfil != NULL && PFIL_HOOKED_IN(pfil)) { + seglen = MIN(byte_cnt, MLX5E_MAX_RX_BYTES); + rv = pfil_run_hooks(rq->channel->priv->pfil, + rq->mbuf[wqe_counter].data, rq->ifp, + seglen | PFIL_MEMPTR | PFIL_IN, NULL); + + switch (rv) { + case PFIL_DROPPED: + case PFIL_CONSUMED: + /* + * Filter dropped or consumed it. In + * either case, we can just recycle + * buffer; there is no more work to do. + */ + rq->stats.packets++; + goto wq_ll_pop; + case PFIL_REALLOCED: + /* + * Filter copied it; recycle buffer + * and receive the new mbuf allocated + * by the Filter + */ + mb = pfil_mem2mbuf(rq->mbuf[wqe_counter].data); + goto rx_common; + default: + /* + * The Filter said it was OK, so + * receive like normal. + */ + KASSERT(rv == PFIL_PASS, + ("Filter returned %d!\n", rv)); + } + } if ((MHLEN - MLX5E_NET_IP_ALIGN) >= byte_cnt && (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) { #if (MLX5E_MAX_RX_SEGS != 1) @@ -480,7 +516,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) bus_dmamap_unload(rq->dma_tag, rq->mbuf[wqe_counter].dma_map); } - +rx_common: mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); rq->stats.bytes += byte_cnt; rq->stats.packets++; @@ -499,6 +535,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } + CURVNET_RESTORE(); mlx5_cqwq_update_db_record(&rq->cq.wq);