From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 19 Jan 2022 21:28:36 +0200
Subject: net/mlx5e: RX, Test the XDP program existence out of the handler
Patch-mainline: v5.18-rc1
Git-commit: e26eceb90b01ed941a6d13419a890930adcac494
References: jsc#PED-1549
Instead of early return inside mlx5e_xdp_handle(), let the caller check
if an XDP program is loaded. This allows saving a few unnecessary
function calls and calculations in case !prog.
Performance test: single core, drop packets in iptables
Before: 3,872,504 pps
After: 3,975,628 pps (+2.66%)
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 5 --
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 1
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 9 ++-
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 49 ++++++++++++--------
4 files changed, 39 insertions(+), 25 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -120,15 +120,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ struct bpf_prog *prog,
u32 *len, struct xdp_buff *xdp)
{
- struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
u32 act;
int err;
- if (!prog)
- return false;
-
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -48,6 +48,7 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ struct bpf_prog *prog,
u32 *len, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -4,6 +4,7 @@
#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
/* RX data path */
@@ -31,6 +32,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
{
struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
u32 cqe_bcnt32 = cqe_bcnt;
+ struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -65,7 +67,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
* allocated first from the Reuse Ring, so it has enough space.
*/
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt32, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -83,6 +86,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
u32 cqe_bcnt)
{
struct xdp_buff *xdp = wi->di->xsk;
+ struct bpf_prog *prog;
/* wi->offset is not used in this function, because xdp->data and the
* DMA address point directly to the necessary place. Furthermore, the
@@ -101,7 +105,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
return NULL;
}
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,6 +34,7 @@
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bitmap.h>
+#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
@@ -1523,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
{
struct mlx5e_dma_info *di = wi->di;
u16 rx_headroom = rq->buff.headroom;
- struct xdp_buff xdp;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
u32 frag_size;
- u32 metasize;
va = page_address(di->page) + wi->offset;
data = va + rx_headroom;
@@ -1535,16 +1536,21 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
- net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
- mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
- if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
- return NULL; /* page/packet was consumed by XDP */
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+ if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt, &xdp))
+ return NULL; /* page/packet was consumed by XDP */
- rx_headroom = xdp.data - xdp.data_hard_start;
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ }
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
- metasize = xdp.data - xdp.data_meta;
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
@@ -1842,11 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
u16 rx_headroom = rq->buff.headroom;
u32 cqe_bcnt32 = cqe_bcnt;
- struct xdp_buff xdp;
+ struct bpf_prog *prog;
struct sk_buff *skb;
+ u32 metasize = 0;
void *va, *data;
u32 frag_size;
- u32 metasize;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1860,19 +1866,24 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
frag_size, DMA_FROM_DEVICE);
- net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
- mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
- if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
- if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
- __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
- return NULL; /* page/packet was consumed by XDP */
- }
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog) {
+ struct xdp_buff xdp;
+
+ net_prefetchw(va); /* xdp_frame data area */
+ mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+ if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt32, &xdp)) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
- rx_headroom = xdp.data - xdp.data_hard_start;
+ rx_headroom = xdp.data - xdp.data_hard_start;
+ metasize = xdp.data - xdp.data_meta;
+ }
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
- metasize = xdp.data - xdp.data_meta;
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize);
if (unlikely(!skb))
return NULL;