From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 19 Jan 2022 21:28:36 +0200
Subject: net/mlx5e: RX, Test the XDP program existence out of the handler
Patch-mainline: v5.18-rc1
Git-commit: e26eceb90b01ed941a6d13419a890930adcac494
References: jsc#PED-1549

Instead of returning early inside mlx5e_xdp_handle(), let the caller check
whether an XDP program is loaded.  This saves a few unnecessary function
calls and calculations in the !prog case.

Performance test: single core, drop packets in iptables
Before: 3,872,504 pps
After:  3,975,628 pps (+2.66%)
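
A minimal, self-contained userspace C sketch (illustrative names only, not
mlx5e code) of the pattern applied here: the caller fetches the program once
(in the driver, via rcu_dereference(rq->xdp_prog), as the hunks below show)
and skips the handler and its setup work entirely when no program is
attached, rather than having the handler test for NULL on every packet:

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  struct prog { int verdict; };

  /* Handler now assumes prog != NULL; the check lives in the caller. */
  static bool handle(struct prog *prog, unsigned int *len)
  {
  	(void)len;
  	return prog->verdict != 0;	/* true: packet consumed */
  }

  static void rx_path(struct prog *prog, unsigned int len)
  {
  	/* Caller-side check: with no program loaded, the handler and its
  	 * per-packet setup work (building the xdp_buff, the prefetch of
  	 * the frame data area) are skipped entirely.
  	 */
  	if (prog && handle(prog, &len)) {
  		printf("consumed\n");
  		return;
  	}
  	printf("build skb, len=%u\n", len);
  }

  int main(void)
  {
  	struct prog p = { .verdict = 1 };

  	rx_path(NULL, 64);	/* no program: handler never runs */
  	rx_path(&p, 64);	/* program attached: packet consumed */
  	return 0;
  }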

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c    |    5 --
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h    |    1 
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c |    9 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c     |   49 ++++++++++++--------
 4 files changed, 39 insertions(+), 25 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -120,15 +120,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *
 
 /* returns true if packet was consumed by xdp */
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+		      struct bpf_prog *prog,
 		      u32 *len, struct xdp_buff *xdp)
 {
-	struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
 	u32 act;
 	int err;
 
-	if (!prog)
-		return false;
-
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -48,6 +48,7 @@
 struct mlx5e_xsk_param;
 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+		      struct bpf_prog *prog,
 		      u32 *len, struct xdp_buff *xdp);
 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -4,6 +4,7 @@
 #include "rx.h"
 #include "en/xdp.h"
 #include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
 
 /* RX data path */
 
@@ -31,6 +32,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
 {
 	struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
 	u32 cqe_bcnt32 = cqe_bcnt;
+	struct bpf_prog *prog;
 
 	/* Check packet size. Note LRO doesn't use linear SKB */
 	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -65,7 +67,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_m
 	 * allocated first from the Reuse Ring, so it has enough space.
 	 */
 
-	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+	prog = rcu_dereference(rq->xdp_prog);
+	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt32, xdp))) {
 		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
 			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
 		return NULL; /* page/packet was consumed by XDP */
@@ -83,6 +86,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
 					      u32 cqe_bcnt)
 {
 	struct xdp_buff *xdp = wi->di->xsk;
+	struct bpf_prog *prog;
 
 	/* wi->offset is not used in this function, because xdp->data and the
 	 * DMA address point directly to the necessary place. Furthermore, the
@@ -101,7 +105,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_l
 		return NULL;
 	}
 
-	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+	prog = rcu_dereference(rq->xdp_prog);
+	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt, xdp)))
 		return NULL; /* page/packet was consumed by XDP */
 
 	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,6 +34,7 @@
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/bitmap.h>
+#include <linux/filter.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
 #include <net/inet_ecn.h>
@@ -1523,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
 {
 	struct mlx5e_dma_info *di = wi->di;
 	u16 rx_headroom = rq->buff.headroom;
-	struct xdp_buff xdp;
+	struct bpf_prog *prog;
 	struct sk_buff *skb;
+	u32 metasize = 0;
 	void *va, *data;
 	u32 frag_size;
-	u32 metasize;
 
 	va             = page_address(di->page) + wi->offset;
 	data           = va + rx_headroom;
@@ -1535,16 +1536,21 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_r
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
 				      frag_size, DMA_FROM_DEVICE);
-	net_prefetchw(va); /* xdp_frame data area */
 	net_prefetch(data);
 
-	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
-	if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
-		return NULL; /* page/packet was consumed by XDP */
+	prog = rcu_dereference(rq->xdp_prog);
+	if (prog) {
+		struct xdp_buff xdp;
+
+		net_prefetchw(va); /* xdp_frame data area */
+		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+		if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt, &xdp))
+			return NULL; /* page/packet was consumed by XDP */
 
-	rx_headroom = xdp.data - xdp.data_hard_start;
+		rx_headroom = xdp.data - xdp.data_hard_start;
+		metasize = xdp.data - xdp.data_meta;
+	}
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
-	metasize = xdp.data - xdp.data_meta;
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
 	if (unlikely(!skb))
 		return NULL;
@@ -1842,11 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
 	u16 rx_headroom = rq->buff.headroom;
 	u32 cqe_bcnt32 = cqe_bcnt;
-	struct xdp_buff xdp;
+	struct bpf_prog *prog;
 	struct sk_buff *skb;
+	u32 metasize = 0;
 	void *va, *data;
 	u32 frag_size;
-	u32 metasize;
 
 	/* Check packet size. Note LRO doesn't use linear SKB */
 	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1860,19 +1866,24 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
 				      frag_size, DMA_FROM_DEVICE);
-	net_prefetchw(va); /* xdp_frame data area */
 	net_prefetch(data);
 
-	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
-	if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
-		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
-		return NULL; /* page/packet was consumed by XDP */
-	}
+	prog = rcu_dereference(rq->xdp_prog);
+	if (prog) {
+		struct xdp_buff xdp;
+
+		net_prefetchw(va); /* xdp_frame data area */
+		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+		if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt32, &xdp)) {
+			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+			return NULL; /* page/packet was consumed by XDP */
+		}
 
-	rx_headroom = xdp.data - xdp.data_hard_start;
+		rx_headroom = xdp.data - xdp.data_hard_start;
+		metasize = xdp.data - xdp.data_meta;
+	}
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
-	metasize = xdp.data - xdp.data_meta;
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize);
 	if (unlikely(!skb))
 		return NULL;