Blob Blame History Raw
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Fri, 30 Sep 2022 09:28:52 -0700
Subject: net/mlx5e: Use partial batches in legacy RQ
Patch-mainline: v6.1-rc1
Git-commit: 42847fed55523bebb712bfd7e2c4616db00c3aef
References: jsc#PED-1549

Legacy RQ allocates WQEs in batches. If the batch allocation fails, the
pages of the allocated part are released. This commit changes this
behavior so that the pages that have already been allocated can still
be used.

After this change, we need to be careful about indexing rq->wqe.frags[].
The WQ size is a power of two that is divisible by wqe_bulk (8), and the
old code used whole bulks, which made it safe to use indices
[8*K; 8*K+7] without overflowing. Now that the bulks may be partial, the
range can start at any location (not only at 8*K), so we need to wrap
the indices around to avoid out-of-bounds array access.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |   39 ++++++++++++------------
 1 file changed, 21 insertions(+), 18 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -427,7 +427,6 @@ static void mlx5e_dealloc_rx_wqe(struct
 static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
 {
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
-	int err;
 	int i;
 
 	if (rq->xsk_pool) {
@@ -442,20 +441,16 @@ static int mlx5e_alloc_rx_wqes(struct ml
 	}
 
 	for (i = 0; i < wqe_bulk; i++) {
-		struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
-
-		err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
-		if (unlikely(err))
-			goto free_wqes;
-	}
+		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+		struct mlx5e_rx_wqe_cyc *wqe;
 
-	return 0;
+		wqe = mlx5_wq_cyc_get_wqe(wq, j);
 
-free_wqes:
-	while (--i >= 0)
-		mlx5e_dealloc_rx_wqe(rq, ix + i);
+		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
+			break;
+	}
 
-	return err;
+	return i;
 }
 
 static inline void
@@ -821,8 +816,8 @@ static void mlx5e_dealloc_rx_mpwqe(struc
 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+	bool busy = false;
 	u8 wqe_bulk;
-	int err;
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return false;
@@ -837,14 +832,22 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_
 
 	do {
 		u16 head = mlx5_wq_cyc_get_head(wq);
+		int count;
+		u8 bulk;
 
-		err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
-		if (unlikely(err)) {
+		/* Don't allow any newly allocated WQEs to share the same page
+		 * with old WQEs that aren't completed yet. Stop earlier.
+		 */
+		bulk = wqe_bulk - ((head + wqe_bulk) & rq->wqe.info.wqe_index_mask);
+
+		count = mlx5e_alloc_rx_wqes(rq, head, bulk);
+		if (likely(count > 0))
+			mlx5_wq_cyc_push_n(wq, count);
+		if (unlikely(count != bulk)) {
 			rq->stats->buff_alloc_err++;
+			busy = true;
 			break;
 		}
-
-		mlx5_wq_cyc_push_n(wq, wqe_bulk);
 	} while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
 
 	/* ensure wqes are visible to device before updating doorbell record */
@@ -852,7 +855,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_
 
 	mlx5_wq_cyc_update_db_record(wq);
 
-	return !!err;
+	return busy;
 }
 
 void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)