From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Fri, 30 Sep 2022 09:28:54 -0700
Subject: net/mlx5e: Remove the outer loop when allocating legacy RQ WQEs
Patch-mainline: v6.1-rc1
Git-commit: 0b482232374528b62ef978241f8efc548dce7edb
References: jsc#PED-1549
Legacy RQ WQEs are allocated in a loop in small batches (8 WQEs). As
partial batches are allowed, there is no point to have a loop in a loop,
so the outer loop is removed, and the batch size is increased up to the
total number of WQEs to allocate, still not smaller than 8.
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 41 ++++++++++--------------
drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 -
2 files changed, 19 insertions(+), 24 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -424,7 +424,7 @@ static void mlx5e_dealloc_rx_wqe(struct
mlx5e_free_rx_wqe(rq, wi, false);
}
-static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
int i;
@@ -805,38 +805,33 @@ static void mlx5e_dealloc_rx_mpwqe(struc
INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int wqe_bulk, count;
bool busy = false;
- u8 wqe_bulk;
+ u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- wqe_bulk = rq->wqe.info.wqe_bulk;
-
- if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
+ if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
return false;
if (rq->page_pool)
page_pool_nid_changed(rq->page_pool, numa_mem_id());
- do {
- u16 head = mlx5_wq_cyc_get_head(wq);
- int count;
- u8 bulk;
-
- /* Don't allow any newly allocated WQEs to share the same page
- * with old WQEs that aren't completed yet. Stop earlier.
- */
- bulk = wqe_bulk - ((head + wqe_bulk) & rq->wqe.info.wqe_index_mask);
-
- count = mlx5e_alloc_rx_wqes(rq, head, bulk);
- mlx5_wq_cyc_push_n(wq, count);
- if (unlikely(count != bulk)) {
- rq->stats->buff_alloc_err++;
- busy = true;
- break;
- }
- } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
+ wqe_bulk = mlx5_wq_cyc_missing(wq);
+ head = mlx5_wq_cyc_get_head(wq);
+
+ /* Don't allow any newly allocated WQEs to share the same page with old
+ * WQEs that aren't completed yet. Stop earlier.
+ */
+ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
+
+ count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ mlx5_wq_cyc_push_n(wq, count);
+ if (unlikely(count != wqe_bulk)) {
+ rq->stats->buff_alloc_err++;
+ busy = true;
+ }
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -123,7 +123,7 @@ static inline void mlx5_wq_cyc_push(stru
wq->cur_sz++;
}
-static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n)
{
wq->wqe_ctr += n;
wq->cur_sz += n;