Blob Blame History Raw
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 16 Apr 2020 11:29:49 +0300
Subject: net/mlx5e: Unify reserving space for WQEs
Patch-mainline: v5.8-rc1
Git-commit: ec9cdca0663a543ede2072ff091beec1787e3374
References: jsc#SLE-15172

In our fast-path design, a WQE (Work Queue Element) must not cross the
page boundary. To enforce that, for WQEs consisting of more than one BB
(Basic Block), the driver checks the available contiguous space in the
WQ in advance, and if it's not enough, it pads it with NOPs.

This patch modifies the code that calculates the position of next WQE,
considering the padding, and prepares the WQE. This code is common for
all SQ types. In this patch it's reorganized in a way that makes the
usage pattern unified for all SQ types, and makes the implementations
self-contained and look almost the same, preparing the repeating code to
further attempts to deduplicate it.

One place is left as is: mlx5e_sq_xmit and mlx5e_fill_sq_frag_edge call
inside, because it is special in a way that it may also copy WQE's cseg
and eseg when reserving space. This will be eliminated in one of the
following patches, and this place will be converted to the new approach,
too.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h          |   56 +++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c           |   31 ++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h           |   17 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c |   16 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c            |   26 ------
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c            |   11 --
 6 files changed, 88 insertions(+), 69 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -81,6 +81,62 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc
 	return wqe;
 }
 
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_tx_wqe_info) {
+				.num_wqebbs = 1,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+		sq->stats->nop += contig_wqebbs;
+
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	}
+
+	return pi;
+}
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_icosq_wqe_info) {
+				.opcode = MLX5_OPCODE_NOP,
+				.num_wqebbs = 1,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	}
+
+	return pi;
+}
+
 static inline void
 mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
 			u16 pi, u16 nnops)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -178,21 +178,42 @@ xdp_abort:
 	}
 }
 
-static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
 {
-	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
-	struct mlx5e_xdpsq_stats *stats = sq->stats;
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	u16 pi, contig_wqebbs;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_xdp_wqe_info) {
+				.num_wqebbs = 1,
+				.num_pkts = 0,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+		sq->stats->nops += contig_wqebbs;
 
-	if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) {
-		mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
+	return pi;
+}
+
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5e_xdpsq_stats *stats = sq->stats;
+	u16 pi;
+
+	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
 	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
 	prefetchw(session->wqe->data);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -138,23 +138,6 @@ mlx5e_xdp_no_room_for_inline_pkt(struct
 }
 
 static inline void
-mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
-			   u16 pi, u16 nnops)
-{
-	struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-	edge_wi = wi + nnops;
-	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
-	for (; wi < edge_wi; wi++) {
-		wi->num_wqebbs = 1;
-		wi->num_pkts   = 0;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-
-	sq->stats->nops += nnops;
-}
-
-static inline void
 mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
 			 struct mlx5e_xdp_xmit_data *xdptxd,
 			 struct mlx5e_xdpsq_stats *stats)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -165,14 +165,8 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx
 			      bool skip_static_post, bool fence_first_post)
 {
 	bool progress_fence = skip_static_post || !fence_first_post;
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 contig_wqebbs_room, pi;
 
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room <
-		     MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+	mlx5e_txqsq_get_next_pi(sq, MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS);
 
 	if (!skip_static_post)
 		post_static_params(sq, priv_tx, fence_first_post);
@@ -346,10 +340,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt
 			 u32 seq)
 {
 	struct mlx5e_sq_stats *stats = sq->stats;
-	struct mlx5_wq_cyc *wq = &sq->wq;
 	enum mlx5e_ktls_sync_retval ret;
 	struct tx_sync_info info = {};
-	u16 contig_wqebbs_room, pi;
 	u8 num_wqebbs;
 	int i = 0;
 
@@ -380,11 +372,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt
 	}
 
 	num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-
-	if (unlikely(contig_wqebbs_room < num_wqebbs))
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+	mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 
 	for (; i < info.nr_frags; i++) {
 		unsigned int orig_fsz, frag_offset = 0, n = 0;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -468,22 +468,6 @@ static void mlx5e_post_rx_mpwqe(struct m
 	mlx5_wq_ll_update_db_record(wq);
 }
 
-static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
-					      struct mlx5_wq_cyc *wq,
-					      u16 pi, u16 nnops)
-{
-	struct mlx5e_icosq_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-	edge_wi = wi + nnops;
-
-	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
-	for (; wi < edge_wi; wi++) {
-		wi->opcode = MLX5_OPCODE_NOP;
-		wi->num_wqebbs = 1;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-}
-
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -492,7 +476,7 @@ static int mlx5e_alloc_rx_mpwqe(struct m
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi, contig_wqebbs_room;
+	u16 pi;
 	int err;
 	int i;
 
@@ -502,13 +486,7 @@ static int mlx5e_alloc_rx_mpwqe(struct m
 		goto err;
 	}
 
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
-		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	}
-
+	pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -575,7 +575,6 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t
 			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
 			  bool xmit_more)
 {
-	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5i_tx_wqe *wqe;
 
 	struct mlx5_wqe_datagram_seg *datagram;
@@ -585,9 +584,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
-	u16 headlen, ihs, pi, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
+	u16 headlen, ihs, pi;
 	u32 num_bytes;
 	int num_dma;
 	__be16 mss;
@@ -623,13 +622,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-	}
-
+	pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
 
 	/* fill wqe */