From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Mon, 30 Mar 2020 16:28:34 +0300
Subject: net/mlx5e: Calculate SQ stop room in a robust way
Patch-mainline: v5.8-rc1
Git-commit: 5ffb4d858b7051720f20bcbb92dce0b433e60d88
References: jsc#SLE-15172

Currently, different formulas are used to estimate the space that may be
taken by WQEs in the SQ during a single packet transmit. This space is
called stop room, and it's checked at the end of every packet transmit to
find out whether the next packet could overflow the SQ. If it could, the
driver tells the kernel to stop sending further packets.

Many factors affect the stop room:

1. Padding with NOPs to avoid WQEs spanning over page boundaries.

2. Enabled and disabled offloads (TLS, upcoming MPWQE).

3. The maximum size of a WQE.

Padding with NOPs is performed before a WQE whenever that WQE doesn't
fit into the current page.

The current formula assumes that at most one padding operation will be
required per packet, and it doesn't take into account that the WQEs
posted during the transmission of a single packet might exceed the page
size in very rare circumstances. For example, to hit this condition with
4096-byte pages, TLS offload will have to interrupt an almost-full MPWQE
session, be in the resync flow and try to transmit a near-maximum amount
of data.

To avoid SQ overflows in such rare cases after MPWQE is added, this
patch introduces a more robust formula to estimate the stop room. The
new formula uses the fact that a WQE of size X will not require more
than X-1 WQEBBs of padding. More exact estimations are possible, but
they result in much more complex and error-prone code for little gain.
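
For illustration, assuming the usual mlx5 constants (a 64-byte WQEBB and
a 16-WQEBB maximum WQE size), a maximum-size WQE may need up to 15 WQEBBs
of NOP padding in front of it, in addition to its own 16 WQEBBs, i.e. 31
WQEBBs of stop room in total. This is what the new helper computes:

	stop_room = wqe_size + (wqe_size - 1) = wqe_size * 2 - 1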

Before this patch, the TLS stop room included space for both the INNOVA
and ConnectX TLS offloads, even though they can't run at the same time;
this patch accounts only for the one that is active.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h       |   40 ++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c        |    4 +
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h        |    2 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c |   13 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h |   12 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c  |   14 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h  |    7 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c       |   28 ++++++++---
 8 files changed, 84 insertions(+), 36 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -6,25 +6,6 @@
 
 #include "en.h"
 
-#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
-			    MLX5E_SQ_NOPS_ROOM)
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_TLS_ROOM (0)
-#else
-/* TLS offload requires additional stop_room for:
- *  - a resync SKB.
- * kTLS offload requires fixed additional stop_room for:
- * - a static params WQE, and a progress params WQE.
- * The additional MTU-depending room for the resync DUMP WQEs
- * will be calculated and added in runtime.
- */
-#define MLX5E_SQ_TLS_ROOM  \
-	(MLX5_SEND_WQE_MAX_WQEBBS + \
-	 MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
-#endif
-
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
 
 enum mlx5e_icosq_wqe_type {
@@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb,
 	}
 }
 
+static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+{
+	BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+
+	/* A WQE must not cross the page boundary, hence two conditions:
+	 * 1. Its size must not exceed the page size.
+	 * 2. If the WQE size is X, and the space remaining in a page is less
+	 *    than X, this space needs to be padded with NOPs. So, one WQE of
+	 *    size X may require up to X-1 WQEBBs of padding, which makes the
+	 *    stop room of X-1 + X.
+	 * WQE size is also limited by the hardware limit.
+	 */
+
+	if (__builtin_constant_p(wqe_size))
+		BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+	else
+		WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+
+	return wqe_size * 2 - 1;
+}
+
 #endif
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -257,8 +257,10 @@ enum {
 static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
 {
 	if (unlikely(!sq->mpwqe.wqe)) {
+		const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
 		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
-						     MLX5E_XDPSQ_STOP_ROOM))) {
+						     stop_room))) {
 			/* SQ is full, ring doorbell */
 			mlx5e_xmit_xdp_doorbell(sq);
 			sq->stats->full++;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -40,8 +40,6 @@
 	(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
-#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
-
 #define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
 #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
 	DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -4,6 +4,19 @@
 #include "en.h"
 #include "en_accel/ktls.h"
 
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	u16 num_dumps, stop_room = 0;
+
+	num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
+
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS);
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS);
+	stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
+
+	return stop_room;
+}
+
 static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
 {
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq);
+
 static inline u8
-mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
-			    unsigned int sync_len)
+mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags,
+			  unsigned int sync_len)
 {
 	/* Given the MTU and sync_len, calculates an upper bound for the
-	 * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+	 * number of DUMP WQEs needed for the TX resync of a record.
 	 */
-	return MLX5E_KTLS_DUMP_WQEBBS *
-		(nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+	return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu);
 }
 #else
 
@@ -122,7 +123,6 @@ static inline void
 mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 				      struct mlx5e_tx_wqe_info *wi,
 				      u32 *dma_fifo_cc) {}
-
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv
 	kfree(tls);
 	priv->tls = NULL;
 }
+
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+
+	if (!mlx5_accel_is_tls_device(mdev))
+		return 0;
+
+	if (MLX5_CAP_GEN(mdev, tls_tx))
+		return mlx5e_ktls_get_stop_room(sq);
+
+	/* Resync SKB. */
+	return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_pri
 int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
 int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
 
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq);
+
 #else
 
 static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
@@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(st
 static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
 static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
 
+static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1122,6 +1122,22 @@ static int mlx5e_alloc_txqsq_db(struct m
 	return 0;
 }
 
+static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
+{
+	int sq_size = 1 << log_sq_size;
+
+	sq->stop_room  = mlx5e_tls_get_stop_room(sq);
+	sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
+	if (WARN_ON(sq->stop_room >= sq_size)) {
+		netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
+			   sq->stop_room, sq_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
@@ -1146,20 +1162,16 @@ static int mlx5e_alloc_txqsq(struct mlx5
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
-	sq->stop_room = MLX5E_SQ_STOP_ROOM;
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-#ifdef CONFIG_MLX5_EN_TLS
-	if (mlx5_accel_is_tls_device(c->priv->mdev)) {
+	if (mlx5_accel_is_tls_device(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
-		sq->stop_room += MLX5E_SQ_TLS_ROOM +
-			mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
-						    TLS_MAX_PAYLOAD_SIZE);
-	}
-#endif
+	err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
+	if (err)
+		return err;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);