Blob Blame History Raw
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 27 Jul 2022 12:43:45 +0300
Subject: net/mlx5e: kTLS, Recycle objects of device-offloaded TLS TX
 connections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Patch-mainline: v6.0-rc1
Git-commit: c4dfe704f53fb530e2a198c8d68e489cb68ebd70
References: jsc#PED-1549

The transport interface send (TIS) object is responsible for performing
all transport related operations of the transmit side.  The ConnectX HW
uses a TIS object to save and access the TLS crypto information and state
of an offloaded TX kTLS connection.

Before this patch, we used to create a new TIS per connection and destroy
it once the connection is closed. Each TIS creation and destruction is a
FW command.

Same applies for the private TLS context, where we used to dynamically
allocate and free it per connection.

Resource recycling reduces the impact of the allocation/free operations
and helps speed up the connection rate.

In this feature we maintain a pool of TX objects and use it to recycle
the resources instead of re-creating them per connection.

A cached TIS popped from the pool is updated to serve the new connection
via the fast-path HW interface, updating the tls static and progress
params. This is a very fast operation, significantly faster than FW
commands.

On recycling, a WQE fence is required after the context params change.
This guarantees that the data is sent after the context has been
successfully updated in hardware, and that the context modification
doesn't interfere with existing traffic.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h   |   10 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h       |   14 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c |    2 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c    |  211 +++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c             |    9 
 5 files changed, 199 insertions(+), 47 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -194,4 +194,14 @@ static inline void mlx5e_accel_cleanup_r
 {
 	mlx5e_ktls_cleanup_rx(priv);
 }
+
+static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv)
+{
+	return mlx5e_ktls_init_tx(priv);
+}
+
+static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv)
+{
+	mlx5e_ktls_cleanup_tx(priv);
+}
 #endif /* __MLX5E_EN_ACCEL_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -42,6 +42,8 @@ static inline bool mlx5e_ktls_type_check
 }
 
 void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv);
 int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
 void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv);
 int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
@@ -62,6 +64,8 @@ static inline bool mlx5e_is_ktls_rx(stru
 struct mlx5e_tls_sw_stats {
 	atomic64_t tx_tls_ctx;
 	atomic64_t tx_tls_del;
+	atomic64_t tx_tls_pool_alloc;
+	atomic64_t tx_tls_pool_free;
 	atomic64_t rx_tls_ctx;
 	atomic64_t rx_tls_del;
 };
@@ -69,6 +73,7 @@ struct mlx5e_tls_sw_stats {
 struct mlx5e_tls {
 	struct mlx5e_tls_sw_stats sw_stats;
 	struct workqueue_struct *rx_wq;
+	struct mlx5e_tls_tx_pool *tx_pool;
 };
 
 int mlx5e_ktls_init(struct mlx5e_priv *priv);
@@ -83,6 +88,15 @@ static inline void mlx5e_ktls_build_netd
 {
 }
 
+static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+}
+
 static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
 {
 	return 0;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
@@ -41,6 +41,8 @@
 static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) },
 };
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -35,6 +35,7 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5
 	stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
 	stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
 	stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
+	stop_room += 1; /* fence nop */
 
 	return stop_room;
 }
@@ -56,13 +57,17 @@ static int mlx5e_ktls_create_tis(struct
 }
 
 struct mlx5e_ktls_offload_context_tx {
-	struct tls_offload_context_tx *tx_ctx;
-	struct tls12_crypto_info_aes_gcm_128 crypto_info;
-	struct mlx5e_tls_sw_stats *sw_stats;
+	/* fast path */
 	u32 expected_seq;
 	u32 tisn;
-	u32 key_id;
 	bool ctx_post_pending;
+	/* control / resync */
+	struct list_head list_node; /* member of the pool */
+	struct tls12_crypto_info_aes_gcm_128 crypto_info;
+	struct tls_offload_context_tx *tx_ctx;
+	struct mlx5_core_dev *mdev;
+	struct mlx5e_tls_sw_stats *sw_stats;
+	u32 key_id;
 };
 
 static void
@@ -86,28 +91,136 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_co
 	return *ctx;
 }
 
+static struct mlx5e_ktls_offload_context_tx *
+mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats)
+{
+	struct mlx5e_ktls_offload_context_tx *priv_tx;
+	int err;
+
+	priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
+	if (!priv_tx)
+		return ERR_PTR(-ENOMEM);
+
+	priv_tx->mdev = mdev;
+	priv_tx->sw_stats = sw_stats;
+
+	err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
+	if (err) {
+		kfree(priv_tx);
+		return ERR_PTR(err);
+	}
+
+	return priv_tx;
+}
+
+static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	mlx5e_destroy_tis(priv_tx->mdev, priv_tx->tisn);
+	kfree(priv_tx);
+}
+
+static void mlx5e_tls_priv_tx_list_cleanup(struct list_head *list)
+{
+	struct mlx5e_ktls_offload_context_tx *obj, *n;
+
+	list_for_each_entry_safe(obj, n, list, list_node) /* obj is kfree()d in the body */
+		mlx5e_tls_priv_tx_cleanup(obj);
+}
+
+/* Recycling pool API */
+
+struct mlx5e_tls_tx_pool {
+	struct mlx5_core_dev *mdev;
+	struct mlx5e_tls_sw_stats *sw_stats;
+	struct mutex lock; /* Protects access to the pool */
+	struct list_head list;
+#define MLX5E_TLS_TX_POOL_MAX_SIZE (256)
+	size_t size;
+};
+
+static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev,
+							struct mlx5e_tls_sw_stats *sw_stats)
+{
+	struct mlx5e_tls_tx_pool *pool;
+
+	pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
+
+	INIT_LIST_HEAD(&pool->list);
+	mutex_init(&pool->lock);
+
+	pool->mdev = mdev;
+	pool->sw_stats = sw_stats;
+
+	return pool;
+}
+
+static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool)
+{
+	mlx5e_tls_priv_tx_list_cleanup(&pool->list);
+	atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free);
+	kvfree(pool);
+}
+
+static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj)
+{
+	mutex_lock(&pool->lock);
+	if (pool->size >= MLX5E_TLS_TX_POOL_MAX_SIZE) {
+		mutex_unlock(&pool->lock);
+		mlx5e_tls_priv_tx_cleanup(obj);
+		atomic64_inc(&pool->sw_stats->tx_tls_pool_free);
+		return;
+	}
+	list_add(&obj->list_node, &pool->list);
+	pool->size++;
+	mutex_unlock(&pool->lock);
+}
+
+static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool)
+{
+	struct mlx5e_ktls_offload_context_tx *obj;
+
+	mutex_lock(&pool->lock);
+	if (pool->size == 0) {
+		obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats);
+		if (!IS_ERR(obj))
+			atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc);
+		goto out;
+	}
+
+	obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx,
+			       list_node);
+	list_del(&obj->list_node);
+	pool->size--;
+out:
+	mutex_unlock(&pool->lock);
+	return obj;
+}
+
+/* End of pool API */
+
 int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
 		      struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
+	struct mlx5e_tls_tx_pool *pool;
 	struct tls_context *tls_ctx;
-	struct mlx5_core_dev *mdev;
 	struct mlx5e_priv *priv;
 	int err;
 
 	tls_ctx = tls_get_ctx(sk);
 	priv = netdev_priv(netdev);
-	mdev = priv->mdev;
+	pool = priv->tls->tx_pool;
 
-	priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL);
-	if (!priv_tx)
-		return -ENOMEM;
+	priv_tx = pool_pop(pool);
+	if (IS_ERR(priv_tx))
+		return PTR_ERR(priv_tx);
 
-	err = mlx5_ktls_create_key(mdev, crypto_info, &priv_tx->key_id);
+	err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id);
 	if (err)
 		goto err_create_key;
 
-	priv_tx->sw_stats = &priv->tls->sw_stats;
 	priv_tx->expected_seq = start_offload_tcp_sn;
 	priv_tx->crypto_info  =
 		*(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
@@ -115,36 +228,29 @@ int mlx5e_ktls_add_tx(struct net_device
 
 	mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx);
 
-	err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn);
-	if (err)
-		goto err_create_tis;
-
 	priv_tx->ctx_post_pending = true;
 	atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx);
 
 	return 0;
 
-err_create_tis:
-	mlx5_ktls_destroy_key(mdev, priv_tx->key_id);
 err_create_key:
-	kfree(priv_tx);
+	pool_push(pool, priv_tx);
 	return err;
 }
 
 void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
-	struct mlx5_core_dev *mdev;
+	struct mlx5e_tls_tx_pool *pool;
 	struct mlx5e_priv *priv;
 
 	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
 	priv = netdev_priv(netdev);
-	mdev = priv->mdev;
+	pool = priv->tls->tx_pool;
 
 	atomic64_inc(&priv_tx->sw_stats->tx_tls_del);
-	mlx5e_destroy_tis(mdev, priv_tx->tisn);
-	mlx5_ktls_destroy_key(mdev, priv_tx->key_id);
-	kfree(priv_tx);
+	mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id);
+	pool_push(pool, priv_tx);
 }
 
 static void tx_fill_wi(struct mlx5e_txqsq *sq,
@@ -205,6 +311,16 @@ post_progress_params(struct mlx5e_txqsq
 	sq->pc += num_wqebbs;
 }
 
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+	tx_fill_wi(sq, pi, 1, 0, NULL);
+
+	mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
 static void
 mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
 			      struct mlx5e_ktls_offload_context_tx *priv_tx,
@@ -216,6 +332,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx
 		post_static_params(sq, priv_tx, fence_first_post);
 
 	post_progress_params(sq, priv_tx, progress_fence);
+	tx_post_fence_nop(sq);
 }
 
 struct tx_sync_info {
@@ -308,7 +425,7 @@ tx_post_resync_params(struct mlx5e_txqsq
 }
 
 static int
-tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn)
 {
 	struct mlx5_wqe_ctrl_seg *cseg;
 	struct mlx5_wqe_data_seg *dseg;
@@ -330,7 +447,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8)  | MLX5_OPCODE_DUMP);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 	cseg->tis_tir_num      = cpu_to_be32(tisn << 8);
-	cseg->fm_ce_se         = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
 
 	fsz = skb_frag_size(frag);
 	dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
@@ -365,16 +481,6 @@ void mlx5e_ktls_tx_handle_resync_dump_co
 	stats->tls_dump_bytes += wi->num_bytes;
 }
 
-static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
-{
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
-	tx_fill_wi(sq, pi, 1, 0, NULL);
-
-	mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
-}
-
 static enum mlx5e_ktls_sync_retval
 mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 			 struct mlx5e_txqsq *sq,
@@ -395,14 +501,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt
 
 	tx_post_resync_params(sq, priv_tx, info.rcd_sn);
 
-	/* If no dump WQE was sent, we need to have a fence NOP WQE before the
-	 * actual data xmit.
-	 */
-	if (!info.nr_frags) {
-		tx_post_fence_nop(sq);
-		return MLX5E_KTLS_SYNC_DONE;
-	}
-
 	for (i = 0; i < info.nr_frags; i++) {
 		unsigned int orig_fsz, frag_offset = 0, n = 0;
 		skb_frag_t *f = &info.frags[i];
@@ -410,13 +508,12 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt
 		orig_fsz = skb_frag_size(f);
 
 		do {
-			bool fence = !(i || frag_offset);
 			unsigned int fsz;
 
 			n++;
 			fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
 			skb_frag_size_set(f, fsz);
-			if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+			if (tx_post_resync_dump(sq, f, priv_tx->tisn)) {
 				page_ref_add(skb_frag_page(f), n - 1);
 				goto err_out;
 			}
@@ -464,9 +561,8 @@ bool mlx5e_ktls_handle_tx_skb(struct net
 
 	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
 
-	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx)))
 		mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
-	}
 
 	seq = ntohl(tcp_hdr(skb)->seq);
 	if (unlikely(priv_tx->expected_seq != seq)) {
@@ -504,3 +600,24 @@ err_out:
 	dev_kfree_skb_any(skb);
 	return false;
 }
+
+int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+{
+	if (!mlx5e_is_ktls_tx(priv->mdev))
+		return 0;
+
+	priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
+	if (!priv->tls->tx_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+{
+	if (!mlx5e_is_ktls_tx(priv->mdev))
+		return;
+
+	mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
+	priv->tls->tx_pool = NULL;
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3144,6 +3144,7 @@ static void mlx5e_cleanup_nic_tx(struct
 		mlx5e_mqprio_rl_free(priv->mqprio_rl);
 		priv->mqprio_rl = NULL;
 	}
+	mlx5e_accel_cleanup_tx(priv);
 	mlx5e_destroy_tises(priv);
 }
 
@@ -5146,9 +5147,17 @@ static int mlx5e_init_nic_tx(struct mlx5
 		return err;
 	}
 
+	err = mlx5e_accel_init_tx(priv);
+	if (err)
+		goto err_destroy_tises;
+
 	mlx5e_set_mqprio_rl(priv);
 	mlx5e_dcbnl_initialize(priv);
 	return 0;
+
+err_destroy_tises:
+	mlx5e_destroy_tises(priv);
+	return err;
 }
 
 static void mlx5e_nic_enable(struct mlx5e_priv *priv)