Blob Blame History Raw
From: Paul Blakey <paulb@nvidia.com>
Date: Sun, 11 Jul 2021 16:56:54 +0300
Subject: net/mlx5: Allow skipping counter refresh on creation
Patch-mainline: v5.16-rc1
Git-commit: 504e15724893a839213fad5eedfbd511d9ba75cc
References: jsc#SLE-19253

CT creates a counter for each CT rule, and for each such counter,
fs_counters tries to queue mlx5_fc_stats_work() work again via
mod_delayed_work(0) call to refresh all counters. This call has a
large performance impact when reaching high insertion rate and
accounts for ~8% of the insertion time when using software steering.

Allow skipping the refresh of all counters during counter creation.
Change CT to use this refresh skipping for it's counters.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c    |    2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c |   14 +++++++++++---
 include/linux/mlx5/fs.h                               |    4 ++++
 3 files changed, 16 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -889,7 +889,7 @@ mlx5_tc_ct_counter_create(struct mlx5_tc
 		return ERR_PTR(-ENOMEM);
 
 	counter->is_shared = false;
-	counter->counter = mlx5_fc_create(ct_priv->dev, true);
+	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
 	if (IS_ERR(counter->counter)) {
 		ct_dbg("Failed to create counter for ct entry");
 		ret = PTR_ERR(counter->counter);
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -301,7 +301,7 @@ static struct mlx5_fc *mlx5_fc_acquire(s
 	return mlx5_fc_single_alloc(dev);
 }
 
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
 {
 	struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -332,8 +332,6 @@ struct mlx5_fc *mlx5_fc_create(struct ml
 			goto err_out_alloc;
 
 		llist_add(&counter->addlist, &fc_stats->addlist);
-
-		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 	}
 
 	return counter;
@@ -342,6 +340,16 @@ err_out_alloc:
 	mlx5_fc_release(dev, counter);
 	return ERR_PTR(err);
 }
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	if (aging)
+		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+	return counter;
+}
 EXPORT_SYMBOL(mlx5_fc_create);
 
 u32 mlx5_fc_id(struct mlx5_fc *counter)
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -245,6 +245,10 @@ int mlx5_modify_rule_destination(struct
 				 struct mlx5_flow_destination *old_dest);
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
+
+/* As mlx5_fc_create() but doesn't queue stats refresh thread. */
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging);
+
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,