Blob Blame History Raw
From: Avihai Horon <avihaih@nvidia.com>
Date: Thu, 28 Oct 2021 16:03:06 +0300
Subject: net/mlx5: Dynamically resize flow counters query buffer
Patch-mainline: v5.17-rc1
Git-commit: b247f32aecad09e6cf7edff7739e6f2c9dc5fca9
References: bsc#1195175

The flow counters bulk query buffer is allocated once during
mlx5_fc_init_stats(). For PFs and VFs this buffer usually takes a little
more than 512KB of memory, which is aligned to the next power of 2, to
1MB. For SFs, this buffer is reduced and takes around 128 Bytes.

The buffer size determines the maximum number of flow counters that
can be queried at a time. Thus, having a bigger buffer can improve
performance for users that need to query many flow counters.

There are cases that don't use many flow counters and don't need a big
buffer (e.g. SFs, VFs). Since this size is critical with large scale,
in these cases the buffer size should be reduced.

In order to reduce memory consumption while maintaining query
performance, change the query buffer's allocation scheme to the
following:
- First allocate the buffer with small initial size.
- If the number of counters surpasses the initial size, resize the
  buffer to the maximum size.

The buffer only grows and isn't shrank, because users with many flow
counters don't care about the buffer size and we don't want to add
resize overhead if the current number of counters drops.

This solution is preferable to the current one, which is less accurate
and only addresses SFs.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c |   74 ++++++++++++++----
 include/linux/mlx5/driver.h                           |    4 
 2 files changed, 64 insertions(+), 14 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -38,9 +38,10 @@
 #include "fs_cmd.h"
 
 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
-#define MLX5_SF_NUM_COUNTERS_BULK 8
+#define MLX5_INIT_COUNTERS_BULK 8
 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
 #define MLX5_FC_POOL_USED_BUFF_RATIO 10
 
@@ -145,13 +146,15 @@ static void mlx5_fc_stats_remove(struct
 	spin_unlock(&fc_stats->counters_idr_lock);
 }
 
-static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
 {
-	int num_counters_bulk = mlx5_core_is_sf(dev) ?
-					MLX5_SF_NUM_COUNTERS_BULK :
-					MLX5_SW_MAX_COUNTERS_BULK;
+	return min_t(int, MLX5_INIT_COUNTERS_BULK,
+		     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
 
-	return min_t(int, num_counters_bulk,
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+{
+	return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
 		     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
 }
 
@@ -177,7 +180,7 @@ static void mlx5_fc_stats_query_counter_
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	bool query_more_counters = (first->id <= last_id);
-	int max_bulk_len = get_max_bulk_query_len(dev);
+	int cur_bulk_len = fc_stats->bulk_query_len;
 	u32 *data = fc_stats->bulk_query_out;
 	struct mlx5_fc *counter = first;
 	u32 bulk_base_id;
@@ -189,7 +192,7 @@ static void mlx5_fc_stats_query_counter_
 		bulk_base_id = counter->id & ~0x3;
 
 		/* number of counters to query inc. the last counter */
-		bulk_len = min_t(int, max_bulk_len,
+		bulk_len = min_t(int, cur_bulk_len,
 				 ALIGN(last_id - bulk_base_id + 1, 4));
 
 		err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
@@ -230,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_
 		mlx5_fc_free(dev, counter);
 }
 
+static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int max_bulk_len = get_max_bulk_query_len(dev);
+	unsigned long now = jiffies;
+	u32 *bulk_query_out_tmp;
+	int max_out_len;
+
+	if (fc_stats->bulk_query_alloc_failed &&
+	    time_before(now, fc_stats->next_bulk_query_alloc))
+		return;
+
+	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+	bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
+	if (!bulk_query_out_tmp) {
+		mlx5_core_warn_once(dev,
+				    "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
+				    max_bulk_len);
+		fc_stats->bulk_query_alloc_failed = true;
+		fc_stats->next_bulk_query_alloc =
+			now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
+		return;
+	}
+
+	kfree(fc_stats->bulk_query_out);
+	fc_stats->bulk_query_out = bulk_query_out_tmp;
+	fc_stats->bulk_query_len = max_bulk_len;
+	if (fc_stats->bulk_query_alloc_failed) {
+		mlx5_core_info(dev,
+			       "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
+			       max_bulk_len);
+		fc_stats->bulk_query_alloc_failed = false;
+	}
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -247,15 +285,22 @@ static void mlx5_fc_stats_work(struct wo
 		queue_delayed_work(fc_stats->wq, &fc_stats->work,
 				   fc_stats->sampling_interval);
 
-	llist_for_each_entry(counter, addlist, addlist)
+	llist_for_each_entry(counter, addlist, addlist) {
 		mlx5_fc_stats_insert(dev, counter);
+		fc_stats->num_counters++;
+	}
 
 	llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
 		mlx5_fc_stats_remove(dev, counter);
 
 		mlx5_fc_release(dev, counter);
+		fc_stats->num_counters--;
 	}
 
+	if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
+	    fc_stats->num_counters > get_init_bulk_query_len(dev))
+		mlx5_fc_stats_bulk_query_size_increase(dev);
+
 	if (time_before(now, fc_stats->next_query) ||
 	    list_empty(&fc_stats->counters))
 		return;
@@ -378,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
-	int max_bulk_len;
-	int max_out_len;
+	int init_bulk_len;
+	int init_out_len;
 
 	spin_lock_init(&fc_stats->counters_idr_lock);
 	idr_init(&fc_stats->counters_idr);
@@ -387,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_
 	init_llist_head(&fc_stats->addlist);
 	init_llist_head(&fc_stats->dellist);
 
-	max_bulk_len = get_max_bulk_query_len(dev);
-	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
-	fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+	init_bulk_len = get_init_bulk_query_len(dev);
+	init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
+	fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
 	if (!fc_stats->bulk_query_out)
 		return -ENOMEM;
+	fc_stats->bulk_query_len = init_bulk_len;
 
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -478,6 +478,10 @@ struct mlx5_fc_stats {
 	unsigned long next_query;
 	unsigned long sampling_interval; /* jiffies */
 	u32 *bulk_query_out;
+	int bulk_query_len;
+	size_t num_counters;
+	bool bulk_query_alloc_failed;
+	unsigned long next_bulk_query_alloc;
 	struct mlx5_fc_pool fc_pool;
 };