Blob Blame History Raw
From: Michael Guralnik <michaelgur@nvidia.com>
Date: Thu, 19 May 2022 12:22:55 +0300
Subject: net/mlx5: Expose vnic diagnostic counters for eswitch managed vports
Patch-mainline: v6.0-rc1
Git-commit: 606e6a72e29dff9e3341c4cc9b554420e4793f40
References: jsc#PED-1549

Expose on vport group managers debug counters for their managed vports.

Counters are exposed through debugfs, the directory will be present only
for functions that are eswitch managers and only counters that are
supported on their specific HW/FW will be exposed.

Example:
$ ls /sys/kernel/debug/mlx5/0000:08:00.0/esw/
pf sf_8  vf_0  vf_1

$ ls -l /sys/kernel/debug/mlx5/0000:08:00.0/esw/vf_0/vnic_diag/
cq_overrun
quota_exceeded_command
total_q_under_processor_handle
invalid_command
send_queue_priority_update_flow

List of all counters added:
total_q_under_processor_handle - number of queues in error state due to an
async error or errored command.
send_queue_priority_update_flow - number of QP/SQ priority/SL update
events.
cq_overrun - number of times CQ entered an error state due to an
overflow.
async_eq_overrun - number of times an EQ mapped to async events was
overrun.
comp_eq_overrun - number of times an EQ mapped to completion events was
overrun.
quota_exceeded_command - number of commands issued and failed due to quota
exceeded.
invalid_command - number of commands issued and failed due to any reason
other than quota exceeded.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile           |    2 
 drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c      |  182 +++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c          |    6 
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          |    5 
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |    3 
 5 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c

--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -68,7 +68,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   +=
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o esw/legacy.o \
-				      esw/devlink_port.o esw/vporttbl.o esw/qos.o
+				      esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include "eswitch.h"
+
+enum vnic_diag_counter {
+	MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
+	MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
+	MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
+	MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
+	MLX5_VNIC_DIAG_CQ_OVERRUN,
+	MLX5_VNIC_DIAG_INVALID_COMMAND,
+	MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+};
+
+static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+				    u32 *val)
+{
+	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+	struct mlx5_core_dev *dev = vport->dev;
+	u16 vport_num = vport->vport;
+	void *vnic_diag_out;
+	int err;
+
+	MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+	MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+	if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
+		MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
+	switch (counter) {
+	case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
+		break;
+	case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
+				send_queue_priority_update_flow);
+		break;
+	case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_CQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_INVALID_COMMAND:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
+		break;
+	case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+		break;
+	}
+
+	return 0;
+}
+
+static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+			    enum vnic_diag_counter type)
+{
+	u32 val = 0;
+	int ret;
+
+	ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+	if (ret)
+		return ret;
+	/* val is u32: print unsigned so counters above INT_MAX don't show up negative */
+	seq_printf(file, "%u\n", val);
+	return 0;
+}
+
+static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+}
+
+static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private,
+				MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+}
+
+static int comp_eq_overrun_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+}
+
+static int async_eq_overrun_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+}
+
+static int cq_overrun_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+}
+
+static int invalid_command_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+}
+
+static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+{
+	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+}
+
+DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+DEFINE_SHOW_ATTRIBUTE(invalid_command);
+DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+	debugfs_remove_recursive(vport->dbgfs);
+	vport->dbgfs = NULL;
+}
+
+/* Longest dir name is "{vf/sf}_65535" (u16 index): 8 chars + NUL; "pf" and "ecpf" are shorter */
+#define VNIC_DIAG_DIR_NAME_MAX_LEN 9
+/* Create <esw dbgfs>/<vport dir>/vnic_diag/ with one file per counter the device caps report */
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+	struct dentry *vnic_diag;
+	char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+	int err;
+
+	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+		return;
+
+	if (vport_num == MLX5_VPORT_PF) {
+		strcpy(dir_name, "pf");
+	} else if (vport_num == MLX5_VPORT_ECPF) {
+		strcpy(dir_name, "ecpf");
+	} else {
+		err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
+			       is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+		if (WARN_ON(err < 0 || err >= VNIC_DIAG_DIR_NAME_MAX_LEN))
+			return;
+	}
+
+	vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+	vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+
+	if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+		debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+				    &total_q_under_processor_handle_fops);
+		debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+				    &send_queue_priority_update_flow_fops);
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+		debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+				    &comp_eq_overrun_fops);
+		debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+				    &async_eq_overrun_fops);
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+		debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+
+	if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+		debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+				    &invalid_command_fops);
+
+	if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+		debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+				    &quota_exceeded_command_fops);
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/mpfs.h>
+#include <linux/debugfs.h>
 #include "esw/acl/lgcy.h"
 #include "esw/legacy.h"
 #include "esw/qos.h"
@@ -1002,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_
 	if (err)
 		return err;
 
+	mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
 	err = esw_offloads_load_rep(esw, vport_num);
 	if (err)
 		goto err_rep;
@@ -1009,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_
 	return err;
 
 err_rep:
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 	return err;
 }
@@ -1016,6 +1019,7 @@ err_rep:
 void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	esw_offloads_unload_rep(esw, vport_num);
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 }
 
@@ -1622,6 +1626,7 @@ int mlx5_eswitch_init(struct mlx5_core_d
 	dev->priv.eswitch = esw;
 	BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
 
+	esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
 	esw_info(dev,
 		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
 		 esw->total_vports,
@@ -1645,6 +1650,7 @@ void mlx5_eswitch_cleanup(struct mlx5_es
 
 	esw_info(esw->dev, "cleanup\n");
 
+	debugfs_remove_recursive(esw->dbgfs);
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	WARN_ON(refcount_read(&esw->qos.refcnt));
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -191,6 +191,7 @@ struct mlx5_vport {
 	enum mlx5_eswitch_vport_event enabled_events;
 	int index;
 	struct devlink_port *dl_port;
+	struct dentry *dbgfs;
 };
 
 struct mlx5_esw_indir_table;
@@ -336,6 +337,7 @@ struct mlx5_eswitch {
 		u32             large_group_num;
 	}  params;
 	struct blocking_notifier_head n_head;
+	struct dentry *dbgfs;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -684,6 +686,9 @@ int mlx5_esw_offloads_devlink_port_regis
 void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
 struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
 
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
 				      u16 vport_num, u32 controller, u32 sfnum);
 void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3704,12 +3704,14 @@ int mlx5_esw_offloads_sf_vport_enable(st
 	if (err)
 		goto devlink_err;
 
+	mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
 	err = mlx5_esw_offloads_rep_load(esw, vport_num);
 	if (err)
 		goto rep_err;
 	return 0;
 
 rep_err:
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
 devlink_err:
 	mlx5_esw_vport_disable(esw, vport_num);
@@ -3719,6 +3721,7 @@ devlink_err:
 void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	mlx5_esw_offloads_rep_unload(esw, vport_num);
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 }