Blob Blame History Raw
From: Moshe Shemesh <moshe@nvidia.com>
Date: Thu, 28 Jul 2022 18:53:49 +0300
Subject: net/mlx5: Lock mlx5 devlink health recovery callback
Patch-mainline: v6.0-rc1
Git-commit: d3dbdc9f8ddce46ae2646b2257106d8cdb3d5887
References: jsc#PED-1549

Change devlink instance locks in mlx5 driver to have devlink health
recovery callback locked, while keeping all driver paths which lead to
devl_ API functions called by the driver locked.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c |   12 +++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c   |    4 ++--
 2 files changed, 13 insertions(+), 3 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -622,8 +622,14 @@ mlx5_fw_fatal_reporter_recover(struct de
 			       struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct devlink *devlink = priv_to_devlink(dev);
+	int ret;
 
-	return mlx5_health_try_recover(dev);
+	devl_lock(devlink);
+	ret = mlx5_health_try_recover(dev);
+	devl_unlock(devlink);
+
+	return ret;
 }
 
 static int
@@ -666,16 +672,20 @@ static void mlx5_fw_fatal_reporter_err_w
 	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
 	struct mlx5_core_health *health;
 	struct mlx5_core_dev *dev;
+	struct devlink *devlink;
 	struct mlx5_priv *priv;
 
 	health = container_of(work, struct mlx5_core_health, fatal_report_work);
 	priv = container_of(health, struct mlx5_priv, health);
 	dev = container_of(priv, struct mlx5_core_dev, priv);
+	devlink = priv_to_devlink(dev);
 
 	enter_error_state(dev, false);
 	if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+		devl_lock(devlink);
 		if (mlx5_health_try_recover(dev))
 			mlx5_core_err(dev, "health recovery failed\n");
+		devl_unlock(devlink);
 		return;
 	}
 	fw_reporter_ctx.err_synd = health->synd;
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1932,7 +1932,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_t
 void mlx5_disable_device(struct mlx5_core_dev *dev)
 {
 	mlx5_error_sw_reset(dev);
-	mlx5_unload_one(dev);
+	mlx5_unload_one_devl_locked(dev);
 }
 
 int mlx5_recover_device(struct mlx5_core_dev *dev)
@@ -1943,7 +1943,7 @@ int mlx5_recover_device(struct mlx5_core
 			return -EIO;
 	}
 
-	return mlx5_load_one(dev, true);
+	return mlx5_load_one_devl_locked(dev, true);
 }
 
 static struct pci_driver mlx5_core_driver = {