Blob Blame History Raw
From: Moshe Shemesh <moshe@nvidia.com>
Date: Thu, 28 Jul 2022 18:53:45 +0300
Subject: net/mlx5: Lock mlx5 devlink reload callbacks
Patch-mainline: v6.0-rc1
Git-commit: 84a433a40d0ebf3bbf36b8bfa58c6f45dc782344
References: jsc#PED-1549

Change devlink instance locks in mlx5 driver to have devlink reload
callbacks locked, while keeping all driver paths which lead to devl_ API
functions called by the driver locked.

Add mlx5_load_one_devl_locked() and mlx5_unload_one_devl_locked() which
are used by the paths which are already locked such as devlink reload
callbacks.

This patch makes the driver use devl_ API also for traps register as
these functions are called from the driver paths parallel to reload that
requires locking now.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c       |   19 +------
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c   |   50 ++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c   |   18 +------
 drivers/net/ethernet/mellanox/mlx5/core/main.c      |   34 ++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h |    2 
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c     |    6 ++
 6 files changed, 79 insertions(+), 50 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -335,13 +335,12 @@ static void del_adev(struct auxiliary_de
 
 int mlx5_attach_device(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_priv *priv = &dev->priv;
 	struct auxiliary_device *adev;
 	struct auxiliary_driver *adrv;
 	int ret = 0, i;
 
-	devl_lock(devlink);
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&mlx5_intf_mutex);
 	priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
 	priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
@@ -394,20 +393,18 @@ int mlx5_attach_device(struct mlx5_core_
 	}
 	priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	mutex_unlock(&mlx5_intf_mutex);
-	devl_unlock(devlink);
 	return ret;
 }
 
 void mlx5_detach_device(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_priv *priv = &dev->priv;
 	struct auxiliary_device *adev;
 	struct auxiliary_driver *adrv;
 	pm_message_t pm = {};
 	int i;
 
-	devl_lock(devlink);
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&mlx5_intf_mutex);
 	priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
@@ -441,21 +438,17 @@ skip_suspend:
 	priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW;
 	priv->flags |= MLX5_PRIV_FLAGS_DETACH;
 	mutex_unlock(&mlx5_intf_mutex);
-	devl_unlock(devlink);
 }
 
 int mlx5_register_device(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink;
 	int ret;
 
-	devlink = priv_to_devlink(dev);
-	devl_lock(devlink);
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&mlx5_intf_mutex);
 	dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
 	ret = mlx5_rescan_drivers_locked(dev);
 	mutex_unlock(&mlx5_intf_mutex);
-	devl_unlock(devlink);
 	if (ret)
 		mlx5_unregister_device(dev);
 
@@ -464,15 +457,11 @@ int mlx5_register_device(struct mlx5_cor
 
 void mlx5_unregister_device(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink;
-
-	devlink = priv_to_devlink(dev);
-	devl_lock(devlink);
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&mlx5_intf_mutex);
 	dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
 	mlx5_rescan_drivers_locked(dev);
 	mutex_unlock(&mlx5_intf_mutex);
-	devl_unlock(devlink);
 }
 
 static int add_drivers(struct mlx5_core_dev *dev)
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -108,7 +108,7 @@ static int mlx5_devlink_reload_fw_activa
 	if (err)
 		return err;
 
-	mlx5_unload_one(dev);
+	mlx5_unload_one_devl_locked(dev);
 	err = mlx5_health_wait_pci_up(dev);
 	if (err)
 		NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
@@ -143,6 +143,7 @@ static int mlx5_devlink_reload_down(stru
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct pci_dev *pdev = dev->pdev;
 	bool sf_dev_allocated;
+	int ret = 0;
 
 	sf_dev_allocated = mlx5_sf_dev_allocated(dev);
 	if (sf_dev_allocated) {
@@ -163,19 +164,25 @@ static int mlx5_devlink_reload_down(stru
 		NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
 	}
 
+	devl_lock(devlink);
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		mlx5_unload_one(dev);
-		return 0;
+		mlx5_unload_one_devl_locked(dev);
+		break;
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
-			return mlx5_devlink_trigger_fw_live_patch(devlink, extack);
-		return mlx5_devlink_reload_fw_activate(devlink, extack);
+			ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack);
+		else
+			ret = mlx5_devlink_reload_fw_activate(devlink, extack);
+		break;
 	default:
 		/* Unsupported action should not get to this function */
 		WARN_ON(1);
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
 	}
+
+	devl_unlock(devlink);
+	return ret;
 }
 
 static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
@@ -183,24 +190,29 @@ static int mlx5_devlink_reload_up(struct
 				  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	int ret = 0;
 
+	devl_lock(devlink);
 	*actions_performed = BIT(action);
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		return mlx5_load_one(dev, false);
+		ret = mlx5_load_one_devl_locked(dev, false);
+		break;
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
 			break;
 		/* On fw_activate action, also driver is reloaded and reinit performed */
 		*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-		return mlx5_load_one(dev, false);
+		ret = mlx5_load_one_devl_locked(dev, false);
+		break;
 	default:
 		/* Unsupported action should not get to this function */
 		WARN_ON(1);
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
 	}
 
-	return 0;
+	devl_unlock(devlink);
+	return ret;
 }
 
 static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id)
@@ -837,28 +849,28 @@ static int mlx5_devlink_traps_register(s
 	struct mlx5_core_dev *core_dev = devlink_priv(devlink);
 	int err;
 
-	err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr,
-					   ARRAY_SIZE(mlx5_trap_groups_arr));
+	err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr,
+					ARRAY_SIZE(mlx5_trap_groups_arr));
 	if (err)
 		return err;
 
-	err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
-				     &core_dev->priv);
+	err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr),
+				  &core_dev->priv);
 	if (err)
 		goto err_trap_group;
 	return 0;
 
 err_trap_group:
-	devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
-				       ARRAY_SIZE(mlx5_trap_groups_arr));
+	devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+				    ARRAY_SIZE(mlx5_trap_groups_arr));
 	return err;
 }
 
 static void mlx5_devlink_traps_unregister(struct devlink *devlink)
 {
-	devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
-	devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
-				       ARRAY_SIZE(mlx5_trap_groups_arr));
+	devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr));
+	devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr,
+				    ARRAY_SIZE(mlx5_trap_groups_arr));
 }
 
 int mlx5_devlink_register(struct devlink *devlink)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1300,20 +1300,19 @@ abort:
  */
 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 {
-	struct devlink *devlink;
 	bool toggle_lag;
 	int ret;
 
 	if (!mlx5_esw_allowed(esw))
 		return 0;
 
+	devl_assert_locked(priv_to_devlink(esw->dev));
+
 	toggle_lag = !mlx5_esw_is_fdb_created(esw);
 
 	if (toggle_lag)
 		mlx5_lag_disable_change(esw->dev);
 
-	devlink = priv_to_devlink(esw->dev);
-	devl_lock(devlink);
 	down_write(&esw->mode_lock);
 	if (!mlx5_esw_is_fdb_created(esw)) {
 		ret = mlx5_eswitch_enable_locked(esw, num_vfs);
@@ -1327,7 +1326,6 @@ int mlx5_eswitch_enable(struct mlx5_eswi
 			esw->esw_funcs.num_vfs = num_vfs;
 	}
 	up_write(&esw->mode_lock);
-	devl_unlock(devlink);
 
 	if (toggle_lag)
 		mlx5_lag_enable_change(esw->dev);
@@ -1338,13 +1336,10 @@ int mlx5_eswitch_enable(struct mlx5_eswi
 /* When disabling sriov, free driver level resources. */
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
 {
-	struct devlink *devlink;
-
 	if (!mlx5_esw_allowed(esw))
 		return;
 
-	devlink = priv_to_devlink(esw->dev);
-	devl_lock(devlink);
+	devl_assert_locked(priv_to_devlink(esw->dev));
 	down_write(&esw->mode_lock);
 	/* If driver is unloaded, this function is called twice by remove_one()
 	 * and mlx5_unload(). Prevent the second call.
@@ -1373,7 +1368,6 @@ void mlx5_eswitch_disable_sriov(struct m
 
 unlock:
 	up_write(&esw->mode_lock);
-	devl_unlock(devlink);
 }
 
 /* Free resources for corresponding eswitch mode. It is called by devlink
@@ -1407,18 +1401,14 @@ void mlx5_eswitch_disable_locked(struct
 
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
 {
-	struct devlink *devlink;
-
 	if (!mlx5_esw_allowed(esw))
 		return;
 
+	devl_assert_locked(priv_to_devlink(esw->dev));
 	mlx5_lag_disable_change(esw->dev);
-	devlink = priv_to_devlink(esw->dev);
-	devl_lock(devlink);
 	down_write(&esw->mode_lock);
 	mlx5_eswitch_disable_locked(esw);
 	up_write(&esw->mode_lock);
-	devl_unlock(devlink);
 	mlx5_lag_enable_change(esw->dev);
 }
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1304,8 +1304,10 @@ static void mlx5_unload(struct mlx5_core
 
 int mlx5_init_one(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink = priv_to_devlink(dev);
 	int err = 0;
 
+	devl_lock(devlink);
 	mutex_lock(&dev->intf_state_mutex);
 	dev->state = MLX5_DEVICE_STATE_UP;
 
@@ -1334,6 +1336,7 @@ int mlx5_init_one(struct mlx5_core_dev *
 		goto err_register;
 
 	mutex_unlock(&dev->intf_state_mutex);
+	devl_unlock(devlink);
 	return 0;
 
 err_register:
@@ -1348,11 +1351,15 @@ function_teardown:
 err_function:
 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 	mutex_unlock(&dev->intf_state_mutex);
+	devl_unlock(devlink);
 	return err;
 }
 
 void mlx5_uninit_one(struct mlx5_core_dev *dev)
 {
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	devl_lock(devlink);
 	mutex_lock(&dev->intf_state_mutex);
 
 	mlx5_unregister_device(dev);
@@ -1371,13 +1378,15 @@ void mlx5_uninit_one(struct mlx5_core_de
 	mlx5_function_teardown(dev, true);
 out:
 	mutex_unlock(&dev->intf_state_mutex);
+	devl_unlock(devlink);
 }
 
-int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
 {
 	int err = 0;
 	u64 timeout;
 
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		mlx5_core_warn(dev, "interface is up, NOP\n");
@@ -1419,8 +1428,20 @@ out:
 	return err;
 }
 
-void mlx5_unload_one(struct mlx5_core_dev *dev)
+int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
 {
+	struct devlink *devlink = priv_to_devlink(dev);
+	int ret;
+
+	devl_lock(devlink);
+	ret = mlx5_load_one_devl_locked(dev, recovery);
+	devl_unlock(devlink);
+	return ret;
+}
+
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev)
+{
+	devl_assert_locked(priv_to_devlink(dev));
 	mutex_lock(&dev->intf_state_mutex);
 
 	mlx5_detach_device(dev);
@@ -1438,6 +1459,15 @@ out:
 	mutex_unlock(&dev->intf_state_mutex);
 }
 
+void mlx5_unload_one(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	devl_lock(devlink);
+	mlx5_unload_one_devl_locked(dev);
+	devl_unlock(devlink);
+}
+
 static const int types[] = {
 	MLX5_CAP_GENERAL,
 	MLX5_CAP_GENERAL_2,
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -290,7 +290,9 @@ void mlx5_mdev_uninit(struct mlx5_core_d
 int mlx5_init_one(struct mlx5_core_dev *dev);
 void mlx5_uninit_one(struct mlx5_core_dev *dev);
 void mlx5_unload_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
 int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
+int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
 
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -154,13 +154,16 @@ mlx5_device_disable_sriov(struct mlx5_co
 static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct devlink *devlink = priv_to_devlink(dev);
 	int err;
 
+	devl_lock(devlink);
 	err = mlx5_device_enable_sriov(dev, num_vfs);
 	if (err) {
 		mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
 		return err;
 	}
+	devl_unlock(devlink);
 
 	err = pci_enable_sriov(pdev, num_vfs);
 	if (err) {
@@ -173,10 +176,13 @@ static int mlx5_sriov_enable(struct pci_
 void mlx5_sriov_disable(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct devlink *devlink = priv_to_devlink(dev);
 	int num_vfs = pci_num_vf(dev->pdev);
 
 	pci_disable_sriov(pdev);
+	devl_lock(devlink);
 	mlx5_device_disable_sriov(dev, num_vfs, true);
+	devl_unlock(devlink);
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)