From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 11 Apr 2022 20:38:44 +0300
Subject: net/mlx5: Avoid double clear or set of sync reset requested
Patch-mainline: v5.18-rc6
Git-commit: fc3d3db07b35885f238e1fa06b9f04a8fa7a62d0
References: git-fixes
Double clear of reset requested state can lead to NULL pointer as it
will try to delete the timer twice. This can happen for example on a
race between abort from FW and pci error or reset. Avoid such case using
test_and_clear_bit() to verify only one time reset requested state clear
flow. Similarly use test_and_set_bit() to verify only one time reset
requested state set flow.
Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 28 ++++++++++++++-------
1 file changed, 19 insertions(+), 9 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -119,14 +119,19 @@ static void mlx5_stop_sync_reset_poll(st
del_timer_sync(&fw_reset->timer);
}
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
+ return 0;
}
static void mlx5_sync_reset_reload_work(struct work_struct *work)
@@ -186,13 +191,17 @@ static int mlx5_fw_reset_set_reset_sync_
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -221,7 +230,9 @@ static void mlx5_sync_reset_request_even
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -319,7 +330,8 @@ static void mlx5_sync_reset_now_event(st
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
@@ -348,10 +360,8 @@ static void mlx5_sync_reset_abort_event(
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}