Blob Blame History Raw
From 3f55a410b2327dd0a709e08729ffd963a5b78dd7 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Wed, 30 Jun 2021 14:56:32 +0300
Subject: [PATCH] net/mlx5: Lag, fix multipath lag activation
Git-commit: 2f8b6161cca5fb34b0065e2eac8bb2b61b7bfe87
Patch-mainline: v5.15-rc1
References: stable-5.14.4

[ Upstream commit 2f8b6161cca5fb34b0065e2eac8bb2b61b7bfe87 ]

When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route,
lag activation can be missed if a stale (struct lag_mp)->mfi pointer
exists, which was associated with an older multipath route that had been
removed.

Normally, when a route is removed, it triggers mlx5_lag_fib_event(),
which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if
mlx5_lag_check_prereq() condition isn't met, for example when eswitch is
in legacy mode, the fib event is skipped and mfi pointer becomes stale.

Fix by resetting mfi pointer to NULL in mlx5_deactivate_lag().

Fixes: 8a66e4585979 ("net/mlx5: Change ownership model for lag")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c    |    1 +
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c |    8 ++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h |    2 ++
 3 files changed, 11 insertions(+)

--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -305,6 +305,7 @@ static int mlx5_deactivate_lag(struct ml
 	int err;
 
 	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+	mlx5_lag_mp_reset(ldev);
 
 	if (ldev->shared_fdb) {
 		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -302,6 +302,14 @@ static int mlx5_lag_fib_event(struct not
 	return NOTIFY_DONE;
 }
 
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+	/* Clear mfi, as it might become stale when a route delete event
+	 * has been missed, see mlx5_lag_fib_route_event().
+	 */
+	ldev->lag_mp.mfi = NULL;
+}
+
 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
 {
 	struct lag_mp *mp = &ldev->lag_mp;
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -21,11 +21,13 @@ struct lag_mp {
 
 #ifdef CONFIG_MLX5_ESWITCH
 
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
 int mlx5_lag_mp_init(struct mlx5_lag *ldev);
 void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
 
 #else /* CONFIG_MLX5_ESWITCH */
 
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
 static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
 static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}