Blob Blame History Raw
From: Mark Zhang <markz@mellanox.com>
Date: Tue, 17 Mar 2020 11:28:28 +0200
Subject: net/mlx5: Use a separate work queue for fib event handling
Patch-mainline: v5.7-rc1
Git-commit: 6838a35a4567de7ddefd5c2d09ccfa41d754e4ee
References: jsc#SLE-15172

In VF LAG mode, when removing the bonding module without bringing down the
bond device first, we could potentially have a circular dependency when we
unload IB devices and also handle fib events:
1. The bond work starts first;
2. The "modprobe -rv bonding" process tries to release the bond device,
   with the "pernet_ops_rwsem" lock held;
3. The bond work blocks in unregister_netdevice_notifier() and waits for
   the lock because a fib event came right before;
4. The kernel fib module tries to free all the fib entries by broadcasting
   the "FIB_EVENT_NH_DEL" event;
5. Upon the fib event, this lag_mp module holds the fib lock and queues a
   fib work.
So:
   bond work -> modprobe task -> kernel fib module -> lag_mp -> bond work

Today we either reload IB devices in RoCE LAG in NIC mode or handle fib
events in switchdev mode. A new feature could change that, so that we would
also need to reload IB devices in switchdev mode; this is therefore a
future-proof fix, as one may not notice the problem later.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c |   14 ++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h |    1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -93,9 +93,8 @@ static void mlx5_lag_set_port_affinity(s
 static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
 {
 	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
-	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
 
-	flush_workqueue(ldev->wq);
+	flush_workqueue(mp->wq);
 }
 
 struct mlx5_fib_event_work {
@@ -293,7 +292,7 @@ static int mlx5_lag_fib_event(struct not
 		return NOTIFY_DONE;
 	}
 
-	queue_work(ldev->wq, &fib_work->work);
+	queue_work(mp->wq, &fib_work->work);
 
 	return NOTIFY_DONE;
 }
@@ -306,11 +305,17 @@ int mlx5_lag_mp_init(struct mlx5_lag *ld
 	if (mp->fib_nb.notifier_call)
 		return 0;
 
+	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
+	if (!mp->wq)
+		return -ENOMEM;
+
 	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
 	err = register_fib_notifier(&init_net, &mp->fib_nb,
 				    mlx5_lag_fib_event_flush, NULL);
-	if (err)
+	if (err) {
+		destroy_workqueue(mp->wq);
 		mp->fib_nb.notifier_call = NULL;
+	}
 
 	return err;
 }
@@ -323,5 +328,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag
 		return;
 
 	unregister_fib_notifier(&init_net, &mp->fib_nb);
+	destroy_workqueue(mp->wq);
 	mp->fib_nb.notifier_call = NULL;
 }
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -16,6 +16,7 @@ enum mlx5_lag_port_affinity {
 struct lag_mp {
 	struct notifier_block     fib_nb;
 	struct fib_info           *mfi; /* used in tracking fib events */
+	struct workqueue_struct   *wq;
 };
 
 #ifdef CONFIG_MLX5_ESWITCH