Blob Blame History Raw
From: Mark Bloch <mbloch@nvidia.com>
Date: Tue, 3 Aug 2021 16:19:59 -0700
Subject: net/mlx5: Lag, Create shared FDB when in switchdev mode
Patch-mainline: v5.15-rc1
Git-commit: 598fe77df855feeeca9dfda2ffe622ac7724e5c3
References: jsc#SLE-19253

If both eswitches are in switchdev mode and the uplink representors
are enslaved to the same bond device, create a shared FDB configuration.

When moving to shared FDB mode, not only does the hardware need to be
configured, but the RDMA driver also needs to reconfigure itself.

When such a change is done, unload the RDMA devices, configure the
hardware and load the RDMA representors.

When destroying the lag (which can happen if a PCI function is unbound,
the driver is unloaded, or a netdev is simply removed from the bond), make
sure to restore the system to the previous state only if possible.

For example, if a PCI function is unbound, there is no need to load the
representors as the device is going away.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c    |  118 +++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/lag.h    |    3 
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c |    2 
 3 files changed, 105 insertions(+), 18 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -32,7 +32,9 @@
 
 #include <linux/netdevice.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "lag.h"
@@ -45,7 +47,7 @@
 static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-			       u8 remap_port2)
+			       u8 remap_port2, bool shared_fdb)
 {
 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct ml
 
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
 
 	return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ld
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
-			   struct lag_tracker *tracker)
+			   struct lag_tracker *tracker,
+			   bool shared_fdb)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	int err;
 
 	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
 				       &ldev->v2p_map[MLX5_LAG_P2]);
 
-	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+		       shared_fdb);
 
 	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-				  ldev->v2p_map[MLX5_LAG_P2]);
-	if (err)
+				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+	if (err) {
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
 			      err);
+		return err;
+	}
+
+	if (shared_fdb) {
+		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+							      dev1->priv.eswitch);
+		if (err)
+			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+		else
+			mlx5_core_info(dev0, "Operation mode is single FDB\n");
+	}
+
+	if (err) {
+		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+			mlx5_core_err(dev0,
+				      "Failed to deactivate RoCE LAG; driver restart required\n");
+	}
+
 	return err;
 }
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags)
+		      u8 flags,
+		      bool shared_fdb)
 {
 	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	int err;
 
-	err = mlx5_create_lag(ldev, tracker);
+	err = mlx5_create_lag(ldev, tracker, shared_fdb);
 	if (err) {
 		if (roce_lag) {
 			mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *l
 	}
 
 	ldev->flags |= flags;
+	ldev->shared_fdb = shared_fdb;
 	return 0;
 }
 
@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct ml
 
 	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
+	if (ldev->shared_fdb) {
+		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+		ldev->shared_fdb = false;
+	}
+
 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
 	if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(stru
 		if (!ldev->pf[i].dev)
 			continue;
 
+		if (ldev->pf[i].dev->priv.flags &
+		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+			continue;
+
 		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
 	}
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	bool shared_fdb = ldev->shared_fdb;
 	bool roce_lag;
 	int err;
 
 	roce_lag = __mlx5_lag_is_roce(ldev);
 
-	if (roce_lag) {
+	if (shared_fdb) {
+		mlx5_lag_remove_devices(ldev);
+	} else if (roce_lag) {
 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5
 	if (err)
 		return;
 
-	if (roce_lag)
+	if (shared_fdb || roce_lag)
 		mlx5_lag_add_devices(ldev);
+
+	if (shared_fdb) {
+		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+	}
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+	if (is_mdev_switchdev_mode(dev0) &&
+	    is_mdev_switchdev_mode(dev1) &&
+	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+	    mlx5_devcom_is_paired(dev0->priv.devcom,
+				  MLX5_DEVCOM_ESW_OFFLOADS) &&
+	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+		return true;
+
+	return false;
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -380,6 +447,8 @@ static void mlx5_do_bond(struct mlx5_lag
 	}
 
 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
+		bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
 		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
 			   !mlx5_sriov_is_enabled(dev1);
 
@@ -389,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag
 			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
-		if (roce_lag)
+		if (shared_fdb || roce_lag)
 			mlx5_lag_remove_devices(ldev);
 
 		err = mlx5_activate_lag(ldev, &tracker,
 					roce_lag ? MLX5_LAG_FLAG_ROCE :
-					MLX5_LAG_FLAG_SRIOV);
+						   MLX5_LAG_FLAG_SRIOV,
+					shared_fdb);
 		if (err) {
-			if (roce_lag)
+			if (shared_fdb || roce_lag)
 				mlx5_lag_add_devices(ldev);
 
 			return;
-		}
-
-		if (roce_lag) {
+		} else if (roce_lag) {
 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
 			mlx5_nic_vport_enable_roce(dev1);
+		} else if (shared_fdb) {
+			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+			mlx5_rescan_drivers_locked(dev0);
+
+			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+			if (!err)
+				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+			if (err) {
+				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+				mlx5_rescan_drivers_locked(dev0);
+				mlx5_deactivate_lag(ldev);
+				mlx5_lag_add_devices(ldev);
+				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+				mlx5_core_err(dev0, "Failed to enable lag\n");
+				return;
+			}
 		}
 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
 		mlx5_modify_lag(ldev, &tracker);
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -73,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ld
 		     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags);
+		      u8 flags,
+		      bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev);
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(str
 		struct lag_tracker tracker;
 
 		tracker = ldev->tracker;
-		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
 	}
 
 	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);