Blob Blame History Raw
From: Amir Tzin <amirtz@nvidia.com>
Date: Wed, 13 Oct 2021 09:07:13 +0300
Subject: net/mlx5: Read timeout values from DTOR
Patch-mainline: v5.16-rc1
Git-commit: 32def4120e4876b5367ad58eb3a641bf6915979b
References: jsc#SLE-19253

Replace hard coded timeouts with values stored by firmware in default
timeouts register (DTOR). Timeouts are read during driver load. If DTOR
is not supported by firmware then fallback to hard coded defaults
instead.

Signed-off-by: Amir Tzin <amirtz@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/health.h      |    1 
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c |    7 +
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c |    7 +
 drivers/net/ethernet/mellanox/mlx5/core/fw.c             |    9 +
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c       |   16 +--
 drivers/net/ethernet/mellanox/mlx5/core/health.c         |   21 +---
 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c       |   68 ++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h       |   13 ++
 drivers/net/ethernet/mellanox/mlx5/core/main.c           |    6 +
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c      |   16 +--
 10 files changed, 124 insertions(+), 40 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -32,7 +32,6 @@ void mlx5e_reporter_rq_cqe_err(struct ml
 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
-#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
 
 struct mlx5e_err_ctx {
 	int (*recover)(void *ctx);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -6,6 +6,7 @@
 #include "txrx.h"
 #include "devlink.h"
 #include "ptp.h"
+#include "lib/tout.h"
 
 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 {
@@ -32,8 +33,10 @@ out:
 
 static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
 {
-	unsigned long exp_time = jiffies +
-				 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
+	struct mlx5_core_dev *dev = icosq->channel->mdev;
+	unsigned long exp_time;
+
+	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
 
 	while (time_before(jiffies, exp_time)) {
 		if (icosq->cc == icosq->pc)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -4,11 +4,14 @@
 #include "health.h"
 #include "en/ptp.h"
 #include "en/devlink.h"
+#include "lib/tout.h"
 
 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
 {
-	unsigned long exp_time = jiffies +
-				 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
+	struct mlx5_core_dev *dev = sq->mdev;
+	unsigned long exp_time;
+
+	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
 
 	while (time_before(jiffies, exp_time)) {
 		if (sq->cc == sq->pc)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
+#include "lib/tout.h"
 #include "accel/tls.h"
 
 enum {
@@ -317,10 +318,9 @@ int mlx5_cmd_force_teardown_hca(struct m
 	return 0;
 }
 
-#define MLX5_FAST_TEARDOWN_WAIT_MS   3000
 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
 {
-	unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
+	unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN);
 	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
 	int state;
@@ -618,17 +618,18 @@ static void mlx5_fsm_release(struct mlxf
 			 fwhandle, 0);
 }
 
-#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */
 static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status)
 {
-	unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT);
 	struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
 		container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
 	struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
 	u32 out[MLX5_ST_SZ_DW(mirc_reg)];
 	u32 in[MLX5_ST_SZ_DW(mirc_reg)];
+	unsigned long exp_time;
 	int err;
 
+	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE));
+
 	if (!MLX5_CAP_MCAM_REG2(dev, mirc))
 		return -EOPNOTSUPP;
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -3,6 +3,7 @@
 
 #include "fw_reset.h"
 #include "diag/fw_tracer.h"
+#include "lib/tout.h"
 
 enum {
 	MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
@@ -228,8 +229,6 @@ static void mlx5_sync_reset_request_even
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
 }
 
-#define MLX5_PCI_LINK_UP_TIMEOUT 2000
-
 static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
 {
 	struct pci_bus *bridge_bus = dev->pdev->bus;
@@ -286,7 +285,7 @@ static int mlx5_pci_link_toggle(struct m
 		goto restore;
 	}
 
-	timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
+	timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE));
 	do {
 		err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
 		if (err)
@@ -299,8 +298,8 @@ static int mlx5_pci_link_toggle(struct m
 	if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
 		mlx5_core_info(dev, "PCI Link up\n");
 	} else {
-		mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
-			      reg16, MLX5_PCI_LINK_UP_TIMEOUT);
+		mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n",
+			      reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
 		err = -ETIMEDOUT;
 	}
 
@@ -395,16 +394,15 @@ static int fw_reset_event_notifier(struc
 	return NOTIFY_OK;
 }
 
-#define MLX5_FW_RESET_TIMEOUT_MSEC 5000
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
 {
-	unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC);
+	unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE));
 	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
 	int err;
 
 	if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
-		mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n",
-			       MLX5_FW_RESET_TIMEOUT_MSEC / 1000);
+		mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n",
+			       mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000);
 		err = -ETIMEDOUT;
 		goto out;
 	}
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -40,10 +40,10 @@
 #include "lib/eq.h"
 #include "lib/mlx5.h"
 #include "lib/pci_vsc.h"
+#include "lib/tout.h"
 #include "diag/fw_tracer.h"
 
 enum {
-	MLX5_HEALTH_POLL_INTERVAL	= 2 * HZ,
 	MAX_MISSES			= 3,
 };
 
@@ -219,11 +219,9 @@ unlock:
 	mutex_unlock(&dev->intf_state_mutex);
 }
 
-#define MLX5_CRDUMP_WAIT_MS	60000
-#define MLX5_FW_RESET_WAIT_MS	1000
 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
 {
-	unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+	unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
 	int lock = -EBUSY;
 
 	mutex_lock(&dev->intf_state_mutex);
@@ -237,7 +235,7 @@ void mlx5_error_sw_reset(struct mlx5_cor
 		lock = lock_sem_sw_reset(dev, true);
 
 		if (lock == -EBUSY) {
-			delay_ms = MLX5_CRDUMP_WAIT_MS;
+			delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
 			goto recover_from_sw_reset;
 		}
 		/* Execute SW reset */
@@ -307,13 +305,11 @@ static void mlx5_handle_bad_state(struct
 	mlx5_disable_device(dev);
 }
 
-/* How much time to wait until health resetting the driver (in msecs) */
-#define MLX5_RECOVERY_WAIT_MSECS 60000
 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
 {
 	unsigned long end;
 
-	end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
+	end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
 	while (sensor_pci_not_working(dev)) {
 		if (time_after(jiffies, end))
 			return -ETIMEDOUT;
@@ -674,13 +670,13 @@ static void mlx5_fw_reporters_destroy(st
 		devlink_health_reporter_destroy(health->fw_fatal_reporter);
 }
 
-static unsigned long get_next_poll_jiffies(void)
+static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
 {
 	unsigned long next;
 
 	get_random_bytes(&next, sizeof(next));
 	next %= HZ;
-	next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+	next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
 
 	return next;
 }
@@ -740,11 +736,12 @@ static void poll_health(struct timer_lis
 		queue_work(health->wq, &health->report_work);
 
 out:
-	mod_timer(&health->timer, get_next_poll_jiffies());
+	mod_timer(&health->timer, get_next_poll_jiffies(dev));
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 {
+	u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
 	struct mlx5_core_health *health = &dev->priv.health;
 
 	timer_setup(&health->timer, poll_health, 0);
@@ -753,7 +750,7 @@ void mlx5_start_health_poll(struct mlx5_
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
-	health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+	health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
 	add_timer(&health->timer);
 }
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -13,7 +13,17 @@ static const u32 tout_def_sw_val[MAX_TIM
 	[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
 	[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
 	[MLX5_TO_FW_INIT_MS] = 2000,
-	[MLX5_TO_CMD_MS] = 60000
+	[MLX5_TO_CMD_MS] = 60000,
+	[MLX5_TO_PCI_TOGGLE_MS] =  2000,
+	[MLX5_TO_HEALTH_POLL_INTERVAL_MS] =  2000,
+	[MLX5_TO_FULL_CRDUMP_MS] = 60000,
+	[MLX5_TO_FW_RESET_MS] = 60000,
+	[MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
+	[MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
+	[MLX5_TO_TEARDOWN_MS] = 3000,
+	[MLX5_TO_FSM_REACTIVATE_MS] = 5000,
+	[MLX5_TO_RECLAIM_PAGES_MS] = 5000,
+	[MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000
 };
 
 static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
@@ -94,3 +104,59 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *
 {
 	return dev->timeouts->to[type];
 }
+
+#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
+	({ \
+	struct mlx5_ifc_default_timeout_bits *time_field; \
+	u32 to_multi, to_value; \
+	u64 to_val_ms; \
+	\
+	time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
+	to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
+	to_value = MLX5_GET(default_timeout, time_field, to_value); \
+	to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
+	to_val_ms; \
+	})
+
+#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
+	({ \
+	u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
+	tout_set(dev, fw_to + (to_extra), to_type); \
+	fw_to; \
+	})
+
+static int tout_query_dtor(struct mlx5_core_dev *dev)
+{
+	u64 pcie_toggle_to_val, tear_down_to_val;
+	u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
+	int err;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
+	if (err)
+		return err;
+
+	pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
+	MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
+
+	tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
+	MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
+			  tear_down_to_val);
+
+	MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
+	MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
+	MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
+	MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
+	MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
+	MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
+
+	return 0;
+}
+
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
+{
+	if (tout_is_supported(dev))
+		return tout_query_dtor(dev);
+
+	return 0;
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -14,6 +14,18 @@ enum mlx5_timeouts_types {
 	MLX5_TO_FW_INIT_MS,
 	MLX5_TO_CMD_MS,
 
+	/* DTOR timeouts */
+	MLX5_TO_PCI_TOGGLE_MS,
+	MLX5_TO_HEALTH_POLL_INTERVAL_MS,
+	MLX5_TO_FULL_CRDUMP_MS,
+	MLX5_TO_FW_RESET_MS,
+	MLX5_TO_FLUSH_ON_ERROR_MS,
+	MLX5_TO_PCI_SYNC_UPDATE_MS,
+	MLX5_TO_TEARDOWN_MS,
+	MLX5_TO_FSM_REACTIVATE_MS,
+	MLX5_TO_RECLAIM_PAGES_MS,
+	MLX5_TO_RECLAIM_VFS_PAGES_MS,
+
 	MAX_TIMEOUT_TYPES
 };
 
@@ -21,6 +33,7 @@ struct mlx5_core_dev;
 int mlx5_tout_init(struct mlx5_core_dev *dev);
 void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
 void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
 u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
 
 #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1020,6 +1020,12 @@ static int mlx5_function_setup(struct ml
 		goto err_disable_hca;
 	}
 
+	err = mlx5_tout_query_dtor(dev);
+	if (err) {
+		mlx5_core_err(dev, "failed to read dtor\n");
+		goto reclaim_boot_pages;
+	}
+
 	err = set_hca_ctrl(dev);
 	if (err) {
 		mlx5_core_err(dev, "set_hca_ctrl failed\n");
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -38,6 +38,7 @@
 #include <linux/xarray.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
+#include "lib/tout.h"
 
 enum {
 	MLX5_PAGES_CANT_GIVE	= 0,
@@ -65,11 +66,6 @@ struct fw_page {
 };
 
 enum {
-	MAX_RECLAIM_TIME_MSECS	= 5000,
-	MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
-};
-
-enum {
 	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,
 	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
 };
@@ -641,7 +637,8 @@ static int optimal_reclaimed_pages(void)
 static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
 				   struct rb_root *root, u16 func_id)
 {
-	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+	u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
+	unsigned long end = jiffies + recl_pages_to_jiffies;
 
 	while (!RB_EMPTY_ROOT(root)) {
 		int nclaimed;
@@ -656,7 +653,7 @@ static int mlx5_reclaim_root_pages(struc
 		}
 
 		if (nclaimed)
-			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+			end = jiffies + recl_pages_to_jiffies;
 
 		if (time_after(jiffies, end)) {
 			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
@@ -727,7 +724,8 @@ void mlx5_pagealloc_stop(struct mlx5_cor
 
 int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
 {
-	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+	u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
+	unsigned long end = jiffies + recl_vf_pages_to_jiffies;
 	int prev_pages = *pages;
 
 	/* In case of internal error we will free the pages manually later */
@@ -743,7 +741,7 @@ int mlx5_wait_for_pages(struct mlx5_core
 			return -ETIMEDOUT;
 		}
 		if (*pages < prev_pages) {
-			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+			end = jiffies + recl_vf_pages_to_jiffies;
 			prev_pages = *pages;
 		}
 		msleep(50);