Blob Blame History Raw
From: Aya Levin <ayal@mellanox.com>
Date: Tue, 25 Jun 2019 21:42:27 +0300
Subject: net/mlx5e: Report and recover from rx timeout
Patch-mainline: v5.4-rc1
Git-commit: 32c57fb26863b48982e33aa95f3b5b23f24b1feb
References: jsc#SLE-8464

Add support for report and recovery from rx timeout. On driver open we
post NOP work request on the rx channels to trigger napi in order to
fillup the rx rings. In case napi wasn't scheduled due to a lost
interrupt, perform EQ recovery.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/health.h      |    1 
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c |   32 +++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c        |    1 
 3 files changed, 34 insertions(+)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -19,6 +19,7 @@ int mlx5e_reporter_named_obj_nest_end(st
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -115,6 +115,38 @@ void mlx5e_reporter_icosq_cqe_err(struct
 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
 }
 
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+	struct mlx5e_icosq *icosq;
+	struct mlx5_eq_comp *eq;
+	struct mlx5e_rq *rq;
+	int err;
+
+	rq = ctx;
+	icosq = &rq->channel->icosq;
+	eq = rq->cq.mcq.eq;
+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
+	if (err)
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+
+	return err;
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+	sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 {
 	return err_ctx->recover(err_ctx->ctx);
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -808,6 +808,7 @@ int mlx5e_wait_for_min_rx_wqes(struct ml
 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 
+	mlx5e_reporter_rx_timeout(rq);
 	return -ETIMEDOUT;
 }