Blob Blame History Raw
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 9 Jan 2018 16:21:16 +0200
Subject: net/mlx5e: Dump xmit error completions
Patch-mainline: v4.17-rc1
Git-commit: 16cc14d817338fc297970d2d9d146c88ec87474d
References: bsc#1103990 FATE#326006

Monitor and dump xmit error completions. In addition, add a cqe_err
counter to track the number of error completions per send queue.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |    3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |    2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |   19 +++++++++++++++++++
 3 files changed, 24 insertions(+)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -60,6 +60,7 @@ static const struct counter_desc sw_stat
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
@@ -153,6 +154,7 @@ static void mlx5e_grp_sw_update_stats(st
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
+			s->tx_cqe_err		+= sq_stats->cqe_err;
 			s->tx_xmit_more		+= sq_stats->xmit_more;
 			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
 			s->tx_csum_none		+= sq_stats->csum_none;
@@ -1103,6 +1105,7 @@ static const struct counter_desc sq_stat
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
 };
 
 static const struct counter_desc ch_stats_desc[] = {
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -78,6 +78,7 @@ struct mlx5e_sw_stats {
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
 	u64 tx_xmit_more;
+	u64 tx_cqe_err;
 	u64 rx_wqe_err;
 	u64 rx_mpwqe_filler;
 	u64 rx_buff_alloc_err;
@@ -197,6 +198,7 @@ struct mlx5e_sq_stats {
 	u64 stopped;
 	u64 wake;
 	u64 dropped;
+	u64 cqe_err;
 };
 
 struct mlx5e_ch_stats {
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *s
 	return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
+static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
+				 struct mlx5_err_cqe *err_cqe)
+{
+	u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+
+	netdev_err(sq->channel->netdev,
+		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+		   sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
+		   err_cqe->vendor_err_synd);
+	mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_txqsq *sq;
@@ -456,6 +468,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *c
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
+		if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+			if (!sq->stats.cqe_err)
+				mlx5e_dump_error_cqe(sq,
+						     (struct mlx5_err_cqe *)cqe);
+			sq->stats.cqe_err++;
+		}
+
 		do {
 			struct mlx5e_tx_wqe_info *wi;
 			struct sk_buff *skb;