Blob Blame History Raw
From: Tony Lu <tony.ly@linux.alibaba.com>
Date: Fri, 17 Sep 2021 16:40:06 +0800
Subject: virtio_net: introduce TX timeout watchdog
Patch-mainline: v5.16-rc1
Git-commit: a520794b063b9567799533c4fec230d860f5c9e5
References: jsc#PED-370

This implements ndo_tx_timeout handler and put this into stats. When
there is something wrong to send out packets, we could notice tx timeout
events and total timeout counter.

We have suffered send timeout issues due to the backends hung. With this,
we can find the details, and collect the counters by monitor systems.

Signed-off-by: Tony Lu <tony.ly@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/virtio_net.c |   22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -80,6 +80,7 @@ struct virtnet_sq_stats {
 	u64 xdp_tx;
 	u64 xdp_tx_drops;
 	u64 kicks;
+	u64 tx_timeouts;
 };
 
 struct virtnet_rq_stats {
@@ -103,6 +104,7 @@ static const struct virtnet_stat_desc vi
 	{ "xdp_tx",		VIRTNET_SQ_STAT(xdp_tx) },
 	{ "xdp_tx_drops",	VIRTNET_SQ_STAT(xdp_tx_drops) },
 	{ "kicks",		VIRTNET_SQ_STAT(kicks) },
+	{ "tx_timeouts",	VIRTNET_SQ_STAT(tx_timeouts) },
 };
 
 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
@@ -1859,7 +1861,7 @@ static void virtnet_stats(struct net_dev
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
+		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
 		struct receive_queue *rq = &vi->rq[i];
 		struct send_queue *sq = &vi->sq[i];
 
@@ -1867,6 +1869,7 @@ static void virtnet_stats(struct net_dev
 			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
 			tpackets = sq->stats.packets;
 			tbytes   = sq->stats.bytes;
+			terrors  = sq->stats.tx_timeouts;
 		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
 
 		do {
@@ -1881,6 +1884,7 @@ static void virtnet_stats(struct net_dev
 		tot->rx_bytes   += rbytes;
 		tot->tx_bytes   += tbytes;
 		tot->rx_dropped += rdrops;
+		tot->tx_errors  += terrors;
 	}
 
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -2662,6 +2666,21 @@ static int virtnet_set_features(struct n
 	return 0;
 }
 
+static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct virtnet_info *priv = netdev_priv(dev);
+	struct send_queue *sq = &priv->sq[txqueue];
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
+
+	u64_stats_update_begin(&sq->stats.syncp);
+	sq->stats.tx_timeouts++;
+	u64_stats_update_end(&sq->stats.syncp);
+
+	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
+		   txqueue, sq->name, sq->vq->index, sq->vq->name,
+		   jiffies_to_usecs(jiffies - txq->trans_start));
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -2677,6 +2696,7 @@ static const struct net_device_ops virtn
 	.ndo_features_check	= passthru_features_check,
 	.ndo_get_phys_port_name	= virtnet_get_phys_port_name,
 	.ndo_set_features	= virtnet_set_features,
+	.ndo_tx_timeout		= virtnet_tx_timeout,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)