From b1f06bf0a3127f5bbcbc7eca9ae69b0af96dcd47 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Thu, 5 Sep 2019 05:15:42 -0400
Subject: [PATCH 09/10] forcedeth: use per cpu to collect xmit/recv statistics
Git-commit: f4b633b911fd3b4cbe1dc065e8fb064078d0889d
Patch-mainline: v5.3
References: git-fixes
When testing with a background iperf pushing 1Gbit/sec traffic and running
both ifconfig and netstat to collect statistics, some deadlocks occurred.
Ifconfig and netstat will call nv_get_stats64 to get software xmit/recv
statistics. In the commit f5d827aece36 ("forcedeth: implement
ndo_get_stats64() API"), the normal tx/rx variables is to collect tx/rx
statistics. The fix is to replace normal tx/rx variables with per
cpu 64-bit variable to collect xmit/recv statistics. The per cpu variable
will avoid deadlocks and provide fast efficient statistics updates.
In nv_probe, the per cpu variable is initialized. In nv_remove, this
per cpu variable is freed.
In xmit/recv process, this per cpu variable will be updated.
In nv_get_stats64, this per cpu variable on each cpu is added up. Then
the driver can get xmit/recv packets statistics.
A test runs for several days with this commit, the deadlocks disappear
and the performance is better.
Tested:
- iperf SMP x86_64 ->
Client connecting to 1.1.1.108, TCP port 5001
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[ 3] local 1.1.1.105 port 38888 connected with 1.1.1.108 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0-10.0 sec 1.10 GBytes 943 Mbits/sec
ifconfig results:
enp0s9 Link encap:Ethernet HWaddr 00:21:28:6f:de:0f
inet addr:1.1.1.105 Bcast:0.0.0.0 Mask:255.255.255.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:5774764531 errors:0 dropped:0 overruns:0 frame:0
TX packets:633534193 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:7646159340904 (7.6 TB) TX bytes:11425340407722 (11.4 TB)
netstat results:
Kernel Interface table
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flg
...
enp0s9 1500 0 5774764531 0 0 0 633534193 0 0 0 BMRU
...
Fixes: f5d827aece36 ("forcedeth: implement ndo_get_stats64() API")
CC: Joe Jin <joe.jin@oracle.com>
CC: JUNXIAO_BI <junxiao.bi@oracle.com>
Reported-and-tested-by: Nan san <nan.1986san@gmail.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Denis Kirjanov <denis.kirjanov@suse.com>
---
drivers/net/ethernet/nvidia/forcedeth.c | 143 ++++++++++++++++++++++----------
1 file changed, 99 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index aa912f43e15f..c73380efe11f 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -725,6 +725,21 @@ struct nv_skb_map {
struct nv_skb_map *next_tx_ctx;
};
+struct nv_txrx_stats {
+ u64 stat_rx_packets;
+ u64 stat_rx_bytes; /* not always available in HW */
+ u64 stat_rx_missed_errors;
+ u64 stat_rx_dropped;
+ u64 stat_tx_packets; /* not always available in HW */
+ u64 stat_tx_bytes;
+ u64 stat_tx_dropped;
+};
+
+#define nv_txrx_stats_inc(member) \
+ __this_cpu_inc(np->txrx_stats->member)
+#define nv_txrx_stats_add(member, count) \
+ __this_cpu_add(np->txrx_stats->member, (count))
+
/*
* SMP locking:
* All hardware access under netdev_priv(dev)->lock, except the performance
@@ -809,10 +824,7 @@ struct fe_priv {
/* RX software stats */
struct u64_stats_sync swstats_rx_syncp;
- u64 stat_rx_packets;
- u64 stat_rx_bytes; /* not always available in HW */
- u64 stat_rx_missed_errors;
- u64 stat_rx_dropped;
+ struct nv_txrx_stats __percpu *txrx_stats;
/* media detection workaround.
* Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
@@ -838,9 +850,6 @@ struct fe_priv {
/* TX software stats */
struct u64_stats_sync swstats_tx_syncp;
- u64 stat_tx_packets; /* not always available in HW */
- u64 stat_tx_bytes;
- u64 stat_tx_dropped;
/* msi/msi-x fields */
u32 msi_flags;
@@ -1727,6 +1736,39 @@ static void nv_update_stats(struct net_device *dev)
}
}
+static void nv_get_stats(int cpu, struct fe_priv *np,
+ struct rtnl_link_stats64 *storage)
+{
+ struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu);
+ unsigned int syncp_start;
+ u64 rx_packets, rx_bytes, rx_dropped, rx_missed_errors;
+ u64 tx_packets, tx_bytes, tx_dropped;
+
+ do {
+ syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
+ rx_packets = src->stat_rx_packets;
+ rx_bytes = src->stat_rx_bytes;
+ rx_dropped = src->stat_rx_dropped;
+ rx_missed_errors = src->stat_rx_missed_errors;
+ } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
+
+ storage->rx_packets += rx_packets;
+ storage->rx_bytes += rx_bytes;
+ storage->rx_dropped += rx_dropped;
+ storage->rx_missed_errors += rx_missed_errors;
+
+ do {
+ syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
+ tx_packets = src->stat_tx_packets;
+ tx_bytes = src->stat_tx_bytes;
+ tx_dropped = src->stat_tx_dropped;
+ } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+
+ storage->tx_packets += tx_packets;
+ storage->tx_bytes += tx_bytes;
+ storage->tx_dropped += tx_dropped;
+}
+
/*
* nv_get_stats64: dev->ndo_get_stats64 function
* Get latest stats value from the nic.
@@ -1739,7 +1781,7 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
__releases(&netdev_priv(dev)->hwstats_lock)
{
struct fe_priv *np = netdev_priv(dev);
- unsigned int syncp_start;
+ int cpu;
/*
* Note: because HW stats are not always available and for
@@ -1752,20 +1794,8 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
*/
/* software stats */
- do {
- syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
- storage->rx_packets = np->stat_rx_packets;
- storage->rx_bytes = np->stat_rx_bytes;
- storage->rx_dropped = np->stat_rx_dropped;
- storage->rx_missed_errors = np->stat_rx_missed_errors;
- } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
-
- do {
- syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
- storage->tx_packets = np->stat_tx_packets;
- storage->tx_bytes = np->stat_tx_bytes;
- storage->tx_dropped = np->stat_tx_dropped;
- } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+ for_each_online_cpu(cpu)
+ nv_get_stats(cpu, np, storage);
/* If the nic supports hw counters then retrieve latest values */
if (np->driver_data & DEV_HAS_STATISTICS_V123) {
@@ -1833,7 +1863,7 @@ static int nv_alloc_rx(struct net_device *dev)
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
- np->stat_rx_dropped++;
+ nv_txrx_stats_inc(stat_rx_dropped);
u64_stats_update_end(&np->swstats_rx_syncp);
return 1;
}
@@ -1875,7 +1905,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
- np->stat_rx_dropped++;
+ nv_txrx_stats_inc(stat_rx_dropped);
u64_stats_update_end(&np->swstats_rx_syncp);
return 1;
}
@@ -2016,7 +2046,7 @@ static void nv_drain_tx(struct net_device *dev)
}
if (nv_release_txskb(np, &np->tx_skb[i])) {
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_dropped++;
+ nv_txrx_stats_inc(stat_tx_dropped);
u64_stats_update_end(&np->swstats_tx_syncp);
}
np->tx_skb[i].dma = 0;
@@ -2231,7 +2261,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* on DMA mapping error - drop the packet */
dev_kfree_skb_any(skb);
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_dropped++;
+ nv_txrx_stats_inc(stat_tx_dropped);
u64_stats_update_end(&np->swstats_tx_syncp);
return NETDEV_TX_OK;
}
@@ -2278,7 +2308,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
dev_kfree_skb_any(skb);
np->put_tx_ctx = start_tx_ctx;
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_dropped++;
+ nv_txrx_stats_inc(stat_tx_dropped);
u64_stats_update_end(&np->swstats_tx_syncp);
return NETDEV_TX_OK;
}
@@ -2380,7 +2410,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
/* on DMA mapping error - drop the packet */
dev_kfree_skb_any(skb);
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_dropped++;
+ nv_txrx_stats_inc(stat_tx_dropped);
u64_stats_update_end(&np->swstats_tx_syncp);
return NETDEV_TX_OK;
}
@@ -2428,7 +2458,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
dev_kfree_skb_any(skb);
np->put_tx_ctx = start_tx_ctx;
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_dropped++;
+ nv_txrx_stats_inc(stat_tx_dropped);
u64_stats_update_end(&np->swstats_tx_syncp);
return NETDEV_TX_OK;
}
@@ -2547,9 +2577,12 @@ static int nv_tx_done(struct net_device *dev, int limit)
&& !(flags & NV_TX_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
} else {
+ unsigned int len;
+
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_packets++;
- np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ nv_txrx_stats_inc(stat_tx_packets);
+ len = np->get_tx_ctx->skb->len;
+ nv_txrx_stats_add(stat_tx_bytes, len);
u64_stats_update_end(&np->swstats_tx_syncp);
}
bytes_compl += np->get_tx_ctx->skb->len;
@@ -2564,9 +2597,12 @@ static int nv_tx_done(struct net_device *dev, int limit)
&& !(flags & NV_TX2_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
} else {
+ unsigned int len;
+
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_packets++;
- np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ nv_txrx_stats_inc(stat_tx_packets);
+ len = np->get_tx_ctx->skb->len;
+ nv_txrx_stats_add(stat_tx_bytes, len);
u64_stats_update_end(&np->swstats_tx_syncp);
}
bytes_compl += np->get_tx_ctx->skb->len;
@@ -2614,9 +2650,12 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
nv_legacybackoff_reseed(dev);
}
} else {
+ unsigned int len;
+
u64_stats_update_begin(&np->swstats_tx_syncp);
- np->stat_tx_packets++;
- np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+ nv_txrx_stats_inc(stat_tx_packets);
+ len = np->get_tx_ctx->skb->len;
+ nv_txrx_stats_add(stat_tx_bytes, len);
u64_stats_update_end(&np->swstats_tx_syncp);
}
@@ -2793,6 +2832,15 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
}
}
+static void rx_missing_handler(u32 flags, struct fe_priv *np)
+{
+ if (flags & NV_RX_MISSEDFRAME) {
+ u64_stats_update_begin(&np->swstats_rx_syncp);
+ nv_txrx_stats_inc(stat_rx_missed_errors);
+ u64_stats_update_end(&np->swstats_rx_syncp);
+ }
+}
+
static int nv_rx_process(struct net_device *dev, int limit)
{
struct fe_priv *np = netdev_priv(dev);
@@ -2835,11 +2883,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
}
/* the rest are hard errors */
else {
- if (flags & NV_RX_MISSEDFRAME) {
- u64_stats_update_begin(&np->swstats_rx_syncp);
- np->stat_rx_missed_errors++;
- u64_stats_update_end(&np->swstats_rx_syncp);
- }
+ rx_missing_handler(flags, np);
dev_kfree_skb(skb);
goto next_pkt;
}
@@ -2883,8 +2927,8 @@ static int nv_rx_process(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&np->napi, skb);
u64_stats_update_begin(&np->swstats_rx_syncp);
- np->stat_rx_packets++;
- np->stat_rx_bytes += len;
+ nv_txrx_stats_inc(stat_rx_packets);
+ nv_txrx_stats_add(stat_rx_bytes, len);
u64_stats_update_end(&np->swstats_rx_syncp);
next_pkt:
if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
@@ -2969,8 +3013,8 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
}
napi_gro_receive(&np->napi, skb);
u64_stats_update_begin(&np->swstats_rx_syncp);
- np->stat_rx_packets++;
- np->stat_rx_bytes += len;
+ nv_txrx_stats_inc(stat_rx_packets);
+ nv_txrx_stats_add(stat_rx_bytes, len);
u64_stats_update_end(&np->swstats_rx_syncp);
} else {
dev_kfree_skb(skb);
@@ -5626,6 +5670,12 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
SET_NETDEV_DEV(dev, &pci_dev->dev);
u64_stats_init(&np->swstats_rx_syncp);
u64_stats_init(&np->swstats_tx_syncp);
+ np->txrx_stats = alloc_percpu(struct nv_txrx_stats);
+ if (!np->txrx_stats) {
+ pr_err("np->txrx_stats, alloc memory error.\n");
+ err = -ENOMEM;
+ goto out_alloc_percpu;
+ }
setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev);
setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev);
@@ -6032,6 +6082,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
out_disable:
pci_disable_device(pci_dev);
out_free:
+ free_percpu(np->txrx_stats);
+out_alloc_percpu:
free_netdev(dev);
out:
return err;
@@ -6077,6 +6129,9 @@ static void nv_restore_mac_addr(struct pci_dev *pci_dev)
static void nv_remove(struct pci_dev *pci_dev)
{
struct net_device *dev = pci_get_drvdata(pci_dev);
+ struct fe_priv *np = netdev_priv(dev);
+
+ free_percpu(np->txrx_stats);
unregister_netdev(dev);
--
2.16.4