Blob Blame History Raw
From: Niklas Cassel <niklas.cassel@axis.com>
Subject: net: stmmac: fix broken dma_interrupt handling for multi-queues
Patch-mainline: v4.16-rc1
Git-commit: 5a6a0445d1edb28fc89fd12b49cda2d5114e2665
References: git-fixes

There is nothing that says that number of TX queues == number of RX
queues. E.g. the ARTPEC-6 SoC has 2 TX queues and 1 RX queue.

This code is obviously wrong:
for (chan = 0; chan < tx_channel_count; chan++) {
    struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];

priv->rx_queue has size MTL_MAX_RX_QUEUES, so this will send an
uninitialized napi_struct to __napi_schedule(), causing us to
crash in net_rx_action(), because napi_struct->poll is zero.

[12846.759880] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[12846.768014] pgd = (ptrval)
[12846.770742] [00000000] *pgd=39ec7831, *pte=00000000, *ppte=00000000
[12846.777023] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM
[12846.782942] Modules linked in:
[12846.785998] CPU: 0 PID: 161 Comm: dropbear Not tainted 4.15.0-rc2-00285-gf5fb5f2f39a7 #36
[12846.794177] Hardware name: Axis ARTPEC-6 Platform
[12846.798879] task: (ptrval) task.stack: (ptrval)
[12846.803407] PC is at 0x0
[12846.805942] LR is at net_rx_action+0x274/0x43c
[12846.810383] pc : [<00000000>]    lr : [<80bff064>]    psr: 200e0113
[12846.816648] sp : b90d9ae8  ip : b90d9ae8  fp : b90d9b44
[12846.821871] r10: 00000008  r9 : 0013250e  r8 : 00000100
[12846.827094] r7 : 0000012c  r6 : 00000000  r5 : 00000001  r4 : bac84900
[12846.833619] r3 : 00000000  r2 : b90d9b08  r1 : 00000000  r0 : bac84900

Since each DMA channel can be used for rx and tx simultaneously,
the current code should probably be rewritten so that napi_struct is
embedded in a new struct stmmac_channel.
That way, stmmac_poll() can call stmmac_tx_clean() on just the tx queue
where we got the IRQ, instead of looping through all tx queues.
This is also how the xgbe driver does it (another driver for this IP).

Fixes: c22a3f48ef99 ("net: stmmac: adding multiple napi mechanism")
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Denis Kirjanov <dkirjanov@suse.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   54 ++++++++++++++++++----
 1 file changed, 46 insertions(+), 8 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1987,22 +1987,60 @@ static void stmmac_set_dma_operation_mod
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
 	u32 tx_channel_count = priv->plat->tx_queues_to_use;
-	int status;
+	u32 rx_channel_count = priv->plat->rx_queues_to_use;
+	u32 channels_to_check = tx_channel_count > rx_channel_count ?
+		tx_channel_count : rx_channel_count;
 	u32 chan;
+	bool poll_scheduled = false;
+	int status[channels_to_check];
 
-	for (chan = 0; chan < tx_channel_count; chan++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+	/* Each DMA channel can be used for rx and tx simultaneously, yet
+	 * napi_struct is embedded in struct stmmac_rx_queue rather than in a
+	 * stmmac_channel struct.
+	 * Because of this, stmmac_poll currently checks (and possibly wakes)
+	 * all tx queues rather than just a single tx queue.
+	 */
+	for (chan = 0; chan < channels_to_check; chan++)
+		status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
+				&priv->xstats,
+				chan);
+
+	for (chan = 0; chan < rx_channel_count; chan++) {
+		if (likely(status[chan] & handle_rx)) {
+			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
-		status = priv->hw->dma->dma_interrupt(priv->ioaddr,
-						      &priv->xstats, chan);
-		if (likely((status & handle_rx)) || (status & handle_tx)) {
 			if (likely(napi_schedule_prep(&rx_q->napi))) {
 				stmmac_disable_dma_irq(priv, chan);
 				__napi_schedule(&rx_q->napi);
+				poll_scheduled = true;
 			}
 		}
+	}
 
-		if (unlikely(status & tx_hard_error_bump_tc)) {
+	/* If we scheduled poll, we already know that tx queues will be checked.
+	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
+	 * completed transmission, if so, call stmmac_poll (once).
+	 */
+	if (!poll_scheduled) {
+		for (chan = 0; chan < tx_channel_count; chan++) {
+			if (status[chan] & handle_tx) {
+				/* It doesn't matter what rx queue we choose
+				 * here. We use 0 since it always exists.
+				 */
+				struct stmmac_rx_queue *rx_q =
+					&priv->rx_queue[0];
+
+				if (likely(napi_schedule_prep(&rx_q->napi))) {
+					stmmac_disable_dma_irq(priv, chan);
+					__napi_schedule(&rx_q->napi);
+				}
+				break;
+			}
+		}
+	}
+
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
 			/* Try to bump up the dma threshold on this failure */
 			if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
 			    (tc <= 256)) {
@@ -2019,7 +2057,7 @@ static void stmmac_dma_interrupt(struct
 								    chan);
 				priv->xstats.threshold = tc;
 			}
-		} else if (unlikely(status == tx_hard_error)) {
+		} else if (unlikely(status[chan] == tx_hard_error)) {
 			stmmac_tx_err(priv, chan);
 		}
 	}