Jiri Slaby 7aae47
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Jiri Slaby 7aae47
Date: Sat, 1 Apr 2023 19:59:48 +0200
Jiri Slaby 7aae47
Subject: [PATCH] spi: fsl-cpm: Use 16 bit mode for large transfers with even
Jiri Slaby 7aae47
 size
Jiri Slaby 7aae47
References: bsc#1012628
Jiri Slaby 7aae47
Patch-mainline: 6.3.3
Jiri Slaby 7aae47
Git-commit: fc96ec826bced75cc6b9c07a4ac44bbf651337ab
Jiri Slaby 7aae47
Jiri Slaby 7aae47
commit fc96ec826bced75cc6b9c07a4ac44bbf651337ab upstream.
Jiri Slaby 7aae47
Jiri Slaby 7aae47
On CPM, the RISC core is a lot more efficient when doing transfers
Jiri Slaby 7aae47
in 16-bit chunks than in 8-bit chunks, but unfortunately the
Jiri Slaby 7aae47
words need to be byte swapped as seen in a previous commit.
Jiri Slaby 7aae47
Jiri Slaby 7aae47
So, for large transfers with an even size, allocate a temporary tx
Jiri Slaby 7aae47
buffer and byte-swap data before and after transfer.
Jiri Slaby 7aae47
Jiri Slaby 7aae47
This change allows setting higher speed for transfer. For instance
Jiri Slaby 7aae47
on an MPC 8xx (CPM1 comms RISC processor), the documentation tells
Jiri Slaby 7aae47
that transfer in byte mode at 1 kbit/s uses 0.200% of CPM load
Jiri Slaby 7aae47
at 25 MHz while a word transfer at the same speed uses 0.032%
Jiri Slaby 7aae47
of CPM load. This means the speed can be 6 times higher in
Jiri Slaby 7aae47
word mode for the same CPM load.
Jiri Slaby 7aae47
Jiri Slaby 7aae47
For the time being, only do it on CPM1 as there must be a
Jiri Slaby 7aae47
trade-off between the CPM load reduction and the CPU load required
Jiri Slaby 7aae47
to byte swap the data.
Jiri Slaby 7aae47
Jiri Slaby 7aae47
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Jiri Slaby 7aae47
Link: https://lore.kernel.org/r/f2e981f20f92dd28983c3949702a09248c23845c.1680371809.git.christophe.leroy@csgroup.eu
Jiri Slaby 7aae47
Signed-off-by: Mark Brown <broonie@kernel.org>
Jiri Slaby 7aae47
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Jiri Slaby 7aae47
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Jiri Slaby 7aae47
---
Jiri Slaby 7aae47
 drivers/spi/spi-fsl-cpm.c | 23 +++++++++++++++++++++++
Jiri Slaby 7aae47
 drivers/spi/spi-fsl-spi.c |  3 +++
Jiri Slaby 7aae47
 2 files changed, 26 insertions(+)
Jiri Slaby 7aae47
Jiri Slaby 7aae47
diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
Jiri Slaby 7aae47
index 17a44d4f..38452089 100644
Jiri Slaby 7aae47
--- a/drivers/spi/spi-fsl-cpm.c
Jiri Slaby 7aae47
+++ b/drivers/spi/spi-fsl-cpm.c
Jiri Slaby 7aae47
@@ -21,6 +21,7 @@
Jiri Slaby 7aae47
 #include <linux/spi/spi.h>
Jiri Slaby 7aae47
 #include <linux/types.h>
Jiri Slaby 7aae47
 #include <linux/platform_device.h>
Jiri Slaby 7aae47
+#include <linux/byteorder/generic.h>
Jiri Slaby 7aae47
 
Jiri Slaby 7aae47
 #include "spi-fsl-cpm.h"
Jiri Slaby 7aae47
 #include "spi-fsl-lib.h"
Jiri Slaby 7aae47
@@ -120,6 +121,21 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
Jiri Slaby 7aae47
 		mspi->rx_dma = mspi->dma_dummy_rx;
Jiri Slaby 7aae47
 		mspi->map_rx_dma = 0;
Jiri Slaby 7aae47
 	}
Jiri Slaby 7aae47
+	if (t->bits_per_word == 16 && t->tx_buf) {
Jiri Slaby 7aae47
+		const u16 *src = t->tx_buf;
Jiri Slaby 7aae47
+		u16 *dst;
Jiri Slaby 7aae47
+		int i;
Jiri Slaby 7aae47
+
Jiri Slaby 7aae47
+		dst = kmalloc(t->len, GFP_KERNEL);
Jiri Slaby 7aae47
+		if (!dst)
Jiri Slaby 7aae47
+			return -ENOMEM;
Jiri Slaby 7aae47
+
Jiri Slaby 7aae47
+		for (i = 0; i < t->len >> 1; i++)
Jiri Slaby 7aae47
+			dst[i] = cpu_to_le16p(src + i);
Jiri Slaby 7aae47
+
Jiri Slaby 7aae47
+		mspi->tx = dst;
Jiri Slaby 7aae47
+		mspi->map_tx_dma = 1;
Jiri Slaby 7aae47
+	}
Jiri Slaby 7aae47
 
Jiri Slaby 7aae47
 	if (mspi->map_tx_dma) {
Jiri Slaby 7aae47
 		void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
Jiri Slaby 7aae47
@@ -173,6 +189,13 @@ void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi)
Jiri Slaby 7aae47
 	if (mspi->map_rx_dma)
Jiri Slaby 7aae47
 		dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
Jiri Slaby 7aae47
 	mspi->xfer_in_progress = NULL;
Jiri Slaby 7aae47
+
Jiri Slaby 7aae47
+	if (t->bits_per_word == 16 && t->rx_buf) {
Jiri Slaby 7aae47
+		int i;
Jiri Slaby 7aae47
+
Jiri Slaby 7aae47
+		for (i = 0; i < t->len; i += 2)
Jiri Slaby 7aae47
+			le16_to_cpus(t->rx_buf + i);
Jiri Slaby 7aae47
+	}
Jiri Slaby 7aae47
 }
Jiri Slaby 7aae47
 EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete);
Jiri Slaby 7aae47
 
Jiri Slaby 7aae47
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
Jiri Slaby 7aae47
index 7e0aca62..b14f430a 100644
Jiri Slaby 7aae47
--- a/drivers/spi/spi-fsl-spi.c
Jiri Slaby 7aae47
+++ b/drivers/spi/spi-fsl-spi.c
Jiri Slaby 7aae47
@@ -351,6 +351,9 @@ static int fsl_spi_prepare_message(struct spi_controller *ctlr,
Jiri Slaby 7aae47
 				return -EINVAL;
Jiri Slaby 7aae47
 			if (t->bits_per_word == 16 || t->bits_per_word == 32)
Jiri Slaby 7aae47
 				t->bits_per_word = 8; /* pretend its 8 bits */
Jiri Slaby 7aae47
+			if (t->bits_per_word == 8 && t->len >= 256 &&
Jiri Slaby 7aae47
+			    (mpc8xxx_spi->flags & SPI_CPM1))
Jiri Slaby 7aae47
+				t->bits_per_word = 16;
Jiri Slaby 7aae47
 		}
Jiri Slaby 7aae47
 	}
Jiri Slaby 7aae47
 	return fsl_spi_setup_transfer(m->spi, first);
Jiri Slaby 7aae47
-- 
Jiri Slaby 7aae47
2.35.3
Jiri Slaby 7aae47