
[2/4] spi: axi-spi-engine: don't repeat mode config for offload

Message ID: 20250428-adi-main-v1-2-4b8a1b88a212@baylibre.com
State: New
Series: spi: axi-spi-engine: offload instruction optimization

Commit Message

David Lechner April 28, 2025, 8:58 p.m. UTC
Add an optimization to avoid repeating the config instruction in each
SPI message when using SPI offloading. Instead, the instruction is
run once when the SPI offload trigger is enabled.

This is done to allow higher sample rates for ADCs using this SPI
controller.

Signed-off-by: David Lechner <dlechner@baylibre.com>
---
 drivers/spi/spi-axi-spi-engine.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

Comments

Nuno Sá April 29, 2025, 9:08 a.m. UTC | #1
Hi David,

The whole series LGTM but I do have a small concern (maybe a hypothetical one)
in this one...


On Mon, 2025-04-28 at 15:58 -0500, David Lechner wrote:
> Add an optimization to avoid repeating the config instruction in each
> SPI message when using SPI offloading. Instead, the instruction is
> run once when the SPI offload trigger is enabled.
> 
> This is done to allow higher sample rates for ADCs using this SPI
> controller.
> 
> Signed-off-by: David Lechner <dlechner@baylibre.com>
> ---
>  drivers/spi/spi-axi-spi-engine.c | 33 ++++++++++++++++++++++++++++++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
> index d040deffa9bb9bdcb67bcc8af0a1cfad2e4f6041..05ef2589f8dc0bdaa1b3bb3a459670d174f821a2 100644
> --- a/drivers/spi/spi-axi-spi-engine.c
> +++ b/drivers/spi/spi-axi-spi-engine.c
> @@ -141,6 +141,7 @@ struct spi_engine_offload {
>  	struct spi_engine *spi_engine;
>  	unsigned long flags;
>  	unsigned int offload_num;
> +	unsigned int spi_mode_config;
>  };
>  
>  struct spi_engine {
> @@ -284,6 +285,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
>  {
>  	struct spi_device *spi = msg->spi;
>  	struct spi_controller *host = spi->controller;
> +	struct spi_engine_offload *priv;
>  	struct spi_transfer *xfer;
>  	int clk_div, new_clk_div, inst_ns;
>  	bool keep_cs = false;
> @@ -297,9 +299,18 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry,
>  
>  	clk_div = 1;
>  
> -	spi_engine_program_add_cmd(p, dry,
> -		SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
> -			spi_engine_get_config(spi)));
> +	/*
> +	 * As an optimization, SPI offload sets this once when the offload is
> +	 * enabled instead of repeating the instruction in each message.
> +	 */
> +	if (msg->offload) {
> +		priv = msg->offload->priv;
> +		priv->spi_mode_config = spi_engine_get_config(spi);
> +	} else {
> +		spi_engine_program_add_cmd(p, dry,
> +			SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
> +				spi_engine_get_config(spi)));
> +	}
>  
>  	xfer = list_first_entry(&msg->transfers, struct spi_transfer, transfer_list);
>  	spi_engine_gen_cs(p, dry, spi, !xfer->cs_off);
> @@ -842,6 +853,22 @@ static int spi_engine_trigger_enable(struct spi_offload *offload)
>  	struct spi_engine_offload *priv = offload->priv;
>  	struct spi_engine *spi_engine = priv->spi_engine;
>  	unsigned int reg;
> +	int ret;
> +
> +	writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
> +		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> +
> +	writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
> +					    priv->spi_mode_config),
> +		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> +
> +	writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
> +		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> +

I would assume that SPI_ENGINE_CMD_SYNC(0) + SPI_ENGINE_CMD_SYNC(1) should be
executed in order by the core? I think all these relaxed APIs don't give any
guarantee about store completion, so, in theory, we could have SYNC(0) after
SYNC(1). Even the full barrier variants don't guarantee this, I believe [1].
There's the ioremap_np() variant, but it's likely not supported on many
platforms. Doing a read() before SYNC(1) should be all we need to guarantee
proper ordering here.

Or maybe this is all theoretical and not an issue on the platforms this driver
supports, but meh, just raising the possibility.


[1]: https://elixir.bootlin.com/linux/v6.12-rc6/source/Documentation/driver-api/device-io.rst#L299

- Nuno Sá

> +	ret = readl_relaxed_poll_timeout(spi_engine->base + SPI_ENGINE_REG_SYNC_ID,
> +					 reg, reg == 1, 1, 1000);
> +	if (ret)
> +		return ret;
>  
>  	reg = readl_relaxed(spi_engine->base +
>  			    SPI_ENGINE_REG_OFFLOAD_CTRL(priv->offload_num));
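
A minimal sketch of the read-back Nuno suggests, not part of the posted patch;
it assumes that reading SPI_ENGINE_REG_SYNC_ID at this point is harmless and
only serves to flush the posted writes before the final sync word:

	writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);

	writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
					    priv->spi_mode_config),
		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);

	/*
	 * Hypothetical ordering fix along the lines suggested above: a read
	 * from the device between the queued commands and SYNC(1) forces the
	 * earlier posted writes to complete. The choice of
	 * SPI_ENGINE_REG_SYNC_ID as the register to read back is an
	 * assumption for illustration only.
	 */
	readl_relaxed(spi_engine->base + SPI_ENGINE_REG_SYNC_ID);

	writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
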
Nuno Sá April 29, 2025, 3:40 p.m. UTC | #2
On Tue, 2025-04-29 at 10:24 -0500, David Lechner wrote:
> On 4/29/25 4:08 AM, Nuno Sá wrote:
> > Hi David,
> > 
> > The whole series LGTM but I do have a small concern (maybe a hypothetical one)
> > in this one...
> > 
> > 
> > On Mon, 2025-04-28 at 15:58 -0500, David Lechner wrote:
> 
> 
> ...
> 
> > > +
> > > +	writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
> > > +		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> > > +
> > > +	writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
> > > +					    priv->spi_mode_config),
> > > +		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> > > +
> > > +	writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
> > > +		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
> > > +
> > 
> > I would assume that SPI_ENGINE_CMD_SYNC(0) + SPI_ENGINE_CMD_SYNC(1) should be
> > executed in order by the core? I think all these relaxed APIs don't give any
> > guarantee about store completion, so, in theory, we could have SYNC(0) after
> > SYNC(1). Even the full barrier variants don't guarantee this, I believe [1].
> > There's the ioremap_np() variant, but it's likely not supported on many
> > platforms. Doing a read() before SYNC(1) should be all we need to guarantee
> > proper ordering here.
> > 
> > Or maybe this is all theoretical and not an issue on the platforms this driver
> > supports, but meh, just raising the possibility.
> > 
> > 
> > [1]: https://elixir.bootlin.com/linux/v6.12-rc6/source/Documentation/driver-api/device-io.rst#L299
> > 
> 
> The way I am reading this, relaxed isn't "no barriers", just "weaker
> barriers".

Yes, sometimes just compiler ones. Bad phrasing from me...

> Quoting from the linked doc:
> 
> 	On architectures that require an expensive barrier for serializing
> 	against DMA, these “relaxed” versions of the MMIO accessors only
> 	serialize against each other, but contain a less expensive barrier
> 	operation.
> 
> So that sounds like we should not have a problem to me. (And if there is a
> problem, then it would affect other parts of the driver, like when we load
> the fifos with tx data in a loop.)

Well, that just ensures they are issued in the order you wrote them, but it does
not guarantee that they end up in the same order on the peripheral itself.

Anyway, likely not an issue on the arches we care about, and as you mentioned,
there are other places in the driver where this could matter. So I guess it's
not material for this series.

- Nuno Sá
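
For reference, a simplified sketch of the kind of loop David mentions above
(filling a FIFO with back-to-back relaxed writes); the helper name and the
choice of FIFO register are illustrative rather than lifted from the driver:

	/*
	 * Illustrative helper, not from the driver: a loop of writel_relaxed()
	 * stores to the same FIFO register, which already relies on the relaxed
	 * accessors reaching the device in program order.
	 */
	static void queue_spi_engine_cmds(void __iomem *base, const u16 *cmds,
					  unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++)
			writel_relaxed(cmds[i], base + SPI_ENGINE_REG_CMD_FIFO);
	}
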

Patch

diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index d040deffa9bb9bdcb67bcc8af0a1cfad2e4f6041..05ef2589f8dc0bdaa1b3bb3a459670d174f821a2 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -141,6 +141,7 @@  struct spi_engine_offload {
 	struct spi_engine *spi_engine;
 	unsigned long flags;
 	unsigned int offload_num;
+	unsigned int spi_mode_config;
 };
 
 struct spi_engine {
@@ -284,6 +285,7 @@  static void spi_engine_compile_message(struct spi_message *msg, bool dry,
 {
 	struct spi_device *spi = msg->spi;
 	struct spi_controller *host = spi->controller;
+	struct spi_engine_offload *priv;
 	struct spi_transfer *xfer;
 	int clk_div, new_clk_div, inst_ns;
 	bool keep_cs = false;
@@ -297,9 +299,18 @@  static void spi_engine_compile_message(struct spi_message *msg, bool dry,
 
 	clk_div = 1;
 
-	spi_engine_program_add_cmd(p, dry,
-		SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
-			spi_engine_get_config(spi)));
+	/*
+	 * As an optimization, SPI offload sets this once when the offload is
+	 * enabled instead of repeating the instruction in each message.
+	 */
+	if (msg->offload) {
+		priv = msg->offload->priv;
+		priv->spi_mode_config = spi_engine_get_config(spi);
+	} else {
+		spi_engine_program_add_cmd(p, dry,
+			SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
+				spi_engine_get_config(spi)));
+	}
 
 	xfer = list_first_entry(&msg->transfers, struct spi_transfer, transfer_list);
 	spi_engine_gen_cs(p, dry, spi, !xfer->cs_off);
@@ -842,6 +853,22 @@  static int spi_engine_trigger_enable(struct spi_offload *offload)
 	struct spi_engine_offload *priv = offload->priv;
 	struct spi_engine *spi_engine = priv->spi_engine;
 	unsigned int reg;
+	int ret;
+
+	writel_relaxed(SPI_ENGINE_CMD_SYNC(0),
+		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+	writel_relaxed(SPI_ENGINE_CMD_WRITE(SPI_ENGINE_CMD_REG_CONFIG,
+					    priv->spi_mode_config),
+		       spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+	writel_relaxed(SPI_ENGINE_CMD_SYNC(1),
+		spi_engine->base + SPI_ENGINE_REG_CMD_FIFO);
+
+	ret = readl_relaxed_poll_timeout(spi_engine->base + SPI_ENGINE_REG_SYNC_ID,
+					 reg, reg == 1, 1, 1000);
+	if (ret)
+		return ret;
 
 	reg = readl_relaxed(spi_engine->base +
 			    SPI_ENGINE_REG_OFFLOAD_CTRL(priv->offload_num));