diff mbox series

[5.4,40/62] spi: spi-geni-qcom: Fix geni_spi_isr() NULL dereference in timeout case

Message ID 20210115122000.333323971@linuxfoundation.org
State New
Headers show
Series None | expand

Commit Message

Greg Kroah-Hartman Jan. 15, 2021, 12:28 p.m. UTC
From: Douglas Anderson <dianders@chromium.org>

commit 4aa1464acbe3697710279a4bd65cb4801ed30425 upstream.

In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state
variable") we changed handle_fifo_timeout() so that we set
"mas->cur_xfer" to NULL to make absolutely sure that we don't mess
with the buffers from the previous transfer in the timeout case.

Unfortunately, this caused the IRQ handler to dereference NULL in some
cases.  One case:

  CPU0                           CPU1
  ----                           ----
                                 setup_fifo_xfer()
                                  geni_se_setup_m_cmd()
                                 <hardware starts transfer>
                                 <transfer completes in hardware>
                                 <hardware sets M_RX_FIFO_WATERMARK_EN in m_irq>
                                 ...
                                 handle_fifo_timeout()
                                  spin_lock_irq(mas->lock)
                                  mas->cur_xfer = NULL
                                  geni_se_cancel_m_cmd()
                                  spin_unlock_irq(mas->lock)

  geni_spi_isr()
   spin_lock(mas->lock)
   if (m_irq & M_RX_FIFO_WATERMARK_EN)
    geni_spi_handle_rx()
     mas->cur_xfer NULL dereference!

tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout
handling setting mas->cur_xfer to NULL.

Let's check for the NULL transfer in the TX and RX cases and reset the
watermark or clear out the fifo respectively to put the hardware back
into a sane state.

NOTE: things still could get confused if we get timeouts all the way
through handle_fifo_timeout() and then start a new transfer because
interrupts from the old transfer / cancel / abort could still be
pending.  A future patch will help this corner case.

Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/spi/spi-geni-qcom.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Nathan Chancellor Jan. 16, 2021, 6:48 p.m. UTC | #1
On Fri, Jan 15, 2021 at 01:28:02PM +0100, Greg Kroah-Hartman wrote:
> From: Douglas Anderson <dianders@chromium.org>

> 

> commit 4aa1464acbe3697710279a4bd65cb4801ed30425 upstream.

> 

> In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state

> variable") we changed handle_fifo_timeout() so that we set

> "mas->cur_xfer" to NULL to make absolutely sure that we don't mess

> with the buffers from the previous transfer in the timeout case.

> 

> Unfortunately, this caused the IRQ handler to dereference NULL in some

> cases.  One case:

> 

>   CPU0                           CPU1

>   ----                           ----

>                                  setup_fifo_xfer()

>                                   geni_se_setup_m_cmd()

>                                  <hardware starts transfer>

>                                  <transfer completes in hardware>

>                                  <hardware sets M_RX_FIFO_WATERMARK_EN in m_irq>

>                                  ...

>                                  handle_fifo_timeout()

>                                   spin_lock_irq(mas->lock)

>                                   mas->cur_xfer = NULL

>                                   geni_se_cancel_m_cmd()

>                                   spin_unlock_irq(mas->lock)

> 

>   geni_spi_isr()

>    spin_lock(mas->lock)

>    if (m_irq & M_RX_FIFO_WATERMARK_EN)

>     geni_spi_handle_rx()

>      mas->cur_xfer NULL dereference!

> 

> tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout

> handling setting mas->cur_xfer to NULL.

> 

> Let's check for the NULL transfer in the TX and RX cases and reset the

> watermark or clear out the fifo respectively to put the hardware back

> into a sane state.

> 

> NOTE: things still could get confused if we get timeouts all the way

> through handle_fifo_timeout() and then start a new transfer because

> interrupts from the old transfer / cancel / abort could still be

> pending.  A future patch will help this corner case.

> 

> Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")

> Signed-off-by: Douglas Anderson <dianders@chromium.org>

> Reviewed-by: Stephen Boyd <swboyd@chromium.org>

> Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid

> Signed-off-by: Mark Brown <broonie@kernel.org>

> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

> 

> ---

>  drivers/spi/spi-geni-qcom.c |   14 ++++++++++++++

>  1 file changed, 14 insertions(+)

> 

> --- a/drivers/spi/spi-geni-qcom.c

> +++ b/drivers/spi/spi-geni-qcom.c

> @@ -415,6 +415,12 @@ static void geni_spi_handle_tx(struct sp

>  	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);

>  	unsigned int i = 0;

>  

> +	/* Stop the watermark IRQ if nothing to send */

> +	if (!mas->cur_xfer) {

> +		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);

> +		return false;

> +	}

> +

>  	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;

>  	if (mas->tx_rem_bytes < max_bytes)

>  		max_bytes = mas->tx_rem_bytes;

> @@ -454,6 +460,14 @@ static void geni_spi_handle_rx(struct sp

>  		if (rx_last_byte_valid && rx_last_byte_valid < 4)

>  			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;

>  	}

> +

> +	/* Clear out the FIFO and bail if nowhere to put it */

> +	if (!mas->cur_xfer) {

> +		for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++)

> +			readl(se->base + SE_GENI_RX_FIFOn);

> +		return;

> +	}

> +

>  	if (mas->rx_rem_bytes < rx_bytes)

>  		rx_bytes = mas->rx_rem_bytes;

>  

> 

> 


This commit breaks the build with clang:

drivers/spi/spi-geni-qcom.c:421:3: error: void function
'geni_spi_handle_tx' should not return a value [-Wreturn-type]
                return false;
                ^      ~~~~~
1 error generated.

It looks like commit 6d66507d9b55 ("spi: spi-geni-qcom: Don't wait to
start 1st transfer if transmitting") would resolve this.

It might be worth picking up commit 172aad81a882 ("kbuild: enforce
-Werror=return-type") so that GCC behaves like clang does.

Cheers,
Nathan
Greg Kroah-Hartman Jan. 17, 2021, 12:54 p.m. UTC | #2
On Sat, Jan 16, 2021 at 11:48:51AM -0700, Nathan Chancellor wrote:
> On Fri, Jan 15, 2021 at 01:28:02PM +0100, Greg Kroah-Hartman wrote:

> > From: Douglas Anderson <dianders@chromium.org>

> > 

> > commit 4aa1464acbe3697710279a4bd65cb4801ed30425 upstream.

> > 

> > In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state

> > variable") we changed handle_fifo_timeout() so that we set

> > "mas->cur_xfer" to NULL to make absolutely sure that we don't mess

> > with the buffers from the previous transfer in the timeout case.

> > 

> > Unfortunately, this caused the IRQ handler to dereference NULL in some

> > cases.  One case:

> > 

> >   CPU0                           CPU1

> >   ----                           ----

> >                                  setup_fifo_xfer()

> >                                   geni_se_setup_m_cmd()

> >                                  <hardware starts transfer>

> >                                  <transfer completes in hardware>

> >                                  <hardware sets M_RX_FIFO_WATERMARK_EN in m_irq>

> >                                  ...

> >                                  handle_fifo_timeout()

> >                                   spin_lock_irq(mas->lock)

> >                                   mas->cur_xfer = NULL

> >                                   geni_se_cancel_m_cmd()

> >                                   spin_unlock_irq(mas->lock)

> > 

> >   geni_spi_isr()

> >    spin_lock(mas->lock)

> >    if (m_irq & M_RX_FIFO_WATERMARK_EN)

> >     geni_spi_handle_rx()

> >      mas->cur_xfer NULL dereference!

> > 

> > tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout

> > handling setting mas->cur_xfer to NULL.

> > 

> > Let's check for the NULL transfer in the TX and RX cases and reset the

> > watermark or clear out the fifo respectively to put the hardware back

> > into a sane state.

> > 

> > NOTE: things still could get confused if we get timeouts all the way

> > through handle_fifo_timeout() and then start a new transfer because

> > interrupts from the old transfer / cancel / abort could still be

> > pending.  A future patch will help this corner case.

> > 

> > Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")

> > Signed-off-by: Douglas Anderson <dianders@chromium.org>

> > Reviewed-by: Stephen Boyd <swboyd@chromium.org>

> > Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid

> > Signed-off-by: Mark Brown <broonie@kernel.org>

> > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

> > 

> > ---

> >  drivers/spi/spi-geni-qcom.c |   14 ++++++++++++++

> >  1 file changed, 14 insertions(+)

> > 

> > --- a/drivers/spi/spi-geni-qcom.c

> > +++ b/drivers/spi/spi-geni-qcom.c

> > @@ -415,6 +415,12 @@ static void geni_spi_handle_tx(struct sp

> >  	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);

> >  	unsigned int i = 0;

> >  

> > +	/* Stop the watermark IRQ if nothing to send */

> > +	if (!mas->cur_xfer) {

> > +		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);

> > +		return false;

> > +	}

> > +

> >  	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;

> >  	if (mas->tx_rem_bytes < max_bytes)

> >  		max_bytes = mas->tx_rem_bytes;

> > @@ -454,6 +460,14 @@ static void geni_spi_handle_rx(struct sp

> >  		if (rx_last_byte_valid && rx_last_byte_valid < 4)

> >  			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;

> >  	}

> > +

> > +	/* Clear out the FIFO and bail if nowhere to put it */

> > +	if (!mas->cur_xfer) {

> > +		for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++)

> > +			readl(se->base + SE_GENI_RX_FIFOn);

> > +		return;

> > +	}

> > +

> >  	if (mas->rx_rem_bytes < rx_bytes)

> >  		rx_bytes = mas->rx_rem_bytes;

> >  

> > 

> > 

> 

> This commit breaks the build with clang:

> 

> drivers/spi/spi-geni-qcom.c:421:3: error: void function

> 'geni_spi_handle_tx' should not return a value [-Wreturn-type]

>                 return false;

>                 ^      ~~~~~

> 1 error generated.

> 

> It looks like commit 6d66507d9b55 ("spi: spi-geni-qcom: Don't wait to

> start 1st transfer if transmitting") would resolve this.

> 

> It might be worth picking up commit 172aad81a882 ("kbuild: enforce

> -Werror=return-type") so that GCC behaves like clang does.


Argh, I thought I had dropped this before, but no.  Good catch, I've
dropped it now.

And yes, that (the gcc one) might be a good patch to backport; I'll
queue it up for the next round, thanks.

greg k-h
diff mbox series

Patch

--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -415,6 +415,12 @@  static void geni_spi_handle_tx(struct sp
 	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
 	unsigned int i = 0;
 
+	/* Stop the watermark IRQ if nothing to send */
+	if (!mas->cur_xfer) {
+		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+
 	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
 	if (mas->tx_rem_bytes < max_bytes)
 		max_bytes = mas->tx_rem_bytes;
@@ -454,6 +460,14 @@  static void geni_spi_handle_rx(struct sp
 		if (rx_last_byte_valid && rx_last_byte_valid < 4)
 			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
 	}
+
+	/* Clear out the FIFO and bail if nowhere to put it */
+	if (!mas->cur_xfer) {
+		for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++)
+			readl(se->base + SE_GENI_RX_FIFOn);
+		return;
+	}
+
 	if (mas->rx_rem_bytes < rx_bytes)
 		rx_bytes = mas->rx_rem_bytes;