diff mbox series

[RFT,v2,2/3] mmc: tmio: Add data timeout error detection

Message ID 20201120150647.123237-3-wsa+renesas@sang-engineering.com
State Superseded
Headers show
Series mmc: tmio: honor busy timeouts properly | expand

Commit Message

Wolfram Sang Nov. 20, 2020, 3:06 p.m. UTC
From: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>

Currently, busy timeout is not checked for data transfer command. But,
if the temperature condition changes, the data cannot be acquired
correctly and timeout may occur. Also, we could reproduce an issue by
using mmc_test driver (e.g. "Correct xfer_size at write (start
failure)"). Therefore, this adds timeout error check.

Signed-off-by: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>
[saito: rework commit message.]
Signed-off-by: Takeshi Saito <takeshi.saito.xv@renesas.com>
[shimoda: rebase, add commit description]
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 drivers/mmc/host/tmio_mmc.h      |  6 ++++--
 drivers/mmc/host/tmio_mmc_core.c | 17 +++++++++++------
 2 files changed, 15 insertions(+), 8 deletions(-)

Comments

Yoshihiro Shimoda Nov. 24, 2020, 12:41 p.m. UTC | #1
Hi Wolfram-san,

Thank you for the patch!

> From: Wolfram Sang, Sent: Saturday, November 21, 2020 12:07 AM

> 

> From: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>

> 

> Currently, busy timeout is not checked for data transfer command. But,

> if the temperature condition changes, the data cannot be acquired

> correctly and timeout may occur. Also, we could reproduce an issue by

> using mmc_test driver (e.g. "Correct xfer_size at write (start

> failure)"). Therefore, this adds timeout error check.

> 

> Signed-off-by: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>

> [saito: rework commit message.]

> Signed-off-by: Takeshi Saito <takeshi.saito.xv@renesas.com>

> [shimoda: rebase, add commit description]

> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

> Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>

> ---

>  drivers/mmc/host/tmio_mmc.h      |  6 ++++--

>  drivers/mmc/host/tmio_mmc_core.c | 17 +++++++++++------

>  2 files changed, 15 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h

> index 819198af17f4..15cf850e75d3 100644

> --- a/drivers/mmc/host/tmio_mmc.h

> +++ b/drivers/mmc/host/tmio_mmc.h

> @@ -100,8 +100,10 @@

>  /* This is the mask used at reset by the chip */

>  #define TMIO_MASK_INIT_RCAR2	0x8b7f031d /* Initial value for R-Car Gen2+ */

>  #define TMIO_MASK_ALL           0x837f031d

> -#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)

> -#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)

> +#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND | \

> +			   TMIO_STAT_DATATIMEOUT)

> +#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND | \

> +			   TMIO_STAT_DATATIMEOUT)


I talked Saito-san about this patch locally and we can drop these lines
because this driver can detect data timeout in Access End interrupt
and the driver didn't enable error related interrupts like CRCFAIL.

>  #define TMIO_MASK_CMD     (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \

>  		TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)

>  #define TMIO_MASK_IRQ     (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)

> diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c

> index 4727fcfdf95f..541a0cf4a9fa 100644

> --- a/drivers/mmc/host/tmio_mmc_core.c

> +++ b/drivers/mmc/host/tmio_mmc_core.c

> @@ -477,8 +477,10 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)

>  	if (!data)

>  		goto out;

> 

> -	if (stat & TMIO_STAT_CRCFAIL || stat & TMIO_STAT_STOPBIT_ERR ||

> -	    stat & TMIO_STAT_TXUNDERRUN)

> +	if (stat & TMIO_STAT_DATATIMEOUT)

> +		data->error = -ETIMEDOUT;


The following commit [1] is a BSP local patch though,
we need to set -EILSEQ to retune a card for R-Car Gen3 [2]
by MMC core driver [3].

[1]
https://github.com/renesas-rcar/linux-bsp/commit/6f7519552fbed1474561ff423acb967eb03994e3
Remarks (please look the [2] below at first):
 - The patch checks whether a card is removed or not by using MMC_CAP_NONREMOVABLE and
   get_cd() to avoid retuning by MMC core driver for R-Car Gen3.
 - The patch also change the tmio_mmc_cmd_irq() when CMDTIMEOUT happens for R-Car Gen3.
   But, for upstream, we should make a separated patch for it.
 - These "for R-Car Gen3" means I'm thinking we need additional condition:
    1) to set -EILSRQ or -ETIMEDOUT for R-Car Gen3
    2) to set -ETIMEDOUT anyway for other SoCs.
   # These are complex conditions a little though...

[2]
This is related to R-Car Gen3 tuning flow (e.g. Figure 70.32).

[3]
---
drivers/mmc/core/core.c:
void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
{
        struct mmc_command *cmd = mrq->cmd;
        int err = cmd->error;

        /* Flag re-tuning needed on CRC errors */
        if (cmd->opcode != MMC_SEND_TUNING_BLOCK &&
            cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200 &&
            !host->retune_crc_disable &&
            (err == -EILSEQ || (mrq->sbc && mrq->sbc->error == -EILSEQ) ||
            (mrq->data && mrq->data->error == -EILSEQ) ||              <-- here
            (mrq->stop && mrq->stop->error == -EILSEQ)))
                mmc_retune_needed(host);
---

> +	else if (stat & TMIO_STAT_CRCFAIL || stat & TMIO_STAT_STOPBIT_ERR ||

> +		 stat & TMIO_STAT_TXUNDERRUN)

>  		data->error = -EILSEQ;

>  	if (host->dma_on && (data->flags & MMC_DATA_WRITE)) {

>  		u32 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);

> @@ -501,11 +503,13 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)

>  		}

> 

>  		if (done) {

> -			tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);

> +			tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND |

> +						  TMIO_STAT_DATATIMEOUT);

>  			tmio_mmc_dataend_dma(host);

>  		}

>  	} else if (host->dma_on && (data->flags & MMC_DATA_READ)) {

> -		tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);

> +		tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND |

> +					  TMIO_STAT_DATATIMEOUT);

>  		tmio_mmc_dataend_dma(host);

>  	} else {

>  		tmio_mmc_do_data_irq(host);

> @@ -619,8 +623,9 @@ static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host, int ireg,

>  	}

> 

>  	/* Data transfer completion */

> -	if (ireg & TMIO_STAT_DATAEND) {

> -		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);

> +	if (ireg & (TMIO_STAT_DATAEND | TMIO_STAT_DATATIMEOUT)) {

> +		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND |

> +				      TMIO_STAT_DATATIMEOUT);


So, we can also drop these adding "TMIO_STAT_DATATIMEOUT".

Best regards,
Yoshihiro Shimoda
Wolfram Sang Nov. 25, 2020, 8:56 a.m. UTC | #2
Hi Shimoda-san,

> I talked Saito-san about this patch locally and we can drop these lines

> because this driver can detect data timeout in Access End interrupt

> and the driver didn't enable error related interrupts like CRCFAIL.


I see. I already wondered why the BSP patch
6f7519552fbed1474561ff423acb967eb03994e3 did not have these lines.

> The following commit [1] is a BSP local patch though,

> we need to set -EILSEQ to retune a card for R-Car Gen3 [2]

> by MMC core driver [3].


So, if there is a non-removable eMMC or a SD card inserted, then we need
to EILSEQ to enforce a retune. Otherwise it is a data timeout. Is my
understanding correct?

I wonder, though, if "Gen3" is a complete description? There are SDHI
instances on Gen2 which can also do SDR104. Won't they need the same
treatment? Then we could say that every SDHI which has an SCC will need
this treatment.

>  - The patch also change the tmio_mmc_cmd_irq() when CMDTIMEOUT happens for R-Car Gen3.

>    But, for upstream, we should make a separated patch for it.


I am sorry. I don't fully understand. Why does the change to
tmio_mmc_cmd_irq() need a seperate patch?

>  - These "for R-Car Gen3" means I'm thinking we need additional condition:

>     1) to set -EILSRQ or -ETIMEDOUT for R-Car Gen3

>     2) to set -ETIMEDOUT anyway for other SoCs.

>    # These are complex conditions a little though...


Well, from what I understood this sounds not too hard. Let's hope I just
got it correctly :)

However, there is something in this patch which makes mmc_test #15 work,
though. We still want this in this series, or do you think it is better
to move it to a seperate series?

Kind regards,

   Wolfram
Yoshihiro Shimoda Nov. 25, 2020, 11:05 a.m. UTC | #3
Hi Wolfram-san,

> From: Wolfram Sang, Sent: Wednesday, November 25, 2020 5:56 PM

<snip>
> > The following commit [1] is a BSP local patch though,

> > we need to set -EILSEQ to retune a card for R-Car Gen3 [2]

> > by MMC core driver [3].

> 

> So, if there is a non-removable eMMC or a SD card inserted, then we need

> to EILSEQ to enforce a retune. Otherwise it is a data timeout. Is my

> understanding correct?


Yes.

> I wonder, though, if "Gen3" is a complete description? There are SDHI

> instances on Gen2 which can also do SDR104. Won't they need the same

> treatment? Then we could say that every SDHI which has an SCC will need

> this treatment.


Hmm, since Gen2 datasheet didn't update the flowchart so that I thought
we need special handling for "Gen3". But, I'll ask hardware team whether
my thought is correct or not.

> >  - The patch also change the tmio_mmc_cmd_irq() when CMDTIMEOUT happens for R-Car Gen3.

> >    But, for upstream, we should make a separated patch for it.

> 

> I am sorry. I don't fully understand. Why does the change to

> tmio_mmc_cmd_irq() need a seperate patch?


The patch subject is "Add data timeout error detection".
That's why I thought so.

> >  - These "for R-Car Gen3" means I'm thinking we need additional condition:

> >     1) to set -EILSRQ or -ETIMEDOUT for R-Car Gen3

> >     2) to set -ETIMEDOUT anyway for other SoCs.

> >    # These are complex conditions a little though...

> 

> Well, from what I understood this sounds not too hard. Let's hope I just

> got it correctly :)


I got it :)

> However, there is something in this patch which makes mmc_test #15 work,

> though. We still want this in this series, or do you think it is better

> to move it to a seperate series?


Thank you for the comments. I realized mmc_test #15 failed if tmio driver
returned -EILSEQ when I applied the BSP patch as the following:
---
[  206.016193] mmc0: Starting tests of card mmc0:0001...
[  206.022708] mmc0: Test case 15. Proper xfer_size at write (start failure)...
[  206.854082] mmc0: Result: ERROR (-84)
[  206.858632] mmc0: Tests completed.
---

So, I'm thinking retuning -EILSEQ in cmd/data timeout should be a separate series now.
# Perhaps we need other way to follow the retuning sequence for R-Car Gen3...

Best regards,
Yoshihiro Shimoda
diff mbox series

Patch

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 819198af17f4..15cf850e75d3 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -100,8 +100,10 @@ 
 /* This is the mask used at reset by the chip */
 #define TMIO_MASK_INIT_RCAR2	0x8b7f031d /* Initial value for R-Car Gen2+ */
 #define TMIO_MASK_ALL           0x837f031d
-#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
-#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
+#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND | \
+			   TMIO_STAT_DATATIMEOUT)
+#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND | \
+			   TMIO_STAT_DATATIMEOUT)
 #define TMIO_MASK_CMD     (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \
 		TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
 #define TMIO_MASK_IRQ     (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 4727fcfdf95f..541a0cf4a9fa 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -477,8 +477,10 @@  static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)
 	if (!data)
 		goto out;
 
-	if (stat & TMIO_STAT_CRCFAIL || stat & TMIO_STAT_STOPBIT_ERR ||
-	    stat & TMIO_STAT_TXUNDERRUN)
+	if (stat & TMIO_STAT_DATATIMEOUT)
+		data->error = -ETIMEDOUT;
+	else if (stat & TMIO_STAT_CRCFAIL || stat & TMIO_STAT_STOPBIT_ERR ||
+		 stat & TMIO_STAT_TXUNDERRUN)
 		data->error = -EILSEQ;
 	if (host->dma_on && (data->flags & MMC_DATA_WRITE)) {
 		u32 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
@@ -501,11 +503,13 @@  static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)
 		}
 
 		if (done) {
-			tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);
+			tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND |
+						  TMIO_STAT_DATATIMEOUT);
 			tmio_mmc_dataend_dma(host);
 		}
 	} else if (host->dma_on && (data->flags & MMC_DATA_READ)) {
-		tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);
+		tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND |
+					  TMIO_STAT_DATATIMEOUT);
 		tmio_mmc_dataend_dma(host);
 	} else {
 		tmio_mmc_do_data_irq(host);
@@ -619,8 +623,9 @@  static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host, int ireg,
 	}
 
 	/* Data transfer completion */
-	if (ireg & TMIO_STAT_DATAEND) {
-		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);
+	if (ireg & (TMIO_STAT_DATAEND | TMIO_STAT_DATATIMEOUT)) {
+		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND |
+				      TMIO_STAT_DATATIMEOUT);
 		tmio_mmc_data_irq(host, status);
 		return true;
 	}