diff mbox

[PATCHv2,3/5] mmc: shdci-bcm2835: add efficient back-to-back write workaround

Message ID 1414651017-3545-4-git-send-email-sbranden@broadcom.com
State New
Headers show

Commit Message

Scott Branden Oct. 30, 2014, 6:36 a.m. UTC
The bcm2835 has clock domain issues when back to back writes to certain
registers are written.  The existing driver works around this issue with
udelay.  A more efficient method is to store the 8 and 16 bit writes
to the registers affected and then write them as 32 bits at the appropriate
time.

Signed-off-by: Scott Branden <sbranden@broadcom.com>
---
 drivers/mmc/host/sdhci-bcm2835.c |  103 ++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 48 deletions(-)

Comments

Stephen Warren Nov. 5, 2014, 4:57 a.m. UTC | #1
On 10/30/2014 12:36 AM, Scott Branden wrote:
> The bcm2835 has clock domain issues when back to back writes to certain
> registers are written.  The existing driver works around this issue with
> udelay.  A more efficient method is to store the 8 and 16 bit writes
> to the registers affected and then write them as 32 bits at the appropriate
> time.

> diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c

>  static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
>  {
>  	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
> -	struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
> -	u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
> -		bcm2835_sdhci_readl(host, reg & ~3);
> +	struct bcm2835_sdhci_host *bcm2835_host = pltfm_host->priv;

Is that type change for bcm2835_host really correct?

> +	} else {
> +		/* Read reg, all other registers are not shadowed */
> +		oldval = readl(host->ioaddr + (reg & ~3));

Is there any reason to use readl() directly here rather than calling
bcm2835_readl()? ...

>  static void bcm2835_sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
>  {
> -	u32 oldval = bcm2835_sdhci_readl(host, reg & ~3);
> +	u32 oldval = readl(host->ioaddr + (reg & ~3));

... and here in particular, since this seems like an unrelated change?

>  static int bcm2835_sdhci_probe(struct platform_device *pdev)
>  {
>  	struct sdhci_host *host;
> -	struct bcm2835_sdhci *bcm2835_host;
> +	struct bcm2835_sdhci_host *bcm2835_host;

Is that type change for bcm2835_host really correct?
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Branden Nov. 5, 2014, 6:55 a.m. UTC | #2
On 14-11-04 08:57 PM, Stephen Warren wrote:
> On 10/30/2014 12:36 AM, Scott Branden wrote:
>> The bcm2835 has clock domain issues when back to back writes to certain
>> registers are written.  The existing driver works around this issue with
>> udelay.  A more efficient method is to store the 8 and 16 bit writes
>> to the registers affected and then write them as 32 bits at the appropriate
>> time.
>
>> diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
>
>>   static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
>>   {
>>   	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>> -	struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
>> -	u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
>> -		bcm2835_sdhci_readl(host, reg & ~3);
>> +	struct bcm2835_sdhci_host *bcm2835_host = pltfm_host->priv;
>
> Is that type change for bcm2835_host really correct?
Yes - at the top of the patch the structure has been expanded and named 
appropriately.

-struct bcm2835_sdhci {
-	u32 shadow;
+struct bcm2835_sdhci_host {
+	u32 shadow_cmd;
+	u32 shadow_blk;
  };
>
>> +	} else {
>> +		/* Read reg, all other registers are not shadowed */
>> +		oldval = readl(host->ioaddr + (reg & ~3));
>
> Is there any reason to use readl() directly here rather than calling
> bcm2835_readl()? ...
Yes, bcm2835_readl does not need to be called in read-modify-write and 
shadow register situations and just adds overhead.  All that needs to be 
called is readl.  bcm2835_readl has some existing ugly code in it to 
modify the capabilities register on a read function.  This info never 
needs to be for write as you can't overwrite the capabilities register. 
  I hope to get rid of the capabilities hack in a future patch as this 
should never have been acceptable in upstreamed code to begin with.  The 
capabilities override should have been passed in through a device tree 
entry.
>
>>   static void bcm2835_sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
>>   {
>> -	u32 oldval = bcm2835_sdhci_readl(host, reg & ~3);
>> +	u32 oldval = readl(host->ioaddr + (reg & ~3));
>
> ... and here in particular, since this seems like an unrelated change?
Same situation with bcm2835_readl above.  No need to call in 
read-modify-write situations.
>
>>   static int bcm2835_sdhci_probe(struct platform_device *pdev)
>>   {
>>   	struct sdhci_host *host;
>> -	struct bcm2835_sdhci *bcm2835_host;
>> +	struct bcm2835_sdhci_host *bcm2835_host;
>
> Is that type change for bcm2835_host really correct?
>
yes - structure renamed above
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Warren Nov. 6, 2014, 4:48 a.m. UTC | #3
On 11/04/2014 11:55 PM, Scott Branden wrote:
> On 14-11-04 08:57 PM, Stephen Warren wrote:
>> On 10/30/2014 12:36 AM, Scott Branden wrote:
>>> The bcm2835 has clock domain issues when back to back writes to certain
>>> registers are written.  The existing driver works around this issue with
>>> udelay.  A more efficient method is to store the 8 and 16 bit writes
>>> to the registers affected and then write them as 32 bits at the
>>> appropriate
>>> time.
>>
>>> diff --git a/drivers/mmc/host/sdhci-bcm2835.c
>>> b/drivers/mmc/host/sdhci-bcm2835.c
>>
>>>   static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val,
>>> int reg)
>>>   {
>>>       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> -    struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
>>> -    u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
>>> -        bcm2835_sdhci_readl(host, reg & ~3);
>>> +    struct bcm2835_sdhci_host *bcm2835_host = pltfm_host->priv;
>>
>> Is that type change for bcm2835_host really correct?
>
> Yes - at the top of the patch the structure has been expanded and named
> appropriately.
> 
> -struct bcm2835_sdhci {
> -    u32 shadow;
> +struct bcm2835_sdhci_host {
> +    u32 shadow_cmd;
> +    u32 shadow_blk;
>  };

Ah yes, sorry for missing that.

>>> +    } else {
>>> +        /* Read reg, all other registers are not shadowed */
>>> +        oldval = readl(host->ioaddr + (reg & ~3));
>>
>> Is there any reason to use readl() directly here rather than calling
>> bcm2835_readl()? ...
>
> Yes, bcm2835_readl does not need to be called in read-modify-write and
> shadow register situations and just adds overhead.  All that needs to be
> called is readl.  bcm2835_readl has some existing ugly code in it to
> modify the capabilities register on a read function.  This info never
> needs to be for write as you can't overwrite the capabilities register.

To be honest, it seems better to do all the read/write through
consistent functions. One advantage of bcm2835_readl() is that it
consistently adds on the base address internally so you don't have to
write it out every time manually. Still, the code ought to work fine
after this change, so I guess it's OK.

> I hope to get rid of the capabilities hack in a future patch as this
> should never have been acceptable in upstreamed code to begin with.  The
> capabilities override should have been passed in through a device tree
> entry.

It's a pretty common technique with precedent. I certainly don't agree
that it should be configured by DT. Arguably, DT makes sense to describe
board-to-board variations, but there's almost zero point putting data
into DT that is SoC description rather than board description; just put
it into the driver to avoid continually parsing the same data over and
over from DT just to get back to the same data that could have been
encoded into the driver. If the data varies between similar controllers,
an of_match table can easily be used to parameterize it based on
compatible value.
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Branden Nov. 7, 2014, 6:29 p.m. UTC | #4
On 14-11-05 08:48 PM, Stephen Warren wrote:
> On 11/04/2014 11:55 PM, Scott Branden wrote:
>> On 14-11-04 08:57 PM, Stephen Warren wrote:
>>> On 10/30/2014 12:36 AM, Scott Branden wrote:
>>>> The bcm2835 has clock domain issues when back to back writes to certain
>>>> registers are written.  The existing driver works around this issue with
>>>> udelay.  A more efficient method is to store the 8 and 16 bit writes
>>>> to the registers affected and then write them as 32 bits at the
>>>> appropriate
>>>> time.
>>>
>>>> diff --git a/drivers/mmc/host/sdhci-bcm2835.c
>>>> b/drivers/mmc/host/sdhci-bcm2835.c
>>>
>>>>    static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val,
>>>> int reg)
>>>>    {
>>>>        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>>> -    struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
>>>> -    u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
>>>> -        bcm2835_sdhci_readl(host, reg & ~3);
>>>> +    struct bcm2835_sdhci_host *bcm2835_host = pltfm_host->priv;
>>>
>>> Is that type change for bcm2835_host really correct?
>>
>> Yes - at the top of the patch the structure has been expanded and named
>> appropriately.
>>
>> -struct bcm2835_sdhci {
>> -    u32 shadow;
>> +struct bcm2835_sdhci_host {
>> +    u32 shadow_cmd;
>> +    u32 shadow_blk;
>>   };
>
> Ah yes, sorry for missing that.
>
>>>> +    } else {
>>>> +        /* Read reg, all other registers are not shadowed */
>>>> +        oldval = readl(host->ioaddr + (reg & ~3));
>>>
>>> Is there any reason to use readl() directly here rather than calling
>>> bcm2835_readl()? ...
>>
>> Yes, bcm2835_readl does not need to be called in read-modify-write and
>> shadow register situations and just adds overhead.  All that needs to be
>> called is readl.  bcm2835_readl has some existing ugly code in it to
>> modify the capabilities register on a read function.  This info never
>> needs to be for write as you can't overwrite the capabilities register.
>
> To be honest, it seems better to do all the read/write through
> consistent functions. One advantage of bcm2835_readl() is that it
> consistently adds on the base address internally so you don't have to
> write it out every time manually. Still, the code ought to work fine
> after this change, so I guess it's OK.
>
>> I hope to get rid of the capabilities hack in a future patch as this
>> should never have been acceptable in upstreamed code to begin with.  The
>> capabilities override should have been passed in through a device tree
>> entry.
>
> It's a pretty common technique with precedent. I certainly don't agree
> that it should be configured by DT. Arguably, DT makes sense to describe
> board-to-board variations, but there's almost zero point putting data
> into DT that is SoC description rather than board description; just put
> it into the driver to avoid continually parsing the same data over and
> over from DT just to get back to the same data that could have been
> encoded into the driver. If the data varies between similar controllers,
> an of_match table can easily be used to parameterize it based on
> compatible value.
There is work to be done here or I will be unable to use this driver in 
our chipsets.  Perhaps it will be easier having another driver 
actually... as the DMA seems quite different on RPI.
>

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index b6cb365..f8c450a 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -24,34 +24,9 @@ 
 #include <linux/mmc/host.h>
 #include "sdhci-pltfm.h"
 
-/*
- * 400KHz is max freq for card ID etc. Use that as min card clock. We need to
- * know the min to enable static calculation of max BCM2835_SDHCI_WRITE_DELAY.
- */
-#define MIN_FREQ 400000
-
-/*
- * The Arasan has a bugette whereby it may lose the content of successive
- * writes to registers that are within two SD-card clock cycles of each other
- * (a clock domain crossing problem). It seems, however, that the data
- * register does not have this problem, which is just as well - otherwise we'd
- * have to nobble the DMA engine too.
- *
- * This should probably be dynamically calculated based on the actual card
- * frequency. However, this is the longest we'll have to wait, and doesn't
- * seem to slow access down too much, so the added complexity doesn't seem
- * worth it for now.
- *
- * 1/MIN_FREQ is (max) time per tick of eMMC clock.
- * 2/MIN_FREQ is time for two ticks.
- * Multiply by 1000000 to get uS per two ticks.
- * *1000000 for uSecs.
- * +1 for hack rounding.
- */
-#define BCM2835_SDHCI_WRITE_DELAY	(((2 * 1000000) / MIN_FREQ) + 1)
-
-struct bcm2835_sdhci {
-	u32 shadow;
+struct bcm2835_sdhci_host {
+	u32 shadow_cmd;
+	u32 shadow_blk;
 };
 
 #define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18)
@@ -80,33 +55,71 @@  static u8 bcm2835_sdhci_readb(struct sdhci_host *host, int reg)
 	return byte;
 }
 
-static void bcm2835_sdhci_writel(struct sdhci_host *host, u32 val, int reg)
+static inline void bcm2835_sdhci_writel(struct sdhci_host *host,
+						u32 val, int reg)
 {
 	writel(val, host->ioaddr + reg);
-
-	udelay(BCM2835_SDHCI_WRITE_DELAY);
 }
 
-
+/*
+ * The Arasan has a bugette whereby it may lose the content of successive
+ * writes to the same register that are within two SD-card clock cycles of
+ * each other (a clock domain crossing problem). The data
+ * register does not have this problem, which is just as well - otherwise we'd
+ * have to nobble the DMA engine too.
+ *
+ * This wouldn't be a problem with the code except that we can only write the
+ * controller with 32-bit writes.  So two different 16-bit registers are
+ * written back to back creates the problem.
+ *
+ * In reality, this only happens when SDHCI_BLOCK_SIZE and SDHCI_BLOCK_COUNT
+ * are written followed by SDHCI_TRANSFER_MODE and SDHCI_COMMAND.
+ * The BLOCK_SIZE and BLOCK_COUNT are meaningless until a command issued so
+ * the work around can be further optimized. We can keep shadow values of
+ * BLOCK_SIZE, BLOCK_COUNT, and TRANSFER_MODE until a COMMAND is issued.
+ * Then, write the BLOCK_SIZE+BLOCK_COUNT in a single 32-bit write followed
+ * by the TRANSFER+COMMAND in another 32-bit write.
+ */
 static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
-	u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
-		bcm2835_sdhci_readl(host, reg & ~3);
+	struct bcm2835_sdhci_host *bcm2835_host = pltfm_host->priv;
 	u32 word_shift = REG_OFFSET_IN_BITS(reg);
 	u32 mask = 0xffff << word_shift;
-	u32 newval = (oldval & ~mask) | (val << word_shift);
-
-	if (reg == SDHCI_TRANSFER_MODE)
-		bcm2835_host->shadow = newval;
-	else
+	u32 oldval, newval;
+
+	if (reg == SDHCI_COMMAND) {
+		/* Write the block now as we are issuing a command */
+		if (bcm2835_host->shadow_blk != 0) {
+			bcm2835_sdhci_writel(host, bcm2835_host->shadow_blk,
+				SDHCI_BLOCK_SIZE);
+			bcm2835_host->shadow_blk = 0;
+		}
+		oldval = bcm2835_host->shadow_cmd;
+	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+		/* Block size and count are stored in shadow reg */
+		oldval = bcm2835_host->shadow_blk;
+	} else {
+		/* Read reg, all other registers are not shadowed */
+		oldval = readl(host->ioaddr + (reg & ~3));
+	}
+	newval = (oldval & ~mask) | (val << word_shift);
+
+	if (reg == SDHCI_TRANSFER_MODE) {
+		/* Save the transfer mode until the command is issued */
+		bcm2835_host->shadow_cmd = newval;
+	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+		/* Save the block info until the command is issued */
+		bcm2835_host->shadow_blk = newval;
+	} else {
+		/* Command or other regular 32-bit write */
 		bcm2835_sdhci_writel(host, newval, reg & ~3);
+	}
 }
 
 static void bcm2835_sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
 {
-	u32 oldval = bcm2835_sdhci_readl(host, reg & ~3);
+	u32 oldval = readl(host->ioaddr + (reg & ~3));
 	u32 byte_shift = REG_OFFSET_IN_BITS(reg);
 	u32 mask = 0xff << byte_shift;
 	u32 newval = (oldval & ~mask) | (val << byte_shift);
@@ -114,11 +127,6 @@  static void bcm2835_sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
 	bcm2835_sdhci_writel(host, newval, reg & ~3);
 }
 
-static unsigned int bcm2835_sdhci_get_min_clock(struct sdhci_host *host)
-{
-	return MIN_FREQ;
-}
-
 static const struct sdhci_ops bcm2835_sdhci_ops = {
 	.read_l = bcm2835_sdhci_readl,
 	.read_w = bcm2835_sdhci_readw,
@@ -128,7 +136,6 @@  static const struct sdhci_ops bcm2835_sdhci_ops = {
 	.write_b = bcm2835_sdhci_writeb,
 	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
-	.get_min_clock = bcm2835_sdhci_get_min_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -143,7 +150,7 @@  static const struct sdhci_pltfm_data bcm2835_sdhci_pdata = {
 static int bcm2835_sdhci_probe(struct platform_device *pdev)
 {
 	struct sdhci_host *host;
-	struct bcm2835_sdhci *bcm2835_host;
+	struct bcm2835_sdhci_host *bcm2835_host;
 	struct sdhci_pltfm_host *pltfm_host;
 	int ret;