From patchwork Sat Mar 7 16:41:04 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marek Vasut X-Patchwork-Id: 243353 List-Id: U-Boot discussion From: marek.vasut at gmail.com (Marek Vasut) Date: Sat, 7 Mar 2020 17:41:04 +0100 Subject: [PATCH 1/5] common: bouncebuf: Permit passing custom alignment check function Message-ID: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> Add extended version of the bounce_buffer_start(), which permits passing in a custom alignment checker function for the buffer. This is useful e.g. on systems with various DMA restrictions and where the checker function might be more complex than a simple CPU cache alignment check. Signed-off-by: Marek Vasut Cc: Daniel Schwierzeck Cc: Masahiro Yamada Cc: Peng Fan Cc: Simon Glass Cc: Tom Rini --- common/bouncebuf.c | 20 +++++++++++++++----- include/bouncebuf.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/common/bouncebuf.c b/common/bouncebuf.c index 614eb36c78..0ace152b98 100644 --- a/common/bouncebuf.c +++ b/common/bouncebuf.c @@ -31,17 +31,19 @@ static int addr_aligned(struct bounce_buffer *state) return 1; } -int bounce_buffer_start(struct bounce_buffer *state, void *data, - size_t len, unsigned int flags) +int bounce_buffer_start_extalign(struct bounce_buffer *state, void *data, + size_t len, unsigned int flags, + size_t alignment, + int (*addr_is_aligned)(struct bounce_buffer *state)) { state->user_buffer = data; state->bounce_buffer = data; state->len = len; - state->len_aligned = roundup(len, ARCH_DMA_MINALIGN); + state->len_aligned = roundup(len, alignment); state->flags = flags; - if (!addr_aligned(state)) { - state->bounce_buffer = memalign(ARCH_DMA_MINALIGN, + if (!addr_is_aligned(state)) { + state->bounce_buffer = memalign(alignment, state->len_aligned); if (!state->bounce_buffer) return -ENOMEM; @@ -62,6 +64,14 @@ int bounce_buffer_start(struct bounce_buffer *state, void *data, return 0; } +int bounce_buffer_start(struct bounce_buffer *state, void *data, + size_t len, unsigned int flags) +{ + return bounce_buffer_start_extalign(state, data, len, flags, + ARCH_DMA_MINALIGN, + addr_aligned); +} + int bounce_buffer_stop(struct bounce_buffer *state) { if (state->flags & GEN_BB_WRITE) { diff --git a/include/bouncebuf.h b/include/bouncebuf.h index fd9b0f3b28..7427bd12e2 100644 --- a/include/bouncebuf.h +++ b/include/bouncebuf.h @@ -62,6 +62,21 @@ struct bounce_buffer { */ int bounce_buffer_start(struct bounce_buffer *state, void *data, size_t len, unsigned int flags); + +/** + * bounce_buffer_start() -- Start the bounce buffer session with external align check function + * state: stores state passed between bounce_buffer_{start,stop} + * data: pointer to buffer to be aligned + * len: length of the buffer + * flags: flags describing the transaction, see above. + * alignment: alignment of the newly allocated bounce buffer + * addr_is_aligned: function for checking the alignment instead of the default one + */ +int bounce_buffer_start_extalign(struct bounce_buffer *state, void *data, + size_t len, unsigned int flags, + size_t alignment, + int (*addr_is_aligned)(struct bounce_buffer *state)); + /** * bounce_buffer_stop() -- Finish the bounce buffer session * state: stores state passed between bounce_buffer_{start,stop} From patchwork Sat Mar 7 16:41:05 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marek Vasut X-Patchwork-Id: 243354 List-Id: U-Boot discussion From: marek.vasut at gmail.com (Marek Vasut) Date: Sat, 7 Mar 2020 17:41:05 +0100 Subject: [PATCH 2/5] ARM: rmobile: Increase malloc area size In-Reply-To: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> References: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> Message-ID: <20200307164108.192532-2-marek.vasut+renesas@gmail.com> Increase the malloc area size significantly to cater for bounce buffer used by the SDHI driver. Signed-off-by: Marek Vasut Cc: Daniel Schwierzeck Cc: Masahiro Yamada Cc: Peng Fan Cc: Simon Glass Cc: Tom Rini --- include/configs/rcar-gen3-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/configs/rcar-gen3-common.h b/include/configs/rcar-gen3-common.h index 6528f1fa62..8f400ba05a 100644 --- a/include/configs/rcar-gen3-common.h +++ b/include/configs/rcar-gen3-common.h @@ -47,7 +47,7 @@ #define CONFIG_SYS_MONITOR_BASE 0x00000000 #define CONFIG_SYS_MONITOR_LEN (1 * 1024 * 1024) -#define CONFIG_SYS_MALLOC_LEN (1 * 1024 * 1024) +#define CONFIG_SYS_MALLOC_LEN (64 * 1024 * 1024) #define CONFIG_SYS_BOOTM_LEN (64 << 20) /* The HF/QSPI layout permits up to 1 MiB large bootloader blob */ From patchwork Sat Mar 7 16:41:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marek Vasut X-Patchwork-Id: 243355 List-Id: U-Boot discussion From: marek.vasut at gmail.com (Marek Vasut) Date: Sat, 7 Mar 2020 17:41:06 +0100 Subject: [PATCH 3/5] mmc: tmio: sdhi: Use bounce buffer to avoid DMA limitations In-Reply-To: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> References: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> Message-ID: <20200307164108.192532-3-marek.vasut+renesas@gmail.com> The R-Car SDHI DMA controller has various restrictions. To work around those restrictions without falling back to PIO, implement bounce buffer with custom alignment check function which tests for those limitations. Signed-off-by: Marek Vasut Cc: Daniel Schwierzeck Cc: Masahiro Yamada Cc: Peng Fan Cc: Simon Glass Cc: Tom Rini --- drivers/mmc/Kconfig | 1 + drivers/mmc/renesas-sdhi.c | 75 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig index 2f0eedc22f..c69a18007d 100644 --- a/drivers/mmc/Kconfig +++ b/drivers/mmc/Kconfig @@ -347,6 +347,7 @@ config RENESAS_SDHI depends on ARCH_RMOBILE depends on BLK && DM_MMC depends on OF_CONTROL + select BOUNCE_BUFFER help This selects support for the Matsushita SD/MMC Host Controller on Renesas R-Car SoCs. diff --git a/drivers/mmc/renesas-sdhi.c b/drivers/mmc/renesas-sdhi.c index c3b13136f8..20c632c4af 100644 --- a/drivers/mmc/renesas-sdhi.c +++ b/drivers/mmc/renesas-sdhi.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -689,12 +690,86 @@ static int renesas_sdhi_wait_dat0(struct udevice *dev, int state, } #endif +#define RENESAS_SDHI_DMA_ALIGNMENT 128 + +static int renesas_sdhi_addr_aligned(struct bounce_buffer *state) +{ + uintptr_t ubuf = (uintptr_t)state->user_buffer; + + /* Check if start is aligned */ + if (!IS_ALIGNED(ubuf, RENESAS_SDHI_DMA_ALIGNMENT)) { + debug("Unaligned buffer address %p\n", state->user_buffer); + return 0; + } + + /* Check if length is aligned */ + if (state->len != state->len_aligned) { + debug("Unaligned buffer length %zu\n", state->len); + return 0; + } + + /* Check if below 32bit boundary */ + if ((ubuf >> 32) || (ubuf + state->len_aligned) >> 32) { + debug("Buffer above 32bit boundary %p-%p\n", + state->user_buffer, + state->user_buffer + state->len_aligned); + return 0; + } + + /* Aligned */ + return 1; +} + static int renesas_sdhi_send_cmd(struct udevice *dev, struct mmc_cmd *cmd, struct mmc_data *data) { + struct bounce_buffer bbstate; + unsigned int bbflags; + bool bbok = false; + size_t len; + void *buf; int ret; + if (data) { + if (data->flags & MMC_DATA_READ) { + buf = data->dest; + bbflags = GEN_BB_WRITE; + } else { + buf = (void *)data->src; + bbflags = GEN_BB_READ; + } + len = data->blocks * data->blocksize; + + ret = bounce_buffer_start_extalign(&bbstate, buf, len, bbflags, + RENESAS_SDHI_DMA_ALIGNMENT, + renesas_sdhi_addr_aligned); + /* + * If the amount of data to transfer is too large, we can get + * -ENOMEM when starting the bounce buffer. If that happens, + * fall back to PIO as it was before, otherwise use the BB. + */ + if (!ret) { + bbok = true; + if (data->flags & MMC_DATA_READ) + data->dest = bbstate.bounce_buffer; + else + data->src = bbstate.bounce_buffer; + } + } + ret = tmio_sd_send_cmd(dev, cmd, data); + + if (data && bbok) { + buf = bbstate.user_buffer; + + bounce_buffer_stop(&bbstate); + + if (data->flags & MMC_DATA_READ) + data->dest = buf; + else + data->src = buf; + } + if (ret) return ret; From patchwork Sat Mar 7 16:41:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marek Vasut X-Patchwork-Id: 243356 List-Id: U-Boot discussion From: marek.vasut at gmail.com (Marek Vasut) Date: Sat, 7 Mar 2020 17:41:07 +0100 Subject: [PATCH 4/5] mmc: Add option to adjust b_max before long read In-Reply-To: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> References: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> Message-ID: <20200307164108.192532-4-marek.vasut+renesas@gmail.com> Add getter function which permits adjusting the maximum number of blocks that could be read in a single sustained read transfer based on the location of the source/target buffer and length, before such transfer starts. This is mainly useful on systems which have various DMA restrictions for different memory locations, e.g. DMA limited to 32bit addresses, and where a bounce buffer is used to work around such restrictions. Since the U-Boot bounce buffer is mallocated, it's size is limited by the malloc area size, and the read transfer to such a buffer must also be limited. However, as not all areas are limited equally, the b_max should be adjusted accordinly as needed to avoid degrading performance unnecessarily. Signed-off-by: Marek Vasut Cc: Daniel Schwierzeck Cc: Masahiro Yamada Cc: Peng Fan Cc: Simon Glass Cc: Tom Rini --- drivers/mmc/mmc-uclass.c | 16 ++++++++++++++++ drivers/mmc/mmc.c | 16 ++++++++++++++-- include/mmc.h | 16 ++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/mmc-uclass.c b/drivers/mmc/mmc-uclass.c index 0b90a97650..0af9d846d0 100644 --- a/drivers/mmc/mmc-uclass.c +++ b/drivers/mmc/mmc-uclass.c @@ -13,6 +13,22 @@ #include #include "mmc_private.h" +int dm_mmc_get_b_max(struct udevice *dev, void *dst, lbaint_t blkcnt) +{ + struct dm_mmc_ops *ops = mmc_get_ops(dev); + struct mmc *mmc = mmc_get_mmc_dev(dev); + + if (ops->get_b_max) + return ops->get_b_max(dev, dst, blkcnt); + else + return mmc->cfg->b_max; +} + +int mmc_get_b_max(struct mmc *mmc, void *dst, lbaint_t blkcnt) +{ + return dm_mmc_get_b_max(mmc->dev, dst, blkcnt); +} + int dm_mmc_send_cmd(struct udevice *dev, struct mmc_cmd *cmd, struct mmc_data *data) { diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index b50fcbf6cf..85943de222 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -411,6 +411,16 @@ static int mmc_read_blocks(struct mmc *mmc, void *dst, lbaint_t start, return blkcnt; } +#if !CONFIG_IS_ENABLED(DM_MMC) +static int mmc_get_b_max(struct mmc *mmc, void *dst, lbaint_t blkcnt) +{ + if (mmc->cfg->ops->get_b_max) + return mmc->cfg->ops->get_b_max(mmc, dst, blkcnt); + else + return mmc->cfg->b_max; +} +#endif + #if CONFIG_IS_ENABLED(BLK) ulong mmc_bread(struct udevice *dev, lbaint_t start, lbaint_t blkcnt, void *dst) #else @@ -424,6 +434,7 @@ ulong mmc_bread(struct blk_desc *block_dev, lbaint_t start, lbaint_t blkcnt, int dev_num = block_dev->devnum; int err; lbaint_t cur, blocks_todo = blkcnt; + uint b_max; if (blkcnt == 0) return 0; @@ -453,9 +464,10 @@ ulong mmc_bread(struct blk_desc *block_dev, lbaint_t start, lbaint_t blkcnt, return 0; } + b_max = mmc_get_b_max(mmc, dst, blkcnt); + do { - cur = (blocks_todo > mmc->cfg->b_max) ? - mmc->cfg->b_max : blocks_todo; + cur = (blocks_todo > b_max) ? b_max : blocks_todo; if (mmc_read_blocks(mmc, dst, start, cur) != cur) { pr_debug("%s: Failed to read blocks\n", __func__); return 0; diff --git a/include/mmc.h b/include/mmc.h index 71e2e1735a..1485c673ed 100644 --- a/include/mmc.h +++ b/include/mmc.h @@ -478,6 +478,19 @@ struct dm_mmc_ops { * @return 0 if not present, 1 if present, -ve on error */ int (*host_power_cycle)(struct udevice *dev); + + /** + * get_b_max - get maximum length of single transfer + * Called before reading blocks from the card, + * useful for system which have e.g. DMA limits + * on various memory ranges. + * + * @dev: Device to check + * @dst: Destination buffer in memory + * @blkcnt: Total number of blocks in this transfer + * @return maximum number of blocks for this transfer + */ + int (*get_b_max)(struct udevice *dev, void *dst, lbaint_t blkcnt); }; #define mmc_get_ops(dev) ((struct dm_mmc_ops *)(dev)->driver->ops) @@ -490,6 +503,7 @@ int dm_mmc_get_wp(struct udevice *dev); int dm_mmc_execute_tuning(struct udevice *dev, uint opcode); int dm_mmc_wait_dat0(struct udevice *dev, int state, int timeout_us); int dm_mmc_host_power_cycle(struct udevice *dev); +int dm_mmc_get_b_max(struct udevice *dev, void *dst, lbaint_t blkcnt); /* Transition functions for compatibility */ int mmc_set_ios(struct mmc *mmc); @@ -499,6 +513,7 @@ int mmc_execute_tuning(struct mmc *mmc, uint opcode); int mmc_wait_dat0(struct mmc *mmc, int state, int timeout_us); int mmc_set_enhanced_strobe(struct mmc *mmc); int mmc_host_power_cycle(struct mmc *mmc); +int mmc_get_b_max(struct mmc *mmc, void *dst, lbaint_t blkcnt); #else struct mmc_ops { @@ -509,6 +524,7 @@ struct mmc_ops { int (*getcd)(struct mmc *mmc); int (*getwp)(struct mmc *mmc); int (*host_power_cycle)(struct mmc *mmc); + int (*get_b_max)(struct mmc *mmc, void *dst, lbaint_t blkcnt); }; #endif From patchwork Sat Mar 7 16:41:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marek Vasut X-Patchwork-Id: 243357 List-Id: U-Boot discussion From: marek.vasut at gmail.com (Marek Vasut) Date: Sat, 7 Mar 2020 17:41:08 +0100 Subject: [PATCH 5/5] mmc: tmio: sdhi: Implement get_b_max function In-Reply-To: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> References: <20200307164108.192532-1-marek.vasut+renesas@gmail.com> Message-ID: <20200307164108.192532-5-marek.vasut+renesas@gmail.com> Implement get_b_max() for the Renesas R-Car SDHI controller driver, limit the b_max per hardware capabilities such that select Gen2 controllers have 16bit block transfer limit, the rest has 32bit block transfer limit and on Gen3, the block transfer limit on addresses above the 32bit boundary is set to 1/4 of the malloc area. Originally, on Gen3, the block transfers above the 32bit area were limited to PIO only, which resulted in (R8A7795 Salvator-X , HS200 eMMC): => time mmc read 0x0000000700000000 0 0x10000 time: 0.151 seconds => time mmc read 0x0000000700000000 0 0x100000 time: 11.090 seconds with bounce buffer in place and b_max adjustment in place: => time mmc read 0x0000000700000000 0 0x10000 time: 0.156 seconds => time mmc read 0x0000000700000000 0 0x100000 time: 2.349 seconds Note that the bounce buffer does mallocate and free the bounce buffer for every transfer. Experiment which removes this results in further increase of read speed, from 2.349s to 2.156s per 512 MiB of data, which is not such a significant improvement anymore. It might however be interesting to have bounce buffer directly in the MMC core or even block core. Signed-off-by: Marek Vasut Cc: Daniel Schwierzeck Cc: Masahiro Yamada Cc: Peng Fan Cc: Simon Glass Cc: Tom Rini --- drivers/mmc/renesas-sdhi.c | 46 +++++++++++++++++++++++++++++--------- drivers/mmc/tmio-common.h | 1 + 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/renesas-sdhi.c b/drivers/mmc/renesas-sdhi.c index 20c632c4af..9359fd3fe9 100644 --- a/drivers/mmc/renesas-sdhi.c +++ b/drivers/mmc/renesas-sdhi.c @@ -692,27 +692,25 @@ static int renesas_sdhi_wait_dat0(struct udevice *dev, int state, #define RENESAS_SDHI_DMA_ALIGNMENT 128 -static int renesas_sdhi_addr_aligned(struct bounce_buffer *state) +static int renesas_sdhi_addr_aligned_gen(uintptr_t ubuf, + size_t len, size_t len_aligned) { - uintptr_t ubuf = (uintptr_t)state->user_buffer; - /* Check if start is aligned */ if (!IS_ALIGNED(ubuf, RENESAS_SDHI_DMA_ALIGNMENT)) { - debug("Unaligned buffer address %p\n", state->user_buffer); + debug("Unaligned buffer address %zu\n", ubuf); return 0; } /* Check if length is aligned */ - if (state->len != state->len_aligned) { - debug("Unaligned buffer length %zu\n", state->len); + if (len != len_aligned) { + debug("Unaligned buffer length %zu\n", len); return 0; } /* Check if below 32bit boundary */ - if ((ubuf >> 32) || (ubuf + state->len_aligned) >> 32) { - debug("Buffer above 32bit boundary %p-%p\n", - state->user_buffer, - state->user_buffer + state->len_aligned); + if ((ubuf >> 32) || (ubuf + len_aligned) >> 32) { + debug("Buffer above 32bit boundary %lx-%lx\n", + ubuf, ubuf + len_aligned); return 0; } @@ -720,6 +718,14 @@ static int renesas_sdhi_addr_aligned(struct bounce_buffer *state) return 1; } +static int renesas_sdhi_addr_aligned(struct bounce_buffer *state) +{ + uintptr_t ubuf = (uintptr_t)state->user_buffer; + + return renesas_sdhi_addr_aligned_gen(ubuf, state->len, + state->len_aligned); +} + static int renesas_sdhi_send_cmd(struct udevice *dev, struct mmc_cmd *cmd, struct mmc_data *data) { @@ -787,6 +793,24 @@ static int renesas_sdhi_send_cmd(struct udevice *dev, struct mmc_cmd *cmd, return 0; } +int renesas_sdhi_get_b_max(struct udevice *dev, void *dst, lbaint_t blkcnt) +{ + struct tmio_sd_priv *priv = dev_get_priv(dev); + struct mmc_uclass_priv *upriv = dev_get_uclass_priv(dev); + struct mmc *mmc = upriv->mmc; + size_t len = blkcnt * mmc->read_bl_len; + size_t len_align = roundup(len, RENESAS_SDHI_DMA_ALIGNMENT); + + if (renesas_sdhi_addr_aligned_gen((uintptr_t)dst, len, len_align)) { + if (priv->quirks & TMIO_SD_CAP_16BIT) + return U16_MAX; + else + return U32_MAX; + } else { + return (CONFIG_SYS_MALLOC_LEN / 4) / mmc->read_bl_len; + } +} + static const struct dm_mmc_ops renesas_sdhi_ops = { .send_cmd = renesas_sdhi_send_cmd, .set_ios = renesas_sdhi_set_ios, @@ -799,6 +823,7 @@ static const struct dm_mmc_ops renesas_sdhi_ops = { #if CONFIG_IS_ENABLED(MMC_UHS_SUPPORT) .wait_dat0 = renesas_sdhi_wait_dat0, #endif + .get_b_max = renesas_sdhi_get_b_max, }; #define RENESAS_GEN2_QUIRKS TMIO_SD_CAP_RCAR_GEN2 @@ -964,6 +989,7 @@ static int renesas_sdhi_probe(struct udevice *dev) return ret; } + priv->quirks = quirks; ret = tmio_sd_probe(dev, quirks); renesas_sdhi_filter_caps(dev); diff --git a/drivers/mmc/tmio-common.h b/drivers/mmc/tmio-common.h index 047458849b..2f671df4bc 100644 --- a/drivers/mmc/tmio-common.h +++ b/drivers/mmc/tmio-common.h @@ -147,6 +147,7 @@ struct tmio_sd_priv { u8 adjust_hs400_calibrate; u8 hs400_bad_tap; const u8 *adjust_hs400_calib_table; + u32 quirks; #endif ulong (*clk_get_rate)(struct tmio_sd_priv *); };