From patchwork Tue Jun 30 10:33:17 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Stefan Roese
X-Patchwork-Id: 243124
List-Id: U-Boot discussion
From: sr at denx.de (Stefan Roese)
Date: Tue, 30 Jun 2020 12:33:17 +0200
Subject: [PATCH v2 2/5] mips: octeon: use mips_mach_early_init() to copy to L2 cache
In-Reply-To: <20200630103320.1290545-1-sr@denx.de>
References: <20200630103320.1290545-1-sr@denx.de>
Message-ID: <20200630103320.1290545-3-sr@denx.de>

This patch adds the code to copy itself from bootrom location to a
different location (TEXT_BASE) to the Octeon platform. It is used in this
case to copy the complete U-Boot image into L2 cache, which greatly
improves the bootup time - especially in regard to the very long and
complex DDR4 init code.

The Kconfig symbol CONFIG_MIPS_MACH_EARLY_INIT is enabled with this
patch for Octeon.

Signed-off-by: Stefan Roese
---
Changes in v2:
- Change mips_mach_early_init() as suggested by Daniel to make it easier
  to understand and smaller
- Drop CONFIG_BOARD_SIZE_LIMIT

 arch/mips/Kconfig                     |  1 +
 arch/mips/mach-octeon/lowlevel_init.S | 69 +++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 327fd4848a..bcf6f26457 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -114,6 +114,7 @@ config ARCH_OCTEON
 	select DM
 	select DM_SERIAL
 	select MIPS_L2_CACHE
+	select MIPS_MACH_EARLY_INIT
 	select MIPS_TUNE_OCTEON3
 	select ROM_EXCEPTION_VECTORS
 	select SUPPORTS_BIG_ENDIAN
diff --git a/arch/mips/mach-octeon/lowlevel_init.S b/arch/mips/mach-octeon/lowlevel_init.S
index d9aab38cde..fa87cb4e34 100644
--- a/arch/mips/mach-octeon/lowlevel_init.S
+++ b/arch/mips/mach-octeon/lowlevel_init.S
@@ -17,3 +17,72 @@ LEAF(lowlevel_init)
 	jr	ra
 	 nop
 	END(lowlevel_init)
+
+/*
+ * mips_mach_early_init - copy the running U-Boot image to its link address
+ *
+ * Computes the offset between the address this code is executing from and
+ * the address it was linked to, copies everything from _start up to
+ * _end + 0x4000 (rounded up to a multiple of 128 bytes) to the link
+ * address, and returns to the caller at the corresponding link address.
+ *
+ * In:       ra = return address within the currently executing copy
+ * Clobbers: a0-a3, a7, t0-t3, s0, ra
+ *
+ * No stack is used. The copy loop terminates on an exact match of the
+ * destination pointer and the end address, so _start has to be 32-byte
+ * aligned.
+ */
+LEAF(mips_mach_early_init)
+
+	move	s0, ra		/* Save ra, it is overwritten by bal below */
+
+	bal	__dummy
+	 nop
+
+__dummy:
+	/* Get the actual address that we are running at */
+	PTR_LA	a7, __dummy
+	dsubu	t3, ra, a7	/* t3 now has reloc offset */
+
+	PTR_LA	t1, _start
+	daddu	t0, t1, t3	/* t0 now has actual address of _start */
+
+	/* Calculate end address of copy loop */
+	PTR_LA	t2, _end
+	daddiu	t2, t2, 0x4000	/* Increase size to include appended DTB */
+	daddiu	t2, t2, 127
+	ins	t2, zero, 0, 7	/* Round up to cache line for memcpy */
+
+	/* Copy ourself to the L2 cache from flash, 32 bytes at a time */
+1:
+	ld	a0, 0(t0)
+	ld	a1, 8(t0)
+	ld	a2, 16(t0)
+	ld	a3, 24(t0)
+	sd	a0, 0(t1)
+	sd	a1, 8(t1)
+	sd	a2, 16(t1)
+	sd	a3, 24(t1)
+	/*
+	 * Advance with 64-bit adds: addiu would truncate the pointers
+	 * to a sign-extended 32-bit value.
+	 */
+	daddiu	t0, t0, 32
+	daddiu	t1, t1, 32
+	bne	t1, t2, 1b
+	 nop
+
+	sync
+
+	/*
+	 * Return to start.S now running from TEXT_BASE, which points
+	 * to DRAM address space, which effectively is L2 cache now.
+	 * This speeds up the init process extremely, especially the
+	 * DDR init code.
+	 */
+	dsubu	s0, s0, t3	/* Fixup return address with reloc offset */
+	jr.hb	s0		/* Jump back with hazard barrier */
+	 nop
+
+	END(mips_mach_early_init)