Message ID | 20200502085944.13444-2-sr@denx.de |
---|---|
State | New |
Headers | show |
Series | mips: Add initial Octeon MIPS64 base support | expand |
sorry for the delay ;) Am 02.05.20 um 10:59 schrieb Stefan Roese: > From: Aaron Williams <awilliams at marvell.com> > > This patch adds very basic support for the Octeon III SoCs. Only > CFI parallel NOR flash and UART is supported for now. > > Please note that the basic Octeon port does not include the DDR3/4 > initialization yet. This will be added in some follow-up patches > later. To still use U-Boot on with this port, the L2 cache (4MiB on > Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the > prompt on such boards. this patch should come after the common MIPS patches > > Signed-off-by: Aaron Williams <awilliams at marvell.com> > Signed-off-by: Stefan Roese <sr at denx.de> > --- > > MAINTAINERS | 6 + > arch/Kconfig | 1 + > arch/mips/Kconfig | 49 +- > arch/mips/Makefile | 7 + > arch/mips/cpu/Makefile | 4 +- > arch/mips/include/asm/arch-octeon/cavm-reg.h | 42 + > arch/mips/include/asm/arch-octeon/clock.h | 24 + > arch/mips/mach-octeon/Kconfig | 92 ++ > arch/mips/mach-octeon/Makefile | 10 + > arch/mips/mach-octeon/clock.c | 22 + > arch/mips/mach-octeon/cpu.c | 55 + > arch/mips/mach-octeon/dram.c | 27 + > arch/mips/mach-octeon/include/ioremap.h | 30 + > arch/mips/mach-octeon/start.S | 1241 ++++++++++++++++++ > 14 files changed, 1608 insertions(+), 2 deletions(-) > create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h > create mode 100644 arch/mips/include/asm/arch-octeon/clock.h > create mode 100644 arch/mips/mach-octeon/Kconfig > create mode 100644 arch/mips/mach-octeon/Makefile > create mode 100644 arch/mips/mach-octeon/clock.c > create mode 100644 arch/mips/mach-octeon/cpu.c > create mode 100644 arch/mips/mach-octeon/dram.c > create mode 100644 arch/mips/mach-octeon/include/ioremap.h > create mode 100644 arch/mips/mach-octeon/start.S > > diff --git a/MAINTAINERS b/MAINTAINERS > index 66f0b07263..29f2d7328c 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -749,6 +749,12 @@ M: Ezequiel Garcia <ezequiel at collabora.com> > S: 
Maintained > F: arch/mips/mach-jz47xx/ > > +MIPS Octeon > +M: Aaron Williams <awilliams at marvell.com> > +S: Maintained > +F: arch/mips/mach-octeon/ > +F: arch/mips/include/asm/arch-octeon/ > + > MMC > M: Peng Fan <peng.fan at nxp.com> > S: Maintained > diff --git a/arch/Kconfig b/arch/Kconfig > index 91e049b322..1cd3e1dc0b 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -37,6 +37,7 @@ config MICROBLAZE > > config MIPS > bool "MIPS architecture" > + select CREATE_ARCH_SYMLINK you should not need that. The path arch/mips/mach-octeon/include/ will be automatically added to the include search paths. Thus move all files in arch/mips/include/asm/arch-octeon/ to arch/mips/mach-octeon/include/ > select HAVE_ARCH_IOREMAP > select HAVE_PRIVATE_LIBGCC > select SUPPORT_OF_CONTROL > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig > index 48e754cc46..3c7f3eb94f 100644 > --- a/arch/mips/Kconfig > +++ b/arch/mips/Kconfig > @@ -106,6 +106,24 @@ config ARCH_JZ47XX > select OF_CONTROL > select DM > > +config ARCH_OCTEON > + bool "Support Marvell Octeon CN7xxx platforms" > + select DISPLAY_CPUINFO > + select DMA_ADDR_T_64BIT > + select DM > + select DM_SERIAL > + select MIPS_CACHE_COHERENT > + select MIPS_INIT_STACK_IN_SRAM > + select MIPS_L2_CACHE > + select MIPS_TUNE_OCTEON3 > + select ROM_EXCEPTION_VECTORS > + select SUPPORTS_BIG_ENDIAN > + select SUPPORTS_CPU_MIPS64_OCTEON > + select PHYS_64BIT > + select OF_CONTROL > + select OF_LIVE > + imply CMD_DM > + > config MACH_PIC32 > bool "Support Microchip PIC32" > select DM > @@ -160,6 +178,7 @@ source "arch/mips/mach-bmips/Kconfig" > source "arch/mips/mach-jz47xx/Kconfig" > source "arch/mips/mach-pic32/Kconfig" > source "arch/mips/mach-mtmips/Kconfig" > +source "arch/mips/mach-octeon/Kconfig" > > if MIPS > > @@ -233,6 +252,14 @@ config CPU_MIPS64_R6 > Choose this option to build a kernel for release 6 or later of the > MIPS64 architecture. 
> > +config CPU_MIPS64_OCTEON > + bool "Marvell Octeon series of CPUs" > + depends on SUPPORTS_CPU_MIPS64_OCTEON > + select 64BIT > + help > + Choose this option for Marvell Octeon CPUs. These CPUs are between > + MIPS64 R5 and R6 with other extensions. > + > endchoice > > menu "General setup" > @@ -261,7 +288,7 @@ config MIPS_CM_BASE > config MIPS_CACHE_INDEX_BASE > hex "Index base address for cache initialisation" > default 0x80000000 if CPU_MIPS32 > - default 0xffffffff80000000 if CPU_MIPS64 > + default 0xFFFFFFFFC0000000 if ARCH_OCTEON > help > This is the base address for a memory block, which is used for > initialising the cache lines. This is also the base address of a memory > @@ -342,6 +369,14 @@ config SPL_LOADER_SUPPORT > help > Enable this option if you want to use SPL loaders without DM enabled. > > +config MIPS_CACHE_COHERENT > + bool "Set if MIPS processor is cache coherent" > + help > + Enable this if the MIPS architecture is cache coherent like the > + Marvell Octeon series of SoCs. When this is set, cache flushes > + and invalidates only flush the write buffer since the hardware > + maintains cache coherency. > + > endmenu > > menu "OS boot interface" > @@ -398,6 +433,9 @@ config SUPPORTS_CPU_MIPS64_R2 > config SUPPORTS_CPU_MIPS64_R6 > bool > > +config SUPPORTS_CPU_MIPS64_OCTEON > + bool > + > config CPU_MIPS32 > bool > default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 > @@ -405,6 +443,7 @@ config CPU_MIPS32 > config CPU_MIPS64 > bool > default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 > + default y if CPU_MIPS64_OCTEON > > config MIPS_TUNE_4KC > bool > @@ -421,6 +460,9 @@ config MIPS_TUNE_34KC > config MIPS_TUNE_74KC > bool > > +config MIPS_TUNE_OCTEON3 > + bool > + > config 32BIT > bool > > @@ -453,6 +495,11 @@ config MIPS_SRAM_INIT > before it can be used. If enabled, a function mips_sram_init() will > be called just before setup_stack_gd. 
> > +config DMA_ADDR_T_64BIT > + bool > + help > + Select this to enable 64-bit DMA addressing > + > config SYS_DCACHE_SIZE > int > default 0 > diff --git a/arch/mips/Makefile b/arch/mips/Makefile > index af3f227436..fa1ba7855a 100644 > --- a/arch/mips/Makefile > +++ b/arch/mips/Makefile > @@ -1,6 +1,10 @@ > # SPDX-License-Identifier: GPL-2.0+ > > +ifneq ($(CONFIG_ARCH_OCTEON),y) > head-y := arch/mips/cpu/start.o > +else > +head-y := arch/mips/mach-octeon/start.o > +endif > > ifeq ($(CONFIG_SPL_BUILD),y) > ifneq ($(CONFIG_SPL_START_S_PATH),) > @@ -17,6 +21,7 @@ machine-$(CONFIG_ARCH_JZ47XX) += jz47xx > machine-$(CONFIG_MACH_PIC32) += pic32 > machine-$(CONFIG_ARCH_MTMIPS) += mtmips > machine-$(CONFIG_ARCH_MSCC) += mscc > +machine-${CONFIG_ARCH_OCTEON} += octeon > > machdirs := $(patsubst %,arch/mips/mach-%/,$(machine-y)) > libs-y += $(machdirs) > @@ -30,6 +35,7 @@ arch-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,-mips32r6 > arch-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,-mips64 > arch-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,-mips64r2 > arch-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,-mips64r6 > +arch-${CONFIG_CPU_MIPS64_OCTEON} += -march=octeon3 > > # Allow extra optimization for specific CPUs/SoCs > tune-$(CONFIG_MIPS_TUNE_4KC) += -mtune=4kc > @@ -37,6 +43,7 @@ tune-$(CONFIG_MIPS_TUNE_14KC) += -mtune=14kc > tune-$(CONFIG_MIPS_TUNE_24KC) += -mtune=24kc > tune-$(CONFIG_MIPS_TUNE_34KC) += -mtune=34kc > tune-$(CONFIG_MIPS_TUNE_74KC) += -mtune=74kc > +tune-${CONFIG_MIPS_TUNE_OCTEON3} += -mtune=octeon3 > > # Include default header files > cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic > diff --git a/arch/mips/cpu/Makefile b/arch/mips/cpu/Makefile > index 6df7bb4e48..732015d6f3 100644 > --- a/arch/mips/cpu/Makefile > +++ b/arch/mips/cpu/Makefile > @@ -1,6 +1,8 @@ > # SPDX-License-Identifier: GPL-2.0+ > > -extra-y = start.o > +ifneq ($(CONFIG_ARCH_OCTEON),y) > +extra-y = start.o > +endif > > obj-y += time.o > obj-y += interrupts.o > diff --git 
a/arch/mips/include/asm/arch-octeon/cavm-reg.h b/arch/mips/include/asm/arch-octeon/cavm-reg.h > new file mode 100644 > index 0000000000..b961e54956 > --- /dev/null > +++ b/arch/mips/include/asm/arch-octeon/cavm-reg.h > @@ -0,0 +1,42 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#ifndef __CAVM_REG_H__ > + > +/* Register offsets */ > +#define CAVM_CIU_FUSE ((u64 *)0x80010100000001a0) > +#define CAVM_MIO_BOOT_REG_CFG0 ((u64 *)0x8001180000000000) > +#define CAVM_RST_BOOT ((u64 *)0x8001180006001600) > + > +/* Register structs */ > + > +/** > + * Register (RSL) rst_boot > + * > + * RST Boot Register > + */ > +union cavm_rst_boot { > + u64 u; > + struct cavm_rst_boot_s { > + u64 chipkill : 1; > + u64 jtcsrdis : 1; > + u64 ejtagdis : 1; > + u64 romen : 1; > + u64 ckill_ppdis : 1; > + u64 jt_tstmode : 1; > + u64 vrm_err : 1; > + u64 reserved_37_56 : 20; > + u64 c_mul : 7; > + u64 pnr_mul : 6; > + u64 reserved_21_23 : 3; > + u64 lboot_oci : 3; > + u64 lboot_ext : 6; > + u64 lboot : 10; > + u64 rboot : 1; > + u64 rboot_pin : 1; > + } s; > +}; > + > +#endif /* __CAVM_REG_H__ */ > diff --git a/arch/mips/include/asm/arch-octeon/clock.h b/arch/mips/include/asm/arch-octeon/clock.h > new file mode 100644 > index 0000000000..a844a222c9 > --- /dev/null > +++ b/arch/mips/include/asm/arch-octeon/clock.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2018, 2019 Marvell International Ltd. 
> + * > + * https://spdx.org/licenses > + */ > + > +#ifndef __CLOCK_H__ > + > +/** System PLL reference clock */ > +#define PLL_REF_CLK 50000000 /* 50 MHz */ > +#define NS_PER_REF_CLK_TICK (1000000000 / PLL_REF_CLK) > + > +/** > + * Returns the I/O clock speed in Hz > + */ > +u64 octeon_get_io_clock(void); > + > +/** > + * Returns the core clock speed in Hz > + */ > +u64 octeon_get_core_clock(void); > + > +#endif /* __CLOCK_H__ */ > diff --git a/arch/mips/mach-octeon/Kconfig b/arch/mips/mach-octeon/Kconfig > new file mode 100644 > index 0000000000..67fcb6058c > --- /dev/null > +++ b/arch/mips/mach-octeon/Kconfig > @@ -0,0 +1,92 @@ > +menu "Octeon platforms" > + depends on ARCH_OCTEON > + > +config SYS_SOC > + string > + default "octeon" > + > +config OCTEON_CN7XXX > + bool "Octeon CN7XXX SoC" > + > +config OCTEON_CN70XX > + bool "Octeon CN70XX SoC" > + select OCTEON_CN7XXX > + > +config OCTEON_CN73XX > + bool "Octeon CN73XX SoC" > + select OCTEON_CN7XXX > + > +config OCTEON_CN78XX > + bool "Octeon CN78XX SoC" > + select OCTEON_CN7XXX > + > +choice > + prompt "Octeon MIPS family select" > + > +config SOC_OCTEON2 > + bool "Octeon II family" > + help > + This selects the Octeon II SoC family this should be added later when needed > + > +config SOC_OCTEON3 > + bool "Octeon III family" > + help > + This selects the Octeon III SoC family CN70xx, CN73XX, CN78xx > + and CNF75XX. > + > +endchoice > + > +config SYS_DCACHE_SIZE > + default 32768 > + > +config SYS_DCACHE_LINE_SIZE > + default 128 > + > +config SYS_ICACHE_SIZE > + default 79872 > + > +config SYS_ICACHE_LINE_SIZE > + default 128 > + > +config OCTEON_BIG_STACK_SIZE > + hex > + default 0x4000 > + help > + This enables a larger stack needed for Octeon 3 DRAM initialization. > + If this is disabled then a part of the L1 cache will be reserved for > + the stack, resulting in a smaller image. If this is true then > + a portion of the TEXT address space will be reserved for the stack. 
> + Note that this requires that U-Boot MUST be able to fit entirely > + within the L2 cache and cannot be executed from a parallel NOR flash. > + The default size is 16KiB. > + > +config OCTEON_COPY_FROM_FLASH_TO_L2 > + bool > + default y > + help > + Set this for U-Boot to attempt to copy itself from flash memory into > + the L2 cache. This significantly improves the boot performance. > + > +config OCTEON_L2_MEMCPY_IN_CACHE > + bool > + default y > + help > + If this is set then the memcpy code that is used to copy U-Boot from > + the flash to the L2 cache is written to the L2 cache. This > + significantly speeds up the memcpy operation. > + > +config OCTEON_L2_UBOOT_ADDR > + hex > + default 0xffffffff81000000 > + help > + This specifies the address where U-Boot will be copied into the L2 > + cache. > + > +config OCTEON_L2_MEMCPY_ADDR > + hex > + default 0xffffffff81400000 > + help > + This specifies where U-Boot will place the memcpy routine used for > + copying U-Boot from flash to L2 cache. > + > +endmenu > diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile > new file mode 100644 > index 0000000000..a5fda682a7 > --- /dev/null > +++ b/arch/mips/mach-octeon/Makefile > @@ -0,0 +1,10 @@ > +# (C) Copyright 2019 Marvell, Inc. > +# > +# SPDX-License-Identifier: GPL-2.0+ > +# > + > +extra-y = start.o > + > +obj-y += clock.o > +obj-y += cpu.o > +obj-y += dram.o > diff --git a/arch/mips/mach-octeon/clock.c b/arch/mips/mach-octeon/clock.c > new file mode 100644 > index 0000000000..6e32008641 > --- /dev/null > +++ b/arch/mips/mach-octeon/clock.c > @@ -0,0 +1,22 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2018, 2019 Marvell International Ltd. 
> + */ > + > +#include <common.h> > +#include <asm/arch/clock.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +int octeon_get_timer_freq(void) > +{ > + return gd->cpu_clk; > +} > + > +/** > + * Returns the I/O clock speed in Hz > + */ > +u64 octeon_get_io_clock(void) > +{ > + return gd->bus_clk; > +} > diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c > new file mode 100644 > index 0000000000..a1373c6d56 > --- /dev/null > +++ b/arch/mips/mach-octeon/cpu.c > @@ -0,0 +1,55 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#include <common.h> > +#include <linux/io.h> > +#include <asm/arch/clock.h> > +#include <asm/arch-octeon/cavm-reg.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +static int get_clocks(void) > +{ > + const u64 ref_clock = PLL_REF_CLK; > + union cavm_rst_boot rst_boot; > + > + rst_boot.u = ioread64(CAVM_RST_BOOT); > + gd->cpu_clk = ref_clock * rst_boot.s.c_mul; > + gd->bus_clk = ref_clock * rst_boot.s.pnr_mul; > + > + debug("%s: cpu: %lu, bus: %lu\n", __func__, gd->cpu_clk, gd->bus_clk); > + > + return 0; > +} > + > +/* Early mach init code run from flash */ > +int mach_cpu_init(void) > +{ > + /* Remap boot-bus 0x1fc0.0000 -> 0x1f40.0000 */ > + /* ToDo: Move this to an early running bus (bootbus) DM driver */ > + clrsetbits_be64(CAVM_MIO_BOOT_REG_CFG0, 0xffff, 0x1f40); > + > + /* Get clocks and store them in GD */ > + get_clocks(); > + > + return 0; > +} > + > +/** > + * Returns number of cores > + * > + * @return number of CPU cores for the specified node > + */ > +static int cavm_octeon_num_cores(void) > +{ > + return fls64(ioread64(CAVM_CIU_FUSE) & 0xffffffffffff); > +} > + > +int print_cpuinfo(void) > +{ > + printf("SoC: Octeon CN73xx (%d cores)\n", cavm_octeon_num_cores()); > + > + return 0; > +} > diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c > new file mode 100644 > index 0000000000..c16a73e8e6 > --- /dev/null > +++ b/arch/mips/mach-octeon/dram.c 
> @@ -0,0 +1,27 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#include <common.h> > +#include <dm.h> > +#include <ram.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +int dram_init(void) > +{ > + /* > + * No DDR init yet -> run in L2 cache > + */ > + gd->ram_size = (2 << 20); > + gd->bd->bi_dram[0].size = gd->ram_size; > + gd->bd->bi_dram[1].size = 0; > + > + return 0; > +} > + > +ulong board_get_usable_ram_top(ulong total_size) > +{ > + return gd->ram_top; > +} > diff --git a/arch/mips/mach-octeon/include/ioremap.h b/arch/mips/mach-octeon/include/ioremap.h > new file mode 100644 > index 0000000000..59b75008a2 > --- /dev/null > +++ b/arch/mips/mach-octeon/include/ioremap.h > @@ -0,0 +1,30 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_MACH_OCTEON_IOREMAP_H > +#define __ASM_MACH_OCTEON_IOREMAP_H > + > +#include <linux/types.h> > + > +/* > + * Allow physical addresses to be fixed up to help peripherals located > + * outside the low 32-bit range -- generic pass-through version. > + */ > +static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, > + phys_addr_t size) > +{ > + return phys_addr; > +} > + > +static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, > + unsigned long flags) > +{ > + return (void __iomem *)(XKPHYS | offset); > +} > + > +static inline int plat_iounmap(const volatile void __iomem *addr) > +{ > + return 0; > +} > + > +#define _page_cachable_default _CACHE_CACHABLE_NONCOHERENT > + > +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ > diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S > new file mode 100644 > index 0000000000..acb967201a > --- /dev/null > +++ b/arch/mips/mach-octeon/start.S > @@ -0,0 +1,1241 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * Startup Code for OCTEON 64-bit CPU-core > + * > + * Copyright (c) 2003 Wolfgang Denk <wd at denx.de> > + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. 
> + */ > + > +#include <asm-offsets.h> > +#include <config.h> > +#include <asm/regdef.h> > +#include <asm/mipsregs.h> > +#include <asm/asm.h> > + > +#define BOOT_VECTOR_NUM_WORDS 8 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 > +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd > +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW > + > +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 > + > +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 > +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 > +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 > +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 > +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 > +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 > +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 > +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 > +#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 > +#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 > +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 > +#define OCTEON_L2D_FUS3 0x80011800800007B8 > +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 > + > +#define OCTEON_RST 0x8001180006000000 > +#define OCTEON_RST_BOOT_OFFSET 0x1600 > +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 > +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) > +#define OCTEON_RST_BOOT 0x8001180006001600 > +#define OCTEON_RST_SOFT_RST 0x8001180006001680 > +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) > + > +#define OCTEON_OCX_COM_NODE 0x8001180011000000 > +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 > +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 > +#define OCTEON_L2C_CTL 0x8001180080800000 > + > +#define OCTEON_DBG_DATA 0x80011F00000001E8 > +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 > +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 > +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + 
(X) * 8) > +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) > +#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8) > +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) > +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) > +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 > +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000) > + > +/** PRID for CN56XX */ > +#define OCTEON_PRID_CN56XX 0x04 > +/** PRID for CN52XX */ > +#define OCTEON_PRID_CN52XX 0x07 > +/** PRID for CN63XX */ > +#define OCTEON_PRID_CN63XX 0x90 > +/** PRID for CN68XX */ > +#define OCTEON_PRID_CN68XX 0x91 > +/** PRID for CN66XX */ > +#define OCTEON_PRID_CN66XX 0x92 > +/** PRID for CN61XX */ > +#define OCTEON_PRID_CN61XX 0x93 > +/** PRID for CNF71XX */ > +#define OCTEON_PRID_CNF71XX 0x94 > +/** PRID for CN78XX */ > +#define OCTEON_PRID_CN78XX 0x95 > +/** PRID for CN70XX */ > +#define OCTEON_PRID_CN70XX 0x96 > +/** PRID for CN73XX */ > +#define OCTEON_PRID_CN73XX 0x97 > +/** PRID for CNF75XX */ > +#define OCTEON_PRID_CNF75XX 0x98 > + > +/* func argument is used to create a mark, must be unique */ > +#define GETOFFSET(reg, func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld reg, 0(ra); \ > + dsubu reg, ra, reg; > + > +#define JAL(func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld t8, 0(ra); \ > + dsubu t8, ra, t8; \ > + dla t9, func; \ > + daddu t9, t9, t8; \ > + jalr t9; \ > + nop; > + > + .set arch=octeon3 > + .set noreorder > + > + .macro uhi_mips_exception > + move k0, t9 # preserve t9 in k0 > + move k1, a0 # preserve a0 in k1 > + li t9, 15 # UHI exception operation > + li a0, 0 # Use hard register context > + sdbbp 1 # Invoke UHI operation > + .endm > + > + .macro setup_stack_gd > + li t0, -16 > + PTR_LI t1, big_stack_start > + and sp, t1, t0 # force 16 byte alignment > + PTR_SUBU \ > + sp, sp, GD_SIZE # reserve space for gd > + and 
sp, sp, t0 # force 16 byte alignment > + move k0, sp # save gd pointer > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) > + PTR_SUBU \ > + sp, sp, t2 # reserve space for early malloc > + and sp, sp, t0 # force 16 byte alignment > +#endif > + move fp, sp > + > + /* Clear gd */ > + move t0, k0 > +1: > + PTR_S zero, 0(t0) > + PTR_ADDIU t0, PTRSIZE > + blt t0, t1, 1b > + nop > + > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset > +#endif > + .endm > + > +/* Saved register usage: > + * s0: not used > + * s1: not used > + * s2: Address U-Boot loaded into in L2 cache > + * s3: Start address > + * s4: flags > + * 1: booting from RAM > + * 2: executing out of cache > + * 4: booting from flash > + * s5: u-boot size (data end - _start) > + * s6: offset in flash. > + * s7: _start physical address > + * s8: > + */ > + > +ENTRY(_start) > + /* U-Boot entry point */ > + b reset > + > + /* The above jump instruction/nop are considered part of the > + * bootloader_header_t structure but are not changed when the header is > + * updated. > + */ > + > + /* Leave room for bootloader_header_t header at start of binary. This > + * header is used to identify the board the bootloader is for, what > + * address it is linked at, failsafe/normal, etc. It also contains a > + * CRC of the entire image. > + */ > + > +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) > + /* > + * Exception vector entry points. When running from ROM, an exception > + * cannot be handled. Halt execution and transfer control to debugger, > + * if one is attached. 
> + */ > + .org 0x200 > + /* TLB refill, 32 bit task */ > + uhi_mips_exception > + > + .org 0x280 > + /* XTLB refill, 64 bit task */ > + uhi_mips_exception > + > + .org 0x300 > + /* Cache error exception */ > + uhi_mips_exception > + > + .org 0x380 > + /* General exception */ > + uhi_mips_exception > + > + .org 0x400 > + /* Catch interrupt exceptions */ > + uhi_mips_exception > + > + .org 0x480 > + /* EJTAG debug exception */ > +1: b 1b > + nop > + > + .org 0x500 > +#endif > + > +/* Reserve extra space so that when we use the boot bus local memory > + * segment to remap the debug exception vector we don't overwrite > + * anything useful > + */ > + > +/* Basic exception handler (dump registers) in all ASM. When using the TLB for > + * mapping u-boot C code, we can't branch to that C code for exception handling > + * (TLB is disabled for some exceptions). > + */ > + > +/* RESET/start here */ > + .balign 8 > +reset: > + nop > + synci 0(zero) > + mfc0 k0, CP0_STATUS > + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ > + mtc0 k0, CP0_STATUS > + > + /* Save the address we're booting from, strip off low bits */ > + bal 1f > + nop > +1: > + move s3, ra > + dins s3, zero, 0, 12 > + > + /* Disable boot bus moveable regions */ > + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 > + sd zero, 0(k0) > + sd zero, 8(k0) > + > + /* Disable the watchdog timer > + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see > + * if we use CIU3 or CIU. > + */ > + mfc0 t0, CP0_PRID > + ext t0, t0, 8, 8 > + /* Assume CIU */ > + PTR_LI t1, OCTEON_CIU_WDOG(0) > + PTR_LI t2, OCTEON_CIU_PP_POKE(0) > + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu > + nop > + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu > + nop > + /* Use CIU3 */ > + PTR_LI t1, OCTEON_CIU3_WDOG(0) > + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) > +wd_use_ciu: > + sd zero, 0(t2) /* Pet the dog */ > + sd zero, 0(t1) /* Disable watchdog timer */ > + > + /* Errata: CN76XX has a node ID of 3. change it to zero here. 
> + * This needs to be done before we relocate to L2 as addresses change > + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], > + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. > + */ > + mfc0 a4, CP0_PRID > + /* Check for 78xx pass 1.x processor ID */ > + andi a4, 0xffff > + blt a4, (OCTEON_PRID_CN78XX << 8), 1f > + nop > + > + /* Zero out alternate package for now */ > + dins a4, zero, 6, 1 > + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f > + nop > + > + /* 78xx or 76xx here, first check for bug #27141 */ > + PTR_LI a5, OCTEON_SLI_CTL_STATUS > + ld a6, 0(a5) > + andi a7, a4, 0xff > + andi a6, a6, 0xff > + > + beq a6, a7, not_bug27141 > + nop > + > + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ > + /* We just hit bug #27141. Need to reset the chip and try again */ > + > + PTR_LI a4, OCTEON_RST_SOFT_RST > + ori a5, zero, 0x1 /* set the reset bit */ > + > +reset_78xx_27141: > + sync > + synci 0(zero) > + cache 9, 0(zero) > + sd a5, 0(a4) > + wait > + b reset_78xx_27141 > + nop > + > +not_bug27141: > + /* 76XX pass 1.x has the node number set to 3 */ > + mfc0 a4, CP0_EBASE > + ext a4, a4, 0, 10 > + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ > + nop > + > + /* Clear OCX_COM_NODE[ID] */ > + PTR_LI a5, OCTEON_OCX_COM_NODE > + ld a4, 0(a5) > + dins a4, zero, 0, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear L2C_OCI_CTL[GKSEGNODE] */ > + PTR_LI a5, OCTEON_L2C_OCI_CTL > + ld a4, 0(a5) > + dins a4, zero, 4, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ > + dmfc0 a4, CP0_CVMMEMCTL2 > + dins a4, zero, 12, 2 > + dmtc0 a4, CP0_CVMMEMCTL2 > + > + /* Put the flash address in the start of the EBASE register to > + * enable our exception handler but only for core 0. 
> + */ > + mfc0 a4, CP0_EBASE > + dext a4, a4, 0, 10 > + bnez a4, no_flash > + /* OK in delay slot */ > + dext a6, a6, 0, 16 /* Get the base address in flash */ > + sll a6, a6, 16 > + mtc0 a6, CP0_EBASE /* Enable exceptions */ > + > +no_flash: > + /* Zero out various registers */ > + mtc0 zero, CP0_DEPC > + mtc0 zero, CP0_EPC > + mtc0 zero, CP0_CAUSE > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + mtc0 zero, CP0_DESAVE > + > + /* The following are only available on Octeon 2 or later */ > + mtc0 zero, CP0_KSCRATCH1 > + mtc0 zero, CP0_KSCRATCH2 > + mtc0 zero, CP0_KSCRATCH3 > + mtc0 zero, CP0_USERLOCAL > + > + /* Turn off ROMEN bit to disable ROM */ > + PTR_LI a1, OCTEON_MIO_RST_BOOT > + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. > + * The difference is bits 24-26 are 6 instead of 0 for the address. > + */ > + /* For Octeon 2 and CN70XX we can ignore the watchdog */ > + blt a4, OCTEON_PRID_CN78XX, watchdog_ok > + nop > + > + PTR_LI a1, OCTEON_RST_BOOT > + > + beq a4, OCTEON_PRID_CN70XX, watchdog_ok > + nop > + > + ld a2, 0(a1) > + /* There is a bug where some registers don't get properly reset when > + * the watchdog timer causes a reset. In this case we need to force > + * a reset. 
> + */ > + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ > + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ > + /* Clear bit indicating reset due to watchdog */ > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + /* Disable watchdog */ > + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) > + sd zero, 0(a1) > + PTR_LI a1, OCTEON_CIU3_WDOG(0) > + sd zero, 0(a1) > + > + /* Record this in the GSER0_SCRATCH register in bit 11 */ > + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) > + ld a2, 0(a1) > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + PTR_LI a1, OCTEON_RST_SOFT_RST > + li a2, 1 > + sd a2, 0(a1) > + wait > + > + /* We should never get here */ > + > +watchdog_ok: > + ld a2, 0(a1) > + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ > + dins a2, zero, 2, 18 > + dins a2, zero, 60, 1 /* Clear ROMEN bit */ > + sd a2, 0(a1) > + > + /* Start of Octeon setup */ > + > + /* Check what core we are - if core 0, branch to init tlb > + * loop in flash. Otherwise, look up address of init tlb > + * loop that was saved in the boot vector block. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + beqz a0, InitTLBStart_local > + nop > + > + break > + /* We should never get here - non-zero cores now go directly to > + * tlb init from the boot stub in movable region. > + */ > + > + .globl InitTLBStart > +InitTLBStart: > +InitTLBStart_local: > + /* If we don't have working memory yet configure a bunch of > + * scratch memory, and set the stack pointer to the top > + * of it. This allows us to go to C code without having > + * memory set up > + * > + * Warning: do not change SCRATCH_STACK_LINES as this can impact the > + * transition from start.S to crti.asm. crti requires 590 bytes of > + * stack space. 
> + */ > + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + rdhwr v0, $0 > + bnez v0, 1f > + nop > + PTR_LA sp, big_stack_start - 16 > + b stack_clear_done > + nop > +1: > +#endif > +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ > + dmfc0 v0, CP0_CVMMEMCTL > + dins v0, zero, 0, 9 > + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ > + ori v0, 0x100 | SCRATCH_STACK_LINES > + dmtc0 v0, CP0_CVMMEMCTL > + /* set stack to top of scratch memory */ > + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) > + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ > + li t0, 0xffffffffffff8000 > +clear_scratch: > + sd zero, 0(t0) > + addiu t0, 8 > + bne t0, sp, clear_scratch > + nop > + > + /* This code run on all cores - core 0 from flash, > + * the rest from DRAM. When booting from PCI, non-zero cores > + * come directly here from the boot vector - no earlier code in this > + * file is executed. > + */ > + > + /* Some generic initialization is done here as well, as we need this > + * done on all cores even when booting from PCI > + */ > +stack_clear_done: > + /* Clear watch registers. 
*/ > + mtc0 zero, CP0_WATCHLO > + mtc0 zero, CP0_WATCHHI > + > + /* STATUS register */ > + mfc0 k0, CP0_STATUS > + li k1, ~ST0_IE > + and k0, k1 > + mtc0 k0, CP0_STATUS > + > + /* CAUSE register */ > + mtc0 zero, CP0_CAUSE > + > + /* Init Timer */ > + dmtc0 zero, CP0_COUNT > + dmtc0 zero, CP0_COMPARE > + > + > + mfc0 a5, CP0_STATUS > + li v0, 0xE0 /* enable 64 bit mode for CSR access */ > + or v0, v0, a5 > + mtc0 v0, CP0_STATUS > + > + > + dli v0, 1 << 29 /* Enable large physical address support in TLB */ > + mtc0 v0, CP0_PAGEGRAIN > + > +InitTLB: > + dmtc0 zero, CP0_ENTRYLO0 > + dmtc0 zero, CP0_ENTRYLO1 > + mtc0 zero, CP0_PAGEMASK > + dmtc0 zero, CP0_CONTEXT > + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy > + * TLB clearing > + */ > + PTR_LI v0, 0xFFFFFFFF90000000 > + mfc0 a0, CP0_CONFIG1 > + srl a0, a0, 25 > + /* Check if config4 reg present */ > + mfc0 a1, CP0_CONFIG3 > + bbit0 a1, 31, 2f > + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ > + mfc0 a1, CP0_CONFIG4 > + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + ins a0, a1, 6, 8 > + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ > +2: > + dmtc0 zero, CP0_XCONTEXT > + mtc0 zero, CP0_WIRED > + > +InitTLBloop: > + dmtc0 v0, CP0_ENTRYHI > + tlbp > + mfc0 v1, CP0_INDEX > + daddiu v0, v0, 1<<13 > + bgez v1, InitTLBloop > + > + mtc0 a0, CP0_INDEX > + tlbwi > + bnez a0, InitTLBloop > + daddiu a0, -1 > + > + mthi zero > + mtlo zero > + > + /* Set up status register */ > + mfc0 v0, CP0_STATUS > + /* Enable COP0 and COP2 access */ > + li a4, (1 << 28) | (1 << 30) > + or v0, a4 > + > + /* Must leave BEV set here, as DRAM is not configured for core 0. > + * Also, BEV must be 1 later on when the exception base address is set. 
> + */ > + > + /* Mask all interrupts */ > + ins v0, zero, 0, 16 > + /* Clear NMI (used to start cores other than core 0) */ > + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ > + mtc0 v0, CP0_STATUS > + > + dli v0,0xE000000F /* enable all readhw locations */ > + mtc0 v0, CP0_HWRENA > + > + dmfc0 v0, CP0_CVMCTL > + ori v0, 1<<14 /* enable fixup of unaligned mem access */ > + dmtc0 v0, CP0_CVMCTL > + > + /* Setup scratch memory. This is also done in > + * cvmx_user_app_init, and this code will be removed > + * from the bootloader in the near future. > + */ > + > + /* Set L2C_TAD_CTL[MAXLFB] = 0 on CN73XX */ > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + blt a4, OCTEON_PRID_CN73XX, 72f > + nop > + PTR_LI v0, OCTEON_L2C_TAD_CTL > + ld t1, 0(v0) > + dins t1, zero, 0, 4 > + sd t1, 0(v0) > + ld zero, 0(v0) > + > +72: > + > + /* clear these to avoid immediate interrupt in noperf mode */ > + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ > + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ > + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ > + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ > + dmtc0 zero, CP0_PERF_CNT2 > + dmtc0 zero, CP0_PERF_CNT3 > + > + /* If we're running on a node other than 0 then we need to set KSEGNODE > + * to 0. The nice thing with this code is that it also autodetects if > + * we're running on a processor that supports CVMMEMCTL2 or not since > + * only processors that have this will have a non-zero node ID. Because > + * of this there's no need to check if we're running on a 78XX. > + */ > + mfc0 t1, CP0_EBASE > + dext t1, t1, 7, 3 /* Extract node number */ > + beqz t1, is_node0 /* If non-zero then we're not node 0 */ > + nop > + dmfc0 t1, CP0_CVMMEMCTL2 > + dins t1, zero, 12, 4 > + dmtc0 t1, CP0_CVMMEMCTL2 > +is_node0: > + > + /* Set up TLB mappings for u-boot code in flash. */ > + > + /* Use a bal to get the current PC into ra. 
Since this bal is to > + * the address immediately following the delay slot, the ra is > + * the address of the label. We then use this to get the actual > + * address that we are executing from. > + */ > + bal __dummy > + nop > + > +__dummy: > + /* Get the actual address that we are running at */ > + PTR_LA a6, _start /* Linked address of _start */ > + PTR_LA a7, __dummy > + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ > + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ > + > + /* Save actual _start address in s7. This is where we > + * are executing from, as opposed to where the code is > + * linked. > + */ > + move s7, a7 > + move s4, zero > + > + /* s7 has actual address of _start. If this is > + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. > + * If it is on the boot bus, use 0xBFC00000 as the physical address > + * for the TLB mapping, as we will be adjusting the boot bus > + * to make this adjustment. > + * If we are running from DRAM (remote-boot), then we want to use the > + * real address in DRAM. > + */ > + > + /* Check to see if we are running from flash - we expect that to > + * be 0xffffffffb0000000-0xffffffffbfffffff > + * (0x10000000-0x1fffffff, unmapped/uncached) > + */ > + dli t2, 0xffffffffb0000000 > + dsubu t2, s7 > + slt s4, s7, t2 > + bltz t2, uboot_in_flash > + nop > + > + /* If we're not core 0 then we don't care about cache */ > + mfc0 t2, CP0_EBASE > + andi t2, EBASE_CPUNUM > + bnez t2, uboot_in_ram > + nop > + > + /* Find out if we're OCTEON I or OCTEON + which don't support running > + * out of cache. > + */ > + mfc0 t2, CP0_PRID > + ext t2, t2, 8, 8 > + li s4, 1 > + blt t2, 0x90, uboot_in_ram > + nop > + > + /* U-Boot can be executing either in RAM or L2 cache. Now we need to > + * check if DRAM is initialized. 
The way we do that is to look at > + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) > + */ > + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL > + ld t2, 0(t2) > + bbit1 t2, 7, uboot_in_ram > + nop > + > + /* We must be executing out of cache */ > + b uboot_in_ram > + li s4, 2 > + > +uboot_in_flash: > + /* Set s4 to 4 to indicate we're running in FLASH */ > + li s4, 4 > + > +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) > + /* By default, L2C index aliasing is enabled. In some cases it may > + * need to be disabled. The L2C index aliasing can only be disabled > + * if U-Boot is running out of L2 cache and the L2 cache has not been > + * used to store anything. > + */ > + PTR_LI t1, OCTEON_L2C_CTL > + ld t2, 0(t1) > + ori t2, 1 > + sd t2, 0(t1) > +#endif > + > + /* Use BFC00000 as physical address for TLB mappings when booting > + * from flash, as we will adjust the boot bus mappings to make this > + * mapping correct. > + */ > + dli a7, 0xFFFFFFFFBFC00000 > + dsubu s6, s7, a7 /* Save flash offset in s6 */ > + > +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) > + /* For OCTEON II we check to see if the L2 cache is big enough to hold > + * U-Boot. If it is big enough then we copy ourself from flash to the > + * L2 cache in order to speed up execution. 
> + */ > + > + /* Check for OCTEON 2 */ > + mfc0 t1, CP0_PRID > + ext t1, t1, 8, 8 > + /* Get number of L2 cache sets */ > + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ > + li t2, 1 << 9 > + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ > + li t2, 1 << 13 > + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ > + li t2, 1 << 10 > + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ > + li t2, 1 << 11 > + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ > + li t2, 1 << 11 > + b l2_cache_too_small /* Unknown OCTEON model */ > + nop > + > +got_l2_sets: > + /* Get number of associations */ > + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 > + ld t0, 0(t0) > + dext t0, t0, 32, 3 > + > + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets > + nop > + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ > + beqz t0, got_l2_ways > + li t3, 16 > + beq t0, 1, got_l2_ways > + li t3, 12 > + beq t0, 2, got_l2_ways > + li t3, 8 > + beq t0, 3, got_l2_ways > + li t3, 4 > + b l2_cache_too_small > + nop > + > +process_70xx_l2sets: > + /* For 70XX, the number of ways is defined as: > + * 0 - full cache (4-way) 512K > + * 1 - 3/4 ways (3-way) 384K > + * 2 - 1/2 ways (2-way) 256K > + * 3 - 1/4 ways (1-way) 128K > + * 4-7 illegal (aliased to 0-3) > + */ > + andi t0, 3 > + beqz t0, got_l2_ways > + li t3, 4 > + beq t0, 1, got_l2_ways > + li t3, 3 > + beq t0, 2, got_l2_ways > + li t3, 2 > + li t3, 1 > + > +got_l2_ways: > + dmul a1, t2, t3 /* Calculate cache size */ > + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ > + daddiu a1, a1, -128 /* Adjust cache size for copy code */ > + > + /* Calculate size of U-Boot image */ > + /* > + * "uboot_end - _start" is not correct, as the image also > + * includes the DTB appended to the end (OF_EMBED is deprecated). > + * Lets use a defined max for now here. 
> + */ > + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT > + > + daddu t2, s5, s7 /* t2 = end address */ > + daddiu t2, t2, 127 > + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ > + > + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ > + bnez t1, l2_cache_too_small > + nop > + /* Address we plan to load at in the L2 cache */ > + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR > +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE > + /* Enable all ways for PP0. Authentik ROM may have disabled these */ > + PTR_LI a1, OCTEON_L2C_WPAR_PP0 > + sd zero, 0(a1) > + > + /* Address to place our memcpy code */ > + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR > + /* The following code writes a simple memcpy routine into the cache > + * to copy ourself from flash into the L2 cache. This makes the > + * memcpy routine a lot faster since each instruction can potentially > + * require four read cycles to flash over the boot bus. > + */ > + /* Zero cache line in the L2 cache */ > + zcb (a0) > + synci 0(zero) > + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ > + sd a1, 0(a0) > + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ > + sd a1, 8(a0) > + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ > + sd a1, 16(a0) > + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ > + sd a1, 24(a0) > + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ > + sd a1, 32(a0) > + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ > + sd a1, 40(a0) > + sd zero, 48(a0) /* nop; nop */ > + > + /* Synchronize the caches */ > + sync > + synci 0(zero) > + > + move t0, s7 > + move t1, t9 > + > + /* Do the memcpy operation in L2 cache to copy ourself from flash > + * to the L2 cache. 
> + */ > + jalr a0 > + nop > + > +# else > + /* Copy ourself to the L2 cache from flash, 32 bytes at a time */ > + /* This code is now written to the L2 cache using the code above */ > +1: > + ld a0, 0(t0) > + ld a1, 8(t0) > + ld a2, 16(t0) > + ld a3, 24(t0) > + sd a0, 0(t1) > + sd a1, 8(t1) > + sd a2, 16(t1) > + sd a3, 24(t1) > + addiu t0, 32 > + bne t0, t2, 1b > + addiu t1, 32 > +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ > + > + /* Adjust the start address of U-Boot and the global pointer */ > + subu t0, s7, t9 /* t0 = address difference */ > + move s7, t9 /* Update physical address */ > + move s2, t9 > + sync > + synci 0(zero) > + > + /* Now we branch to the L2 cache. We first get our PC then adjust it > + */ > + bal 3f > + nop > +3: > + /* Don't add any instructions here! */ > + subu t9, ra, t0 > + /* Give ourself 16 bytes */ > + addiu t9, 0x10 > + > + jal t9 /* Branch to address in L2 cache */ > + > + nop > + nop > + /* Add instructions after here */ > + > + move a7, s7 > + > + b uboot_in_ram > + ori s4, 2 /* Running out of L2 cache */ > + > +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ > +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ > + > + /* This code is only executed if booting from flash. */ > + /* For flash boot (_not_ RAM boot), we do a workaround for > + * an LLM errata on CN38XX and CN58XX parts. > + */ > + > +uboot_in_ram: > + /* U-boot address is now in reg a7, and is 4 MByte aligned. > + * (boot bus addressing has been adjusted to make this happen for flash, > + * and for DRAM this alignment must be provided by the remote boot > + * utility. > + */ > + /* See if we're in KSEG0 range, if so set EBASE register to handle > + * exceptions. > + */ > + dli a1, 0x20000000 > + bge a7, a1, 1f > + nop > + /* Convert our physical address to KSEG0 */ > + PTR_LI a1, 0xffffffff80000000 > + or a1, a1, a7 > + mtc0 a1, CP0_EBASE > +1: > + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping > + * to map u-boot. 
> + */ > + move a0, a6 /* Virtual addr in a0 */ > + dins a0, zero, 0, 16 /* Zero out offset bits */ > + move a1, a7 /* Physical addr in a1 */ > + > + /* Now we need to remove the MIPS address space bits. For this we > + * need to determine if it is a 32 bit compatibility address or not. > + */ > + > + /* 'lowest' address in compatibility space */ > + PTR_LI t0, 0xffffffff80000000 > + dsubu t0, t0, a1 > + bltz t0, compat_space > + nop > + > + /* We have a xkphys address, so strip off top bit */ > + b addr_fixup_done > + dins a1, zero, 63, 1 > + > +compat_space: > + PTR_LI a2, 0x1fffffff > + and a1, a1, a2 /* Mask phy addr to remove address space bits */ > + > +addr_fixup_done: > + /* Currently the u-boot image size is limited to 4 MBytes. In order to > + * support larger images the flash mapping will need to be changed to > + * be able to access more than that before C code is run. Until that > + * is done, we just use a 4 MByte mapping for the secondary cores as > + * well. > + */ > + /* page size (only support 4 Meg binary size for now for core 0) > + * This limitation is due to the fact that the boot vector is > + * 0xBFC00000 which only makes 4MB available. Later more flash > + * address space will be available after U-Boot has been copied to > + * RAM. For now assume that it is in flash. > + */ > + li a2, 2*1024*1024 > + > + mfc0 a4, CP0_EBASE > + andi a4, EBASE_CPUNUM /* get core */ > + beqz a4, core_0_tlb > + nop > + > + /* Now determine how big a mapping to use for secondary cores, > + * which need to map all of u-boot + heap in DRAM > + */ > + /* Here we look at the alignment of the physical address, > + * and use the largest page size possible. In some cases > + * this can result in an oversize mapping, but for secondary cores > + * this mapping is very short lived. 
> + */ > + > + /* Physical address in a1 */ > + li a2, 1 > +1: > + sll a2, 1 > + and a5, a1, a2 > + beqz a5, 1b > + nop > + > + /* a2 now contains largest page size we can use */ > +core_0_tlb: > + JAL(single_tlb_setup) > + > + /* Check if we're running from cache */ > + bbit1 s4, 1, uboot_in_cache > + nop > + > + /* If we are already running from ram, we don't need to muck > + * with boot bus mappings. > + */ > + PTR_LI t2, 0xffffffffb0000000 > + dsubu t2, s7 > + /* See if our starting address is lower than the boot bus */ > + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ > + nop > + > +uboot_in_cache: > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* The large stack is only for core 0. For all other cores we need to > + * use the L1 cache otherwise the other cores will stomp on top of each > + * other unless even more space is reserved for the stack space for > + * each core. With potentially 96 cores this gets excessive. > + */ > + mfc0 v0, CP0_EBASE > + andi a0, EBASE_CPUNUM > + bnez a0, no_big_stack > + nop > + PTR_LA sp, big_stack_start > + daddiu sp, -16 > + > +no_big_stack: > +#endif > + /* We now have the TLB set up, so we need to remap the boot bus. > + * This is tricky, as we are running from flash, and will be changing > + * the addressing of the flash. 
> + */ > + /* Enable movable boot bus region 0, at address 0x10000000 */ > + PTR_LI a4, OCTEON_MIO_BOOT_BASE > + dli a5, 0x81000000 /* EN + base address 0x10000000 */ > + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Copy code that remaps the boot bus to movable region */ > + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + PTR_LA a6, change_boot_mappings > + GETOFFSET(a5, change_boot_mappings); > + daddu a5, a5, a6 > + > + /* The code is 16 bytes (2 DWORDS) */ > + ld a7, 0(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + ld a7, 8(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + /* Read from an RML register to ensure that the previous writes have > + * completed before we branch to the movable region. > + */ > + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Compute value for boot bus configuration register */ > + /* Read region 0 config so we can _modify_ the base address field */ > + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ > + ld a0, 0(a4) > + dli a4, 0xf0000000 /* Mask off bits we want to save */ > + and a4, a4, a0 > + dli a0, 0x0fff0000 /* Force size to max */ > + or a4, a4, a0 > + > + move a5, s6 > + /* Convert to 64k blocks, as used by boot bus config */ > + srl a5, 16 > + li a6, 0x1fc0 /* 'normal' boot bus base config value */ > + subu a6, a6, a5 /* Subtract offset */ > + /* combine into register value to pass to boot bus routine */ > + or a0, a4, a6 > + > + /* Branch there */ > + PTR_LA a1, __mapped_continue_label > + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 > + /* If region 0 is not enabled we can skip it */ > + ld a4, 0(a2) > + bbit0 a4, 31, __mapped_continue_label > + nop > + li a4, 0x10000000 > + j a4 > + synci 0(zero) > + > + /* We never get here, as we go directly to __mapped_continue_label */ > + break > + > + > +uboot_in_ram2: > + > + /* Now jump to address in TLB mapped memory to continue execution */ > + PTR_LA a4, __mapped_continue_label > + synci 0(a4) > + j a4 > + nop > + > +__mapped_continue_label: > + /* 
Check if we are core 0, if we are not then we need > + * to vector to code in DRAM to do application setup, and > + * skip the rest of the bootloader. Only core 0 runs the bootloader > + * and sets up the tables that the other cores will use for > + * configuration. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + /* if (__all_cores_are_equal==0 && core==0), > + * then jump to execute BL on core 0; else 'go to next line' > + * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID)) > + */ > + lw t0, __all_cores_are_equal > + beq a0, t0, core_0_cont1 > + nop > + > + /* other cores look up addr from dram */ > + /* DRAM controller already set up by first core */ > + li a1, (BOOT_VECTOR_NUM_WORDS * 4) > + mul a0, a0, a1 > + > + /* Now find out the boot vector base address from the moveable boot > + * bus region. > + */ > + > + /* Get the address of the boot bus moveable region */ > + PTR_LI t8, OCTEON_MIO_BOOT_BASE > + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) > + /* Make sure it's enabled */ > + bbit0 t9, 31, invalid_boot_vector > + dext t9, t9, 3, 24 > + dsll t9, t9, 7 > + /* Make address XKPHYS */ > + li t0, 1 > + dins t9, t0, 63, 1 > + > + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) > + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 > + bne t0, t1, invalid_boot_vector > + nop > + > + /* Load base address of boot vector table */ > + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) > + /* Add offset for core */ > + daddu a1, t0, a0 > + > + mfc0 v0, CP0_STATUS > + move v1, v0 > + ins v1, zero, 19, 1 /* Clear NMI bit */ > + mtc0 v1, CP0_STATUS > + > + /* Get app start function address */ > + lw t9, 8(a1) > + beqz t9, invalid_boot_vector > + nop > + > + j t9 > + lw k0, 12(a1) /* Load global data (deprecated) */ > + > +invalid_boot_vector: > + wait > + b invalid_boot_vector > + nop > + > +__all_cores_are_equal: > + /* The following .word tell if 'all_cores_are_equal' or core0 is special > + * By default (for the first execution) the core0 should be special, > + 
* in order to behave like the old(existing not-modified) bootloader > + * and run the bootloader on core 0 to follow the existing design. > + * However after that we make 'all_cores_equal' which allows to run SE > + * applications on core0 like on any other core. NOTE that value written > + * to '__all_cores_are_equal' should not match any core ID. > + */ > + .word 0 > + > +core_0_cont1: > + li t0, 0xffffffff > + sw t0, __all_cores_are_equal > + /* From here on, only core 0 runs, other cores have branched > + * away. > + */ > +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM > + /* Set up initial stack and global data */ > + setup_stack_gd > +# ifdef CONFIG_DEBUG_UART > + PTR_LA t9, debug_uart_init > + jalr t9 > + nop > +# endif > +#endif > + move a0, zero # a0 <-- boot_flags = 0 > + PTR_LA t9, board_init_f > + > + jr t9 > + move ra, zero > + END(_start) > + > + .balign 8 > + .globl single_tlb_setup > + .ent single_tlb_setup > + /* Sets up a single TLB entry. Virtual/physical addresses > + * must be properly aligned. > + * a0 Virtual address > + * a1 Physical address > + * a2 page (_not_ mapping) size > + */ > +single_tlb_setup: > + /* Determine the number of TLB entries available, and > + * use the top one. 
> + */ > + mfc0 a3, CP0_CONFIG1 > + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ > + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ > + bbit0 a5, 31, single_tlb_setup_cont > + nop > + mfc0 a5, CP0_CONFIG4 > + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + dins a3, a5, 6, 8 > + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ > + > +single_tlb_setup_cont: > + > + /* Format physical address for entry low */ > + nop > + dsrl a1, a1, 12 > + dsll a1, a1, 6 > + ori a1, a1, 0x7 /* set DVG bits */ > + > + move a4, a2 > + daddu a5, a4, a4 /* mapping size */ > + dsll a6, a4, 1 > + daddiu a6, a6, -1 /* pagemask */ > + dsrl a4, a4, 6 /* adjust for adding with entrylo */ > + > + /* Now set up mapping */ > + mtc0 a6, CP0_PAGEMASK > + mtc0 a3, CP0_INDEX > + > + dmtc0 a1, CP0_ENTRYLO0 > + daddu a1, a1, a4 > + > + dmtc0 a1, CP0_ENTRYLO1 > + daddu a1, a1, a4 > + > + dmtc0 a0, CP0_ENTRYHI > + daddu a0, a0, a5 > + > + ehb > + tlbwi > + jr ra > + nop > + .end single_tlb_setup > + > + > +/** > + * This code is moved to a movable boot bus region, > + * and it is responsible for changing the flash mappings and > + * jumping to run from the TLB mapped address. > + * > + * @param a0 New address for boot bus region 0 > + * @param a1 Address to branch to afterwards > + * @param a2 Address of MIO_BOOT_REG_CFG0 > + */ > + .balign 8 > +change_boot_mappings: > + sd a0, 0(a2) > + sync > + j a1 /* Jump to new TLB mapped location */ > + synci 0(zero) > + > +/* If we need a large stack, allocate it here. */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* Allocate the stack here so it's in L2 cache or DRAM */ > + .balign 16 > +big_stack_end: > + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 > +big_stack_start: > + .dword 0 > +#endif >
Am 02.05.20 um 10:59 schrieb Stefan Roese: > From: Aaron Williams <awilliams at marvell.com> > > This patch adds very basic support for the Octeon III SoCs. Only > CFI parallel NOR flash and UART is supported for now. > > Please note that the basic Octeon port does not include the DDR3/4 > initialization yet. This will be added in some follow-up patches > later. To still use U-Boot with this port, the L2 cache (4MiB on > Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the > prompt on such boards. > > Signed-off-by: Aaron Williams <awilliams at marvell.com> > Signed-off-by: Stefan Roese <sr at denx.de> > --- > > MAINTAINERS | 6 + > arch/Kconfig | 1 + > arch/mips/Kconfig | 49 +- > arch/mips/Makefile | 7 + > arch/mips/cpu/Makefile | 4 +- > arch/mips/include/asm/arch-octeon/cavm-reg.h | 42 + > arch/mips/include/asm/arch-octeon/clock.h | 24 + > arch/mips/mach-octeon/Kconfig | 92 ++ > arch/mips/mach-octeon/Makefile | 10 + > arch/mips/mach-octeon/clock.c | 22 + > arch/mips/mach-octeon/cpu.c | 55 + > arch/mips/mach-octeon/dram.c | 27 + > arch/mips/mach-octeon/include/ioremap.h | 30 + > arch/mips/mach-octeon/start.S | 1241 ++++++++++++++++++ > 14 files changed, 1608 insertions(+), 2 deletions(-) > create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h > create mode 100644 arch/mips/include/asm/arch-octeon/clock.h > create mode 100644 arch/mips/mach-octeon/Kconfig > create mode 100644 arch/mips/mach-octeon/Makefile > create mode 100644 arch/mips/mach-octeon/clock.c > create mode 100644 arch/mips/mach-octeon/cpu.c > create mode 100644 arch/mips/mach-octeon/dram.c > create mode 100644 arch/mips/mach-octeon/include/ioremap.h > create mode 100644 arch/mips/mach-octeon/start.S > I couldn't completely understand the start.S. There is too much stuff in it for an initial merge. But I don't see a hard reason against using the generic start.S. 
So the first patch series should only implement the bare minimum needed to boot from flash, init the boot CPU core, maybe suspend all other cores and relocate to L2 cache. I know the current start.S is not really suited yet but I'm working on a refactoring to add some more hooks which a SoC/CPU can implement. Once we have your initial patch series and the refactoring in mainline, it should be possible to gradually add more Octeon stuff like memory init. Basic idea for refactoring is something like this: reset: - mips_cpu_early_init() # custom early init, fix errata - init CP0 registers, Watch registers - mips_cache_disable() # set K0 CCA to uncached - mips_cpu_core_init() # per CPU core init # -> generic code issues wait instr. # -> custom code can do custom init # or custom boot protocols - mips_cm_map() # init CM if available - mips_cache_init() # init caches, set K0 CCA to non-coh. - mips_sram_init() # init SRAM, Scratch RAM if avail - setup initial stack and global_data - debug_uart_init() - mips_mem_init() # init external memory, C env avail. - init malloc_f - board_init_f() > + > +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ > diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S > new file mode 100644 > index 0000000000..acb967201a > --- /dev/null > +++ b/arch/mips/mach-octeon/start.S > @@ -0,0 +1,1241 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * Startup Code for OCTEON 64-bit CPU-core > + * > + * Copyright (c) 2003 Wolfgang Denk <wd at denx.de> > + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. 
> + */ > + > +#include <asm-offsets.h> > +#include <config.h> > +#include <asm/regdef.h> > +#include <asm/mipsregs.h> > +#include <asm/asm.h> > + > +#define BOOT_VECTOR_NUM_WORDS 8 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 > +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd > +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW > + > +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 > + > +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 > +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 > +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 > +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 > +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 > +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 > +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 > +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 > +#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 > +#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 > +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 > +#define OCTEON_L2D_FUS3 0x80011800800007B8 > +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 > + > +#define OCTEON_RST 0x8001180006000000 > +#define OCTEON_RST_BOOT_OFFSET 0x1600 > +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 > +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) > +#define OCTEON_RST_BOOT 0x8001180006001600 > +#define OCTEON_RST_SOFT_RST 0x8001180006001680 > +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) > + > +#define OCTEON_OCX_COM_NODE 0x8001180011000000 > +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 > +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 > +#define OCTEON_L2C_CTL 0x8001180080800000 > + > +#define OCTEON_DBG_DATA 0x80011F00000001E8 > +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 > +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 > +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + 
(X) * 8) > +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) > +#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8) > +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) > +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) > +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 > +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000) > + > +/** PRID for CN56XX */ > +#define OCTEON_PRID_CN56XX 0x04 > +/** PRID for CN52XX */ > +#define OCTEON_PRID_CN52XX 0x07 > +/** PRID for CN63XX */ > +#define OCTEON_PRID_CN63XX 0x90 > +/** PRID for CN68XX */ > +#define OCTEON_PRID_CN68XX 0x91 > +/** PRID for CN66XX */ > +#define OCTEON_PRID_CN66XX 0x92 > +/** PRID for CN61XX */ > +#define OCTEON_PRID_CN61XX 0x93 > +/** PRID for CNF71XX */ > +#define OCTEON_PRID_CNF71XX 0x94 > +/** PRID for CN78XX */ > +#define OCTEON_PRID_CN78XX 0x95 > +/** PRID for CN70XX */ > +#define OCTEON_PRID_CN70XX 0x96 > +/** PRID for CN73XX */ > +#define OCTEON_PRID_CN73XX 0x97 > +/** PRID for CNF75XX */ > +#define OCTEON_PRID_CNF75XX 0x98 > + > +/* func argument is used to create a mark, must be unique */ > +#define GETOFFSET(reg, func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld reg, 0(ra); \ > + dsubu reg, ra, reg; > + > +#define JAL(func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld t8, 0(ra); \ > + dsubu t8, ra, t8; \ > + dla t9, func; \ > + daddu t9, t9, t8; \ > + jalr t9; \ > + nop; > + > + .set arch=octeon3 > + .set noreorder > + > + .macro uhi_mips_exception > + move k0, t9 # preserve t9 in k0 > + move k1, a0 # preserve a0 in k1 > + li t9, 15 # UHI exception operation > + li a0, 0 # Use hard register context > + sdbbp 1 # Invoke UHI operation > + .endm > + > + .macro setup_stack_gd > + li t0, -16 > + PTR_LI t1, big_stack_start > + and sp, t1, t0 # force 16 byte alignment > + PTR_SUBU \ > + sp, sp, GD_SIZE # reserve space for gd > + and 
sp, sp, t0 # force 16 byte alignment > + move k0, sp # save gd pointer > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) > + PTR_SUBU \ > + sp, sp, t2 # reserve space for early malloc > + and sp, sp, t0 # force 16 byte alignment > +#endif > + move fp, sp > + > + /* Clear gd */ > + move t0, k0 > +1: > + PTR_S zero, 0(t0) > + PTR_ADDIU t0, PTRSIZE > + blt t0, t1, 1b > + nop > + > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset > +#endif > + .endm > + > +/* Saved register usage: > + * s0: not used > + * s1: not used > + * s2: Address U-Boot loaded into in L2 cache > + * s3: Start address > + * s4: flags > + * 1: booting from RAM > + * 2: executing out of cache > + * 4: booting from flash > + * s5: u-boot size (data end - _start) > + * s6: offset in flash. > + * s7: _start physical address > + * s8: > + */ > + > +ENTRY(_start) > + /* U-Boot entry point */ > + b reset > + > + /* The above jump instruction/nop are considered part of the > + * bootloader_header_t structure but are not changed when the header is > + * updated. > + */ > + > + /* Leave room for bootloader_header_t header at start of binary. This > + * header is used to identify the board the bootloader is for, what > + * address it is linked at, failsafe/normal, etc. It also contains a > + * CRC of the entire image. > + */ > + > +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) > + /* > + * Exception vector entry points. When running from ROM, an exception > + * cannot be handled. Halt execution and transfer control to debugger, > + * if one is attached. 
> + */ > + .org 0x200 > + /* TLB refill, 32 bit task */ > + uhi_mips_exception > + > + .org 0x280 > + /* XTLB refill, 64 bit task */ > + uhi_mips_exception > + > + .org 0x300 > + /* Cache error exception */ > + uhi_mips_exception > + > + .org 0x380 > + /* General exception */ > + uhi_mips_exception > + > + .org 0x400 > + /* Catch interrupt exceptions */ > + uhi_mips_exception > + > + .org 0x480 > + /* EJTAG debug exception */ > +1: b 1b > + nop > + > + .org 0x500 > +#endif > + > +/* Reserve extra space so that when we use the boot bus local memory > + * segment to remap the debug exception vector we don't overwrite > + * anything useful > + */ > + > +/* Basic exception handler (dump registers) in all ASM. When using the TLB for > + * mapping u-boot C code, we can't branch to that C code for exception handling > + * (TLB is disabled for some exceptions. > + */ > + > +/* RESET/start here */ > + .balign 8 > +reset: > + nop > + synci 0(zero) > + mfc0 k0, CP0_STATUS > + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ > + mtc0 k0, CP0_STATUS > + > + /* Save the address we're booting from, strip off low bits */ > + bal 1f > + nop > +1: > + move s3, ra > + dins s3, zero, 0, 12 > + > + /* Disable boot bus moveable regions */ > + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 > + sd zero, 0(k0) > + sd zero, 8(k0) > + > + /* Disable the watchdog timer > + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see > + * if we use CIU3 or CIU. > + */ > + mfc0 t0, CP0_PRID > + ext t0, t0, 8, 8 > + /* Assume CIU */ > + PTR_LI t1, OCTEON_CIU_WDOG(0) > + PTR_LI t2, OCTEON_CIU_PP_POKE(0) > + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu > + nop > + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu > + nop > + /* Use CIU3 */ > + PTR_LI t1, OCTEON_CIU3_WDOG(0) > + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) > +wd_use_ciu: > + sd zero, 0(t2) /* Pet the dog */ > + sd zero, 0(t1) /* Disable watchdog timer */ > + > + /* Errata: CN76XX has a node ID of 3. change it to zero here. 
> + * This needs to be done before we relocate to L2 as addresses change > + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], > + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. > + */ > + mfc0 a4, CP0_PRID > + /* Check for 78xx pass 1.x processor ID */ > + andi a4, 0xffff > + blt a4, (OCTEON_PRID_CN78XX << 8), 1f > + nop > + > + /* Zero out alternate package for now */ > + dins a4, zero, 6, 1 > + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f > + nop > + > + /* 78xx or 76xx here, first check for bug #27141 */ > + PTR_LI a5, OCTEON_SLI_CTL_STATUS > + ld a6, 0(a5) > + andi a7, a4, 0xff > + andi a6, a6, 0xff > + > + beq a6, a7, not_bug27141 > + nop > + > + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ > + /* We just hit bug #27141. Need to reset the chip and try again */ > + > + PTR_LI a4, OCTEON_RST_SOFT_RST > + ori a5, zero, 0x1 /* set the reset bit */ > + > +reset_78xx_27141: > + sync > + synci 0(zero) > + cache 9, 0(zero) > + sd a5, 0(a4) > + wait > + b reset_78xx_27141 > + nop > + > +not_bug27141: > + /* 76XX pass 1.x has the node number set to 3 */ > + mfc0 a4, CP0_EBASE > + ext a4, a4, 0, 10 > + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ > + nop > + > + /* Clear OCX_COM_NODE[ID] */ > + PTR_LI a5, OCTEON_OCX_COM_NODE > + ld a4, 0(a5) > + dins a4, zero, 0, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear L2C_OCI_CTL[GKSEGNODE] */ > + PTR_LI a5, OCTEON_L2C_OCI_CTL > + ld a4, 0(a5) > + dins a4, zero, 4, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ > + dmfc0 a4, CP0_CVMMEMCTL2 > + dins a4, zero, 12, 2 > + dmtc0 a4, CP0_CVMMEMCTL2 > + > + /* Put the flash address in the start of the EBASE register to > + * enable our exception handler but only for core 0. 
> + */ > + mfc0 a4, CP0_EBASE > + dext a4, a4, 0, 10 > + bnez a4, no_flash > + /* OK in delay slot */ > + dext a6, a6, 0, 16 /* Get the base address in flash */ > + sll a6, a6, 16 > + mtc0 a6, CP0_EBASE /* Enable exceptions */ > + > +no_flash: > + /* Zero out various registers */ > + mtc0 zero, CP0_DEPC > + mtc0 zero, CP0_EPC > + mtc0 zero, CP0_CAUSE > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + mtc0 zero, CP0_DESAVE > + > + /* The following are only available on Octeon 2 or later */ > + mtc0 zero, CP0_KSCRATCH1 > + mtc0 zero, CP0_KSCRATCH2 > + mtc0 zero, CP0_KSCRATCH3 > + mtc0 zero, CP0_USERLOCAL > + > + /* Turn off ROMEN bit to disable ROM */ > + PTR_LI a1, OCTEON_MIO_RST_BOOT > + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. > + * The difference is bits 24-26 are 6 instead of 0 for the address. > + */ > + /* For Octeon 2 and CN70XX we can ignore the watchdog */ > + blt a4, OCTEON_PRID_CN78XX, watchdog_ok > + nop > + > + PTR_LI a1, OCTEON_RST_BOOT > + > + beq a4, OCTEON_PRID_CN70XX, watchdog_ok > + nop > + > + ld a2, 0(a1) > + /* There is a bug where some registers don't get properly reset when > + * the watchdog timer causes a reset. In this case we need to force > + * a reset. 
> + */ > + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ > + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ > + /* Clear bit indicating reset due to watchdog */ > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + /* Disable watchdog */ > + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) > + sd zero, 0(a1) > + PTR_LI a1, OCTEON_CIU3_WDOG(0) > + sd zero, 0(a1) > + > + /* Record this in the GSER0_SCRATCH register in bit 11 */ > + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) > + ld a2, 0(a1) > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + PTR_LI a1, OCTEON_RST_SOFT_RST > + li a2, 1 > + sd a2, 0(a1) > + wait > + > + /* We should never get here */ > + > +watchdog_ok: > + ld a2, 0(a1) > + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ > + dins a2, zero, 2, 18 > + dins a2, zero, 60, 1 /* Clear ROMEN bit */ > + sd a2, 0(a1) > + > + /* Start of Octeon setup */ > + > + /* Check what core we are - if core 0, branch to init tlb > + * loop in flash. Otherwise, look up address of init tlb > + * loop that was saved in the boot vector block. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + beqz a0, InitTLBStart_local > + nop > + > + break > + /* We should never get here - non-zero cores now go directly to > + * tlb init from the boot stub in movable region. > + */ > + > + .globl InitTLBStart > +InitTLBStart: > +InitTLBStart_local: > + /* If we don't have working memory yet configure a bunch of > + * scratch memory, and set the stack pointer to the top > + * of it. This allows us to go to C code without having > + * memory set up > + * > + * Warning: do not change SCRATCH_STACK_LINES as this can impact the > + * transition from start.S to crti.asm. crti requires 590 bytes of > + * stack space. 
> + */ > + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + rdhwr v0, $0 > + bnez v0, 1f > + nop > + PTR_LA sp, big_stack_start - 16 > + b stack_clear_done > + nop > +1: > +#endif > +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ > + dmfc0 v0, CP0_CVMMEMCTL > + dins v0, zero, 0, 9 > + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ > + ori v0, 0x100 | SCRATCH_STACK_LINES > + dmtc0 v0, CP0_CVMMEMCTL > + /* set stack to top of scratch memory */ > + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) > + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ > + li t0, 0xffffffffffff8000 > +clear_scratch: > + sd zero, 0(t0) > + addiu t0, 8 > + bne t0, sp, clear_scratch > + nop > + > + /* This code run on all cores - core 0 from flash, > + * the rest from DRAM. When booting from PCI, non-zero cores > + * come directly here from the boot vector - no earlier code in this > + * file is executed. > + */ > + > + /* Some generic initialization is done here as well, as we need this > + * done on all cores even when booting from PCI > + */ > +stack_clear_done: > + /* Clear watch registers. 
*/ > + mtc0 zero, CP0_WATCHLO > + mtc0 zero, CP0_WATCHHI > + > + /* STATUS register */ > + mfc0 k0, CP0_STATUS > + li k1, ~ST0_IE > + and k0, k1 > + mtc0 k0, CP0_STATUS > + > + /* CAUSE register */ > + mtc0 zero, CP0_CAUSE > + > + /* Init Timer */ > + dmtc0 zero, CP0_COUNT > + dmtc0 zero, CP0_COMPARE > + > + > + mfc0 a5, CP0_STATUS > + li v0, 0xE0 /* enable 64 bit mode for CSR access */ > + or v0, v0, a5 > + mtc0 v0, CP0_STATUS > + > + > + dli v0, 1 << 29 /* Enable large physical address support in TLB */ > + mtc0 v0, CP0_PAGEGRAIN > + > +InitTLB: > + dmtc0 zero, CP0_ENTRYLO0 > + dmtc0 zero, CP0_ENTRYLO1 > + mtc0 zero, CP0_PAGEMASK > + dmtc0 zero, CP0_CONTEXT > + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy > + * TLB clearing > + */ > + PTR_LI v0, 0xFFFFFFFF90000000 > + mfc0 a0, CP0_CONFIG1 > + srl a0, a0, 25 > + /* Check if config4 reg present */ > + mfc0 a1, CP0_CONFIG3 > + bbit0 a1, 31, 2f > + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ > + mfc0 a1, CP0_CONFIG4 > + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + ins a0, a1, 6, 8 > + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ > +2: > + dmtc0 zero, CP0_XCONTEXT > + mtc0 zero, CP0_WIRED > + > +InitTLBloop: > + dmtc0 v0, CP0_ENTRYHI > + tlbp > + mfc0 v1, CP0_INDEX > + daddiu v0, v0, 1<<13 > + bgez v1, InitTLBloop > + > + mtc0 a0, CP0_INDEX > + tlbwi > + bnez a0, InitTLBloop > + daddiu a0, -1 > + > + mthi zero > + mtlo zero > + > + /* Set up status register */ > + mfc0 v0, CP0_STATUS > + /* Enable COP0 and COP2 access */ > + li a4, (1 << 28) | (1 << 30) > + or v0, a4 > + > + /* Must leave BEV set here, as DRAM is not configured for core 0. > + * Also, BEV must be 1 later on when the exception base address is set. 
> + */ > + > + /* Mask all interrupts */ > + ins v0, zero, 0, 16 > + /* Clear NMI (used to start cores other than core 0) */ > + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ > + mtc0 v0, CP0_STATUS > + > + dli v0,0xE000000F /* enable all readhw locations */ > + mtc0 v0, CP0_HWRENA > + > + dmfc0 v0, CP0_CVMCTL > + ori v0, 1<<14 /* enable fixup of unaligned mem access */ > + dmtc0 v0, CP0_CVMCTL > + > + /* Setup scratch memory. This is also done in > + * cvmx_user_app_init, and this code will be removed > + * from the bootloader in the near future. > + */ > + > + /* Set L2C_LAD_CTL[MAXLFB] = 0 on CN73XX */ > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + blt a4, OCTEON_PRID_CN73XX, 72f > + nop > + PTR_LI v0, OCTEON_L2C_TAD_CTL > + ld t1, 0(v0) > + dins t1, zero, 0, 4 > + sd t1, 0(v0) > + ld zero, 0(v0) > + > +72: > + > + /* clear these to avoid immediate interrupt in noperf mode */ > + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ > + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ > + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ > + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ > + dmtc0 zero, CP0_PERF_CNT2 > + dmtc0 zero, CP0_PERF_CNT3 > + > + /* If we're running on a node other than 0 then we need to set KSEGNODE > + * to 0. The nice thing with this code is that it also autodetects if > + * we're running on a processor that supports CVMMEMCTL2 or not since > + * only processors that have this will have a non-zero node ID. Because > + * of this there's no need to check if we're running on a 78XX. > + */ > + mfc0 t1, CP0_EBASE > + dext t1, t1, 7, 3 /* Extract node number */ > + beqz t1, is_node0 /* If non-zero then we're not node 0 */ > + nop > + dmfc0 t1, CP0_CVMMEMCTL2 > + dins t1, zero, 12, 4 > + dmtc0 t1, CP0_CVMMEMCTL2 > +is_node0: > + > + /* Set up TLB mappings for u-boot code in flash. */ > + > + /* Use a bal to get the current PC into ra. 
Since this bal is to > + * the address immediately following the delay slot, the ra is > + * the address of the label. We then use this to get the actual > + * address that we are executing from. > + */ > + bal __dummy > + nop > + > +__dummy: > + /* Get the actual address that we are running at */ > + PTR_LA a6, _start /* Linked address of _start */ > + PTR_LA a7, __dummy > + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ > + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ > + > + /* Save actual _start address in s7. This is where we > + * are executing from, as opposed to where the code is > + * linked. > + */ > + move s7, a7 > + move s4, zero > + > + /* s7 has actual address of _start. If this is > + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. > + * If it is on the boot bus, use 0xBFC00000 as the physical address > + * for the TLB mapping, as we will be adjusting the boot bus > + * to make this adjustment. > + * If we are running from DRAM (remote-boot), then we want to use the > + * real address in DRAM. > + */ > + > + /* Check to see if we are running from flash - we expect that to > + * be 0xffffffffb0000000-0xffffffffbfffffff > + * (0x10000000-0x1fffffff, unmapped/uncached) > + */ > + dli t2, 0xffffffffb0000000 > + dsubu t2, s7 > + slt s4, s7, t2 > + bltz t2, uboot_in_flash > + nop > + > + /* If we're not core 0 then we don't care about cache */ > + mfc0 t2, CP0_EBASE > + andi t2, EBASE_CPUNUM > + bnez t2, uboot_in_ram > + nop > + > + /* Find out if we're OCTEON I or OCTEON + which don't support running > + * out of cache. > + */ > + mfc0 t2, CP0_PRID > + ext t2, t2, 8, 8 > + li s4, 1 > + blt t2, 0x90, uboot_in_ram > + nop > + > + /* U-Boot can be executing either in RAM or L2 cache. Now we need to > + * check if DRAM is initialized.
The way we do that is to look at > + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) > + */ > + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL > + ld t2, 0(t2) > + bbit1 t2, 7, uboot_in_ram > + nop > + > + /* We must be executing out of cache */ > + b uboot_in_ram > + li s4, 2 > + > +uboot_in_flash: > + /* Set s4 to 4 to indicate we're running in FLASH */ > + li s4, 4 > + > +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) > + /* By default, L2C index aliasing is enabled. In some cases it may > + * need to be disabled. The L2C index aliasing can only be disabled > + * if U-Boot is running out of L2 cache and the L2 cache has not been > + * used to store anything. > + */ > + PTR_LI t1, OCTEON_L2C_CTL > + ld t2, 0(t1) > + ori t2, 1 > + sd t2, 0(t1) > +#endif > + > + /* Use BFC00000 as physical address for TLB mappings when booting > + * from flash, as we will adjust the boot bus mappings to make this > + * mapping correct. > + */ > + dli a7, 0xFFFFFFFFBFC00000 > + dsubu s6, s7, a7 /* Save flash offset in s6 */ > + > +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) > + /* For OCTEON II we check to see if the L2 cache is big enough to hold > + * U-Boot. If it is big enough then we copy ourself from flash to the > + * L2 cache in order to speed up execution. 
> + */ > + > + /* Check for OCTEON 2 */ > + mfc0 t1, CP0_PRID > + ext t1, t1, 8, 8 > + /* Get number of L2 cache sets */ > + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ > + li t2, 1 << 9 > + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ > + li t2, 1 << 13 > + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ > + li t2, 1 << 10 > + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ > + li t2, 1 << 11 > + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ > + li t2, 1 << 11 > + b l2_cache_too_small /* Unknown OCTEON model */ > + nop > + > +got_l2_sets: > + /* Get number of associations */ > + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 > + ld t0, 0(t0) > + dext t0, t0, 32, 3 > + > + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets > + nop > + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ > + beqz t0, got_l2_ways > + li t3, 16 > + beq t0, 1, got_l2_ways > + li t3, 12 > + beq t0, 2, got_l2_ways > + li t3, 8 > + beq t0, 3, got_l2_ways > + li t3, 4 > + b l2_cache_too_small > + nop > + > +process_70xx_l2sets: > + /* For 70XX, the number of ways is defined as: > + * 0 - full cache (4-way) 512K > + * 1 - 3/4 ways (3-way) 384K > + * 2 - 1/2 ways (2-way) 256K > + * 3 - 1/4 ways (1-way) 128K > + * 4-7 illegal (aliased to 0-3) > + */ > + andi t0, 3 > + beqz t0, got_l2_ways > + li t3, 4 > + beq t0, 1, got_l2_ways > + li t3, 3 > + beq t0, 2, got_l2_ways > + li t3, 2 > + li t3, 1 > + > +got_l2_ways: > + dmul a1, t2, t3 /* Calculate cache size */ > + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ > + daddiu a1, a1, -128 /* Adjust cache size for copy code */ > + > + /* Calculate size of U-Boot image */ > + /* > + * "uboot_end - _start" is not correct, as the image also > + * includes the DTB appended to the end (OF_EMBED is deprecated). > + * Lets use a defined max for now here. 
> + */ > + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT > + > + daddu t2, s5, s7 /* t2 = end address */ > + daddiu t2, t2, 127 > + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ > + > + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ > + bnez t1, l2_cache_too_small > + nop > + /* Address we plan to load at in the L2 cache */ > + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR > +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE > + /* Enable all ways for PP0. Authentik ROM may have disabled these */ > + PTR_LI a1, OCTEON_L2C_WPAR_PP0 > + sd zero, 0(a1) > + > + /* Address to place our memcpy code */ > + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR > + /* The following code writes a simple memcpy routine into the cache > + * to copy ourself from flash into the L2 cache. This makes the > + * memcpy routine a lot faster since each instruction can potentially > + * require four read cycles to flash over the boot bus. > + */ > + /* Zero cache line in the L2 cache */ > + zcb (a0) > + synci 0(zero) > + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ > + sd a1, 0(a0) > + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ > + sd a1, 8(a0) > + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ > + sd a1, 16(a0) > + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ > + sd a1, 24(a0) > + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ > + sd a1, 32(a0) > + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ > + sd a1, 40(a0) > + sd zero, 48(a0) /* nop; nop */ > + > + /* Synchronize the caches */ > + sync > + synci 0(zero) > + > + move t0, s7 > + move t1, t9 > + > + /* Do the memcpy operation in L2 cache to copy ourself from flash > + * to the L2 cache. 
> + */ > + jalr a0 > + nop > + > +# else > + /* Copy ourself to the L2 cache from flash, 32 bytes at a time */ > + /* This code is now written to the L2 cache using the code above */ > +1: > + ld a0, 0(t0) > + ld a1, 8(t0) > + ld a2, 16(t0) > + ld a3, 24(t0) > + sd a0, 0(t1) > + sd a1, 8(t1) > + sd a2, 16(t1) > + sd a3, 24(t1) > + addiu t0, 32 > + bne t0, t2, 1b > + addiu t1, 32 > +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ > + > + /* Adjust the start address of U-Boot and the global pointer */ > + subu t0, s7, t9 /* t0 = address difference */ > + move s7, t9 /* Update physical address */ > + move s2, t9 > + sync > + synci 0(zero) > + > + /* Now we branch to the L2 cache. We first get our PC then adjust it > + */ > + bal 3f > + nop > +3: > + /* Don't add any instructions here! */ > + subu t9, ra, t0 > + /* Give ourself 16 bytes */ > + addiu t9, 0x10 > + > + jal t9 /* Branch to address in L2 cache */ > + > + nop > + nop > + /* Add instructions after here */ > + > + move a7, s7 > + > + b uboot_in_ram > + ori s4, 2 /* Running out of L2 cache */ > + > +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ > +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ > + > + /* This code is only executed if booting from flash. */ > + /* For flash boot (_not_ RAM boot), we do a workaround for > + * an LLM errata on CN38XX and CN58XX parts. > + */ > + > +uboot_in_ram: > + /* U-boot address is now in reg a7, and is 4 MByte aligned. > + * (boot bus addressing has been adjusted to make this happen for flash, > + * and for DRAM this alignment must be provided by the remote boot > + * utility. > + */ > + /* See if we're in KSEG0 range, if so set EBASE register to handle > + * exceptions. > + */ > + dli a1, 0x20000000 > + bge a7, a1, 1f > + nop > + /* Convert our physical address to KSEG0 */ > + PTR_LI a1, 0xffffffff80000000 > + or a1, a1, a7 > + mtc0 a1, CP0_EBASE > +1: > + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping > + * to map u-boot. 
> + */ > + move a0, a6 /* Virtual addr in a0 */ > + dins a0, zero, 0, 16 /* Zero out offset bits */ > + move a1, a7 /* Physical addr in a1 */ > + > + /* Now we need to remove the MIPS address space bits. For this we > + * need to determine if it is a 32 bit compatibility address or not. > + */ > + > + /* 'lowest' address in compatibility space */ > + PTR_LI t0, 0xffffffff80000000 > + dsubu t0, t0, a1 > + bltz t0, compat_space > + nop > + > + /* We have a xkphys address, so strip off top bit */ > + b addr_fixup_done > + dins a1, zero, 63, 1 > + > +compat_space: > + PTR_LI a2, 0x1fffffff > + and a1, a1, a2 /* Mask phy addr to remove address space bits */ > + > +addr_fixup_done: > + /* Currently the u-boot image size is limited to 4 MBytes. In order to > + * support larger images the flash mapping will need to be changed to > + * be able to access more than that before C code is run. Until that > + * is done, we just use a 4 MByte mapping for the secondary cores as > + * well. > + */ > + /* page size (only support 4 Meg binary size for now for core 0) > + * This limitation is due to the fact that the boot vector is > + * 0xBFC00000 which only makes 4MB available. Later more flash > + * address space will be available after U-Boot has been copied to > + * RAM. For now assume that it is in flash. > + */ > + li a2, 2*1024*1024 > + > + mfc0 a4, CP0_EBASE > + andi a4, EBASE_CPUNUM /* get core */ > + beqz a4, core_0_tlb > + nop > + > + /* Now determine how big a mapping to use for secondary cores, > + * which need to map all of u-boot + heap in DRAM > + */ > + /* Here we look at the alignment of the physical address, > + * and use the largest page size possible. In some cases > + * this can result in an oversize mapping, but for secondary cores > + * this mapping is very short lived.
> + */ > + > + /* Physical address in a1 */ > + li a2, 1 > +1: > + sll a2, 1 > + and a5, a1, a2 > + beqz a5, 1b > + nop > + > + /* a2 now contains largest page size we can use */ > +core_0_tlb: > + JAL(single_tlb_setup) > + > + /* Check if we're running from cache */ > + bbit1 s4, 1, uboot_in_cache > + nop > + > + /* If we are already running from ram, we don't need to muck > + * with boot bus mappings. > + */ > + PTR_LI t2, 0xffffffffb0000000 > + dsubu t2, s7 > + /* See if our starting address is lower than the boot bus */ > + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ > + nop > + > +uboot_in_cache: > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* The large stack is only for core 0. For all other cores we need to > + * use the L1 cache otherwise the other cores will stomp on top of each > + * other unless even more space is reserved for the stack space for > + * each core. With potentially 96 cores this gets excessive. > + */ > + mfc0 v0, CP0_EBASE > + andi a0, EBASE_CPUNUM > + bnez a0, no_big_stack > + nop > + PTR_LA sp, big_stack_start > + daddiu sp, -16 > + > +no_big_stack: > +#endif > + /* We now have the TLB set up, so we need to remap the boot bus. > + * This is tricky, as we are running from flash, and will be changing > + * the addressing of the flash. 
> + */ > + /* Enable movable boot bus region 0, at address 0x10000000 */ > + PTR_LI a4, OCTEON_MIO_BOOT_BASE > + dli a5, 0x81000000 /* EN + base address 0x10000000 */ > + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Copy the code that remaps the boot bus to the movable region */ > + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + PTR_LA a6, change_boot_mappings > + GETOFFSET(a5, change_boot_mappings); > + daddu a5, a5, a6 > + > + /* The code is 16 bytes (2 DWORDS) */ > + ld a7, 0(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + ld a7, 8(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + /* Read from an RML register to ensure that the previous writes have > + * completed before we branch to the movable region. > + */ > + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Compute value for boot bus configuration register */ > + /* Read region 0 config so we can _modify_ the base address field */ > + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ > + ld a0, 0(a4) > + dli a4, 0xf0000000 /* Mask off bits we want to save */ > + and a4, a4, a0 > + dli a0, 0x0fff0000 /* Force size to max */ > + or a4, a4, a0 > + > + move a5, s6 > + /* Convert to 64k blocks, as used by boot bus config */ > + srl a5, 16 > + li a6, 0x1fc0 /* 'normal' boot bus base config value */ > + subu a6, a6, a5 /* Subtract offset */ > + /* combine into register value to pass to boot bus routine */ > + or a0, a4, a6 > + > + /* Branch there */ > + PTR_LA a1, __mapped_continue_label > + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 > + /* If region 0 is not enabled we can skip it */ > + ld a4, 0(a2) > + bbit0 a4, 31, __mapped_continue_label > + nop > + li a4, 0x10000000 > + j a4 > + synci 0(zero) > + > + /* We never get here, as we go directly to __mapped_continue_label */ > + break > + > + > +uboot_in_ram2: > + > + /* Now jump to address in TLB mapped memory to continue execution */ > + PTR_LA a4, __mapped_continue_label > + synci 0(a4) > + j a4 > + nop > + > +__mapped_continue_label: > + /*
Check if we are core 0, if we are not then we need > + * to vector to code in DRAM to do application setup, and > + * skip the rest of the bootloader. Only core 0 runs the bootloader > + * and sets up the tables that the other cores will use for > + * configuration. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + /* if (__all_cores_are_equal==0 && core==0), > + * then jump to execute BL on core 0; else 'go to next line' > + * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID)) > + */ > + lw t0, __all_cores_are_equal > + beq a0, t0, core_0_cont1 > + nop > + > + /* other cores look up addr from dram */ > + /* DRAM controller already set up by first core */ > + li a1, (BOOT_VECTOR_NUM_WORDS * 4) > + mul a0, a0, a1 > + > + /* Now find out the boot vector base address from the moveable boot > + * bus region. > + */ > + > + /* Get the address of the boot bus moveable region */ > + PTR_LI t8, OCTEON_MIO_BOOT_BASE > + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) > + /* Make sure it's enabled */ > + bbit0 t9, 31, invalid_boot_vector > + dext t9, t9, 3, 24 > + dsll t9, t9, 7 > + /* Make address XKPHYS */ > + li t0, 1 > + dins t9, t0, 63, 1 > + > + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) > + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 > + bne t0, t1, invalid_boot_vector > + nop > + > + /* Load base address of boot vector table */ > + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) > + /* Add offset for core */ > + daddu a1, t0, a0 > + > + mfc0 v0, CP0_STATUS > + move v1, v0 > + ins v1, zero, 19, 1 /* Clear NMI bit */ > + mtc0 v1, CP0_STATUS > + > + /* Get app start function address */ > + lw t9, 8(a1) > + beqz t9, invalid_boot_vector > + nop > + > + j t9 > + lw k0, 12(a1) /* Load global data (deprecated) */ > + > +invalid_boot_vector: > + wait > + b invalid_boot_vector > + nop > + > +__all_cores_are_equal: > + /* The following .word tell if 'all_cores_are_equal' or core0 is special > + * By default (for the first execution) the core0 should be special, > + 
* in order to behave like the old(existing not-modified) bootloader > + * and run the bootloader on core 0 to follow the existing design. > + * However after that we make 'all_cores_equal' which allows to run SE > + * applications on core0 like on any other core. NOTE that value written > + * to '__all_cores_are_equal' should not match any core ID. > + */ > + .word 0 > + > +core_0_cont1: > + li t0, 0xffffffff > + sw t0, __all_cores_are_equal > + /* From here on, only core 0 runs, other cores have branched > + * away. > + */ > +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM > + /* Set up initial stack and global data */ > + setup_stack_gd > +# ifdef CONFIG_DEBUG_UART > + PTR_LA t9, debug_uart_init > + jalr t9 > + nop > +# endif > +#endif > + move a0, zero # a0 <-- boot_flags = 0 > + PTR_LA t9, board_init_f > + > + jr t9 > + move ra, zero > + END(_start) > + > + .balign 8 > + .globl single_tlb_setup > + .ent single_tlb_setup > + /* Sets up a single TLB entry. Virtual/physical addresses > + * must be properly aligned. > + * a0 Virtual address > + * a1 Physical address > + * a2 page (_not_ mapping) size > + */ > +single_tlb_setup: > + /* Determine the number of TLB entries available, and > + * use the top one. 
> + */ > + mfc0 a3, CP0_CONFIG1 > + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ > + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ > + bbit0 a5, 31, single_tlb_setup_cont > + nop > + mfc0 a5, CP0_CONFIG4 > + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + dins a3, a5, 6, 8 > + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ > + > +single_tlb_setup_cont: > + > + /* Format physical address for entry low */ > + nop > + dsrl a1, a1, 12 > + dsll a1, a1, 6 > + ori a1, a1, 0x7 /* set DVG bits */ > + > + move a4, a2 > + daddu a5, a4, a4 /* mapping size */ > + dsll a6, a4, 1 > + daddiu a6, a6, -1 /* pagemask */ > + dsrl a4, a4, 6 /* adjust for adding with entrylo */ > + > + /* Now set up mapping */ > + mtc0 a6, CP0_PAGEMASK > + mtc0 a3, CP0_INDEX > + > + dmtc0 a1, CP0_ENTRYLO0 > + daddu a1, a1, a4 > + > + dmtc0 a1, CP0_ENTRYLO1 > + daddu a1, a1, a4 > + > + dmtc0 a0, CP0_ENTRYHI > + daddu a0, a0, a5 > + > + ehb > + tlbwi > + jr ra > + nop > + .end single_tlb_setup > + > + > +/** > + * This code is moved to a movable boot bus region, > + * and it is responsible for changing the flash mappings and > + * jumping to run from the TLB mapped address. > + * > + * @param a0 New address for boot bus region 0 > + * @param a1 Address to branch to afterwards > + * @param a2 Address of MIO_BOOT_REG_CFG0 > + */ > + .balign 8 > +change_boot_mappings: > + sd a0, 0(a2) > + sync > + j a1 /* Jump to new TLB mapped location */ > + synci 0(zero) > + > +/* If we need a large stack, allocate it here. */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* Allocate the stack here so it's in L2 cache or DRAM */ > + .balign 16 > +big_stack_end: > + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 > +big_stack_start: > + .dword 0 > +#endif >
Hi Daniel, On 13.05.20 14:49, Daniel Schwierzeck wrote: > sorry for the delay ;) NP. I know that it's sometimes not easy to find the time for this maintainer / review job. ;) > Am 02.05.20 um 10:59 schrieb Stefan Roese: >> From: Aaron Williams <awilliams at marvell.com> >> >> This patch adds very basic support for the Octeon III SoCs. Only >> CFI parallel NOR flash and UART is supported for now. >> >> Please note that the basic Octeon port does not include the DDR3/4 >> initialization yet. This will be added in some follow-up patches >> later. To still use U-Boot on with this port, the L2 cache (4MiB on >> Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the >> prompt on such boards. > > this patch should come after the common MIPS patches Okay, I'll re-arrange the sequence of patches in v2. >> >> Signed-off-by: Aaron Williams <awilliams at marvell.com> >> Signed-off-by: Stefan Roese <sr at denx.de> >> --- >> >> MAINTAINERS | 6 + >> arch/Kconfig | 1 + >> arch/mips/Kconfig | 49 +- >> arch/mips/Makefile | 7 + >> arch/mips/cpu/Makefile | 4 +- >> arch/mips/include/asm/arch-octeon/cavm-reg.h | 42 + >> arch/mips/include/asm/arch-octeon/clock.h | 24 + >> arch/mips/mach-octeon/Kconfig | 92 ++ >> arch/mips/mach-octeon/Makefile | 10 + >> arch/mips/mach-octeon/clock.c | 22 + >> arch/mips/mach-octeon/cpu.c | 55 + >> arch/mips/mach-octeon/dram.c | 27 + >> arch/mips/mach-octeon/include/ioremap.h | 30 + >> arch/mips/mach-octeon/start.S | 1241 ++++++++++++++++++ >> 14 files changed, 1608 insertions(+), 2 deletions(-) >> create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h >> create mode 100644 arch/mips/include/asm/arch-octeon/clock.h >> create mode 100644 arch/mips/mach-octeon/Kconfig >> create mode 100644 arch/mips/mach-octeon/Makefile >> create mode 100644 arch/mips/mach-octeon/clock.c >> create mode 100644 arch/mips/mach-octeon/cpu.c >> create mode 100644 arch/mips/mach-octeon/dram.c >> create mode 100644 arch/mips/mach-octeon/include/ioremap.h >>
create mode 100644 arch/mips/mach-octeon/start.S >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 66f0b07263..29f2d7328c 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -749,6 +749,12 @@ M: Ezequiel Garcia <ezequiel at collabora.com> >> S: Maintained >> F: arch/mips/mach-jz47xx/ >> >> +MIPS Octeon >> +M: Aaron Williams <awilliams at marvell.com> >> +S: Maintained >> +F: arch/mips/mach-octeon/ >> +F: arch/mips/include/asm/arch-octeon/ >> + >> MMC >> M: Peng Fan <peng.fan at nxp.com> >> S: Maintained >> diff --git a/arch/Kconfig b/arch/Kconfig >> index 91e049b322..1cd3e1dc0b 100644 >> --- a/arch/Kconfig >> +++ b/arch/Kconfig >> @@ -37,6 +37,7 @@ config MICROBLAZE >> >> config MIPS >> bool "MIPS architecture" >> + select CREATE_ARCH_SYMLINK > > you should not need that. The path arch/mips/mach-octeon/include/ will > be automatically added to the include search paths. Thus move all files > in arch/mips/include/asm/arch-octeon/ to arch/mips/mach-octeon/include/ Good idea. >> select HAVE_ARCH_IOREMAP >> select HAVE_PRIVATE_LIBGCC >> select SUPPORT_OF_CONTROL >> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig >> index 48e754cc46..3c7f3eb94f 100644 >> --- a/arch/mips/Kconfig >> +++ b/arch/mips/Kconfig >> @@ -106,6 +106,24 @@ config ARCH_JZ47XX >> select OF_CONTROL >> select DM >> >> +config ARCH_OCTEON >> + bool "Support Marvell Octeon CN7xxx platforms" >> + select DISPLAY_CPUINFO >> + select DMA_ADDR_T_64BIT >> + select DM >> + select DM_SERIAL >> + select MIPS_CACHE_COHERENT >> + select MIPS_INIT_STACK_IN_SRAM >> + select MIPS_L2_CACHE >> + select MIPS_TUNE_OCTEON3 >> + select ROM_EXCEPTION_VECTORS >> + select SUPPORTS_BIG_ENDIAN >> + select SUPPORTS_CPU_MIPS64_OCTEON >> + select PHYS_64BIT >> + select OF_CONTROL >> + select OF_LIVE >> + imply CMD_DM >> + >> config MACH_PIC32 >> bool "Support Microchip PIC32" >> select DM >> @@ -160,6 +178,7 @@ source "arch/mips/mach-bmips/Kconfig" >> source "arch/mips/mach-jz47xx/Kconfig" >> source 
"arch/mips/mach-pic32/Kconfig" >> source "arch/mips/mach-mtmips/Kconfig" >> +source "arch/mips/mach-octeon/Kconfig" >> >> if MIPS >> >> @@ -233,6 +252,14 @@ config CPU_MIPS64_R6 >> Choose this option to build a kernel for release 6 or later of the >> MIPS64 architecture. >> >> +config CPU_MIPS64_OCTEON >> + bool "Marvell Octeon series of CPUs" >> + depends on SUPPORTS_CPU_MIPS64_OCTEON >> + select 64BIT >> + help >> + Choose this option for Marvell Octeon CPUs. These CPUs are between >> + MIPS64 R5 and R6 with other extensions. >> + >> endchoice >> >> menu "General setup" >> @@ -261,7 +288,7 @@ config MIPS_CM_BASE >> config MIPS_CACHE_INDEX_BASE >> hex "Index base address for cache initialisation" >> default 0x80000000 if CPU_MIPS32 >> - default 0xffffffff80000000 if CPU_MIPS64 >> + default 0xFFFFFFFFC0000000 if ARCH_OCTEON >> help >> This is the base address for a memory block, which is used for >> initialising the cache lines. This is also the base address of a memory >> @@ -342,6 +369,14 @@ config SPL_LOADER_SUPPORT >> help >> Enable this option if you want to use SPL loaders without DM enabled. >> >> +config MIPS_CACHE_COHERENT >> + bool "Set if MIPS processor is cache coherent" >> + help >> + Enable this if the MIPS architecture is cache coherent like the >> + Marvell Octeon series of SoCs. When this is set, cache flushes >> + and invalidates only flush the write buffer since the hardware >> + maintains cache coherency. 
>> + >> endmenu >> >> menu "OS boot interface" >> @@ -398,6 +433,9 @@ config SUPPORTS_CPU_MIPS64_R2 >> config SUPPORTS_CPU_MIPS64_R6 >> bool >> >> +config SUPPORTS_CPU_MIPS64_OCTEON >> + bool >> + >> config CPU_MIPS32 >> bool >> default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 >> @@ -405,6 +443,7 @@ config CPU_MIPS32 >> config CPU_MIPS64 >> bool >> default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 >> + default y if CPU_MIPS64_OCTEON >> >> config MIPS_TUNE_4KC >> bool >> @@ -421,6 +460,9 @@ config MIPS_TUNE_34KC >> config MIPS_TUNE_74KC >> bool >> >> +config MIPS_TUNE_OCTEON3 >> + bool >> + >> config 32BIT >> bool >> >> @@ -453,6 +495,11 @@ config MIPS_SRAM_INIT >> before it can be used. If enabled, a function mips_sram_init() will >> be called just before setup_stack_gd. >> >> +config DMA_ADDR_T_64BIT >> + bool >> + help >> + Select this to enable 64-bit DMA addressing >> + >> config SYS_DCACHE_SIZE >> int >> default 0 >> diff --git a/arch/mips/Makefile b/arch/mips/Makefile >> index af3f227436..fa1ba7855a 100644 >> --- a/arch/mips/Makefile >> +++ b/arch/mips/Makefile >> @@ -1,6 +1,10 @@ >> # SPDX-License-Identifier: GPL-2.0+ >> >> +ifneq ($(CONFIG_ARCH_OCTEON),y) >> head-y := arch/mips/cpu/start.o >> +else >> +head-y := arch/mips/mach-octeon/start.o >> +endif >> >> ifeq ($(CONFIG_SPL_BUILD),y) >> ifneq ($(CONFIG_SPL_START_S_PATH),) >> @@ -17,6 +21,7 @@ machine-$(CONFIG_ARCH_JZ47XX) += jz47xx >> machine-$(CONFIG_MACH_PIC32) += pic32 >> machine-$(CONFIG_ARCH_MTMIPS) += mtmips >> machine-$(CONFIG_ARCH_MSCC) += mscc >> +machine-${CONFIG_ARCH_OCTEON} += octeon >> >> machdirs := $(patsubst %,arch/mips/mach-%/,$(machine-y)) >> libs-y += $(machdirs) >> @@ -30,6 +35,7 @@ arch-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,-mips32r6 >> arch-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,-mips64 >> arch-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,-mips64r2 >> arch-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,-mips64r6 >> 
+arch-${CONFIG_CPU_MIPS64_OCTEON} += -march=octeon3 >> >> # Allow extra optimization for specific CPUs/SoCs >> tune-$(CONFIG_MIPS_TUNE_4KC) += -mtune=4kc >> @@ -37,6 +43,7 @@ tune-$(CONFIG_MIPS_TUNE_14KC) += -mtune=14kc >> tune-$(CONFIG_MIPS_TUNE_24KC) += -mtune=24kc >> tune-$(CONFIG_MIPS_TUNE_34KC) += -mtune=34kc >> tune-$(CONFIG_MIPS_TUNE_74KC) += -mtune=74kc >> +tune-${CONFIG_MIPS_TUNE_OCTEON3} += -mtune=octeon3 >> >> # Include default header files >> cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic >> diff --git a/arch/mips/cpu/Makefile b/arch/mips/cpu/Makefile >> index 6df7bb4e48..732015d6f3 100644 >> --- a/arch/mips/cpu/Makefile >> +++ b/arch/mips/cpu/Makefile >> @@ -1,6 +1,8 @@ >> # SPDX-License-Identifier: GPL-2.0+ >> >> -extra-y = start.o >> +ifneq ($(CONFIG_ARCH_OCTEON),y) >> +extra-y = start.o >> +endif >> >> obj-y += time.o >> obj-y += interrupts.o >> diff --git a/arch/mips/include/asm/arch-octeon/cavm-reg.h b/arch/mips/include/asm/arch-octeon/cavm-reg.h >> new file mode 100644 >> index 0000000000..b961e54956 >> --- /dev/null >> +++ b/arch/mips/include/asm/arch-octeon/cavm-reg.h >> @@ -0,0 +1,42 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Copyright (C) 2020 Marvell International Ltd. 
>> + */ >> + >> +#ifndef __CAVM_REG_H__ >> + >> +/* Register offsets */ >> +#define CAVM_CIU_FUSE ((u64 *)0x80010100000001a0) >> +#define CAVM_MIO_BOOT_REG_CFG0 ((u64 *)0x8001180000000000) >> +#define CAVM_RST_BOOT ((u64 *)0x8001180006001600) >> + >> +/* Register structs */ >> + >> +/** >> + * Register (RSL) rst_boot >> + * >> + * RST Boot Register >> + */ >> +union cavm_rst_boot { >> + u64 u; >> + struct cavm_rst_boot_s { >> + u64 chipkill : 1; >> + u64 jtcsrdis : 1; >> + u64 ejtagdis : 1; >> + u64 romen : 1; >> + u64 ckill_ppdis : 1; >> + u64 jt_tstmode : 1; >> + u64 vrm_err : 1; >> + u64 reserved_37_56 : 20; >> + u64 c_mul : 7; >> + u64 pnr_mul : 6; >> + u64 reserved_21_23 : 3; >> + u64 lboot_oci : 3; >> + u64 lboot_ext : 6; >> + u64 lboot : 10; >> + u64 rboot : 1; >> + u64 rboot_pin : 1; >> + } s; >> +}; >> + >> +#endif /* __CAVM_REG_H__ */ >> diff --git a/arch/mips/include/asm/arch-octeon/clock.h b/arch/mips/include/asm/arch-octeon/clock.h >> new file mode 100644 >> index 0000000000..a844a222c9 >> --- /dev/null >> +++ b/arch/mips/include/asm/arch-octeon/clock.h >> @@ -0,0 +1,24 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Copyright (C) 2018, 2019 Marvell International Ltd. 
>> + * >> + * https://spdx.org/licenses >> + */ >> + >> +#ifndef __CLOCK_H__ >> + >> +/** System PLL reference clock */ >> +#define PLL_REF_CLK 50000000 /* 50 MHz */ >> +#define NS_PER_REF_CLK_TICK (1000000000 / PLL_REF_CLK) >> + >> +/** >> + * Returns the I/O clock speed in Hz >> + */ >> +u64 octeon_get_io_clock(void); >> + >> +/** >> + * Returns the core clock speed in Hz >> + */ >> +u64 octeon_get_core_clock(void); >> + >> +#endif /* __CLOCK_H__ */ >> diff --git a/arch/mips/mach-octeon/Kconfig b/arch/mips/mach-octeon/Kconfig >> new file mode 100644 >> index 0000000000..67fcb6058c >> --- /dev/null >> +++ b/arch/mips/mach-octeon/Kconfig >> @@ -0,0 +1,92 @@ >> +menu "Octeon platforms" >> + depends on ARCH_OCTEON >> + >> +config SYS_SOC >> + string >> + default "octeon" >> + >> +config OCTEON_CN7XXX >> + bool "Octeon CN7XXX SoC" >> + >> +config OCTEON_CN70XX >> + bool "Octeon CN70XX SoC" >> + select OCTEON_CN7XXX >> + >> +config OCTEON_CN73XX >> + bool "Octeon CN73XX SoC" >> + select OCTEON_CN7XXX >> + >> +config OCTEON_CN78XX >> + bool "Octeon CN78XX SoC" >> + select OCTEON_CN7XXX >> + >> +choice >> + prompt "Octeon MIPS family select" >> + >> +config SOC_OCTEON2 >> + bool "Octeon II family" >> + help >> + This selects the Octeon II SoC family > > this should be added later when needed Yes, makes sense. >> + >> +config SOC_OCTEON3 >> + bool "Octeon III family" >> + help >> + This selects the Octeon III SoC family CN70xx, CN73XX, CN78xx >> + and CNF75XX. >> + >> +endchoice >> + >> +config SYS_DCACHE_SIZE >> + default 32768 >> + >> +config SYS_DCACHE_LINE_SIZE >> + default 128 >> + >> +config SYS_ICACHE_SIZE >> + default 79872 >> + >> +config SYS_ICACHE_LINE_SIZE >> + default 128 >> + >> +config OCTEON_BIG_STACK_SIZE >> + hex >> + default 0x4000 >> + help >> + This enables a larger stack needed for Octeon 3 DRAM initialization. >> + If this is disabled then a part of the L1 cache will be reserved for >> + the stack, resulting in a smaller image. 
If this is true then >> + a portion of the TEXT address space will be reserved for the stack. >> + Note that this requires that U-Boot MUST be able to fit entirely >> + within the L2 cache and cannot be executed from a parallel NOR flash. >> + The default size is 16KiB. >> + >> +config OCTEON_COPY_FROM_FLASH_TO_L2 >> + bool >> + default y >> + help >> + Set this for U-Boot to attempt to copy itself from flash memory into >> + the L2 cache. This significantly improves the boot performance. >> + >> +config OCTEON_L2_MEMCPY_IN_CACHE >> + bool >> + default y >> + help >> + If this is set then the memcpy code that is used to copy U-Boot from >> + the flash to the L2 cache is written to the L2 cache. This >> + significantly speeds up the memcpy operation. >> + >> +config OCTEON_L2_UBOOT_ADDR >> + hex >> + default 0xffffffff81000000 >> + help >> + This specifies the address where U-Boot will be copied into the L2 >> + cache. >> + >> +config OCTEON_L2_MEMCPY_ADDR >> + hex >> + default 0xffffffff81400000 >> + help >> + This specifies where U-Boot will place the memcpy routine used for >> + copying U-Boot from flash to L2 cache. >> + >> +endmenu >> diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile >> new file mode 100644 >> index 0000000000..a5fda682a7 >> --- /dev/null >> +++ b/arch/mips/mach-octeon/Makefile >> @@ -0,0 +1,10 @@ >> +# (C) Copyright 2019 Marvell, Inc. >> +# >> +# SPDX-License-Identifier: GPL-2.0+ >> +# >> + >> +extra-y = start.o >> + >> +obj-y += clock.o >> +obj-y += cpu.o >> +obj-y += dram.o >> diff --git a/arch/mips/mach-octeon/clock.c b/arch/mips/mach-octeon/clock.c >> new file mode 100644 >> index 0000000000..6e32008641 >> --- /dev/null >> +++ b/arch/mips/mach-octeon/clock.c >> @@ -0,0 +1,22 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * Copyright (C) 2018, 2019 Marvell International Ltd.
>> + */ >> + >> +#include <common.h> >> +#include <asm/arch/clock.h> >> + >> +DECLARE_GLOBAL_DATA_PTR; >> + >> +int octeon_get_timer_freq(void) >> +{ >> + return gd->cpu_clk; >> +} >> + >> +/** >> + * Returns the I/O clock speed in Hz >> + */ >> +u64 octeon_get_io_clock(void) >> +{ >> + return gd->bus_clk; >> +} >> diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c >> new file mode 100644 >> index 0000000000..a1373c6d56 >> --- /dev/null >> +++ b/arch/mips/mach-octeon/cpu.c >> @@ -0,0 +1,55 @@ >> +// SPDX-License-Identifier: GPL-2.0+ >> +/* >> + * Copyright (C) 2020 Marvell International Ltd. >> + */ >> + >> +#include <common.h> >> +#include <linux/io.h> >> +#include <asm/arch/clock.h> >> +#include <asm/arch-octeon/cavm-reg.h> >> + >> +DECLARE_GLOBAL_DATA_PTR; >> + >> +static int get_clocks(void) >> +{ >> + const u64 ref_clock = PLL_REF_CLK; >> + union cavm_rst_boot rst_boot; >> + >> + rst_boot.u = ioread64(CAVM_RST_BOOT); >> + gd->cpu_clk = ref_clock * rst_boot.s.c_mul; >> + gd->bus_clk = ref_clock * rst_boot.s.pnr_mul; >> + >> + debug("%s: cpu: %lu, bus: %lu\n", __func__, gd->cpu_clk, gd->bus_clk); >> + >> + return 0; >> +} >> + >> +/* Early mach init code run from flash */ >> +int mach_cpu_init(void) >> +{ >> + /* Remap boot-bus 0x1fc0.0000 -> 0x1f40.0000 */ >> + /* ToDo: Move this to an early running bus (bootbus) DM driver */ >> + clrsetbits_be64(CAVM_MIO_BOOT_REG_CFG0, 0xffff, 0x1f40); >> + >> + /* Get clocks and store them in GD */ >> + get_clocks(); >> + >> + return 0; >> +} >> + >> +/** >> + * Returns number of cores >> + * >> + * @return number of CPU cores for the specified node >> + */ >> +static int cavm_octeon_num_cores(void) >> +{ >> + return fls64(ioread64(CAVM_CIU_FUSE) & 0xffffffffffff); >> +} >> + >> +int print_cpuinfo(void) >> +{ >> + printf("SoC: Octeon CN73xx (%d cores)\n", cavm_octeon_num_cores()); >> + >> + return 0; >> +} >> diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c >> new file mode 100644 
>> index 0000000000..c16a73e8e6 >> --- /dev/null >> +++ b/arch/mips/mach-octeon/dram.c >> @@ -0,0 +1,27 @@ >> +// SPDX-License-Identifier: GPL-2.0+ >> +/* >> + * Copyright (C) 2020 Marvell International Ltd. >> + */ >> + >> +#include <common.h> >> +#include <dm.h> >> +#include <ram.h> >> + >> +DECLARE_GLOBAL_DATA_PTR; >> + >> +int dram_init(void) >> +{ >> + /* >> + * No DDR init yet -> run in L2 cache >> + */ >> + gd->ram_size = (2 << 20); >> + gd->bd->bi_dram[0].size = gd->ram_size; >> + gd->bd->bi_dram[1].size = 0; >> + >> + return 0; >> +} >> + >> +ulong board_get_usable_ram_top(ulong total_size) >> +{ >> + return gd->ram_top; >> +} >> diff --git a/arch/mips/mach-octeon/include/ioremap.h b/arch/mips/mach-octeon/include/ioremap.h >> new file mode 100644 >> index 0000000000..59b75008a2 >> --- /dev/null >> +++ b/arch/mips/mach-octeon/include/ioremap.h >> @@ -0,0 +1,30 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef __ASM_MACH_OCTEON_IOREMAP_H >> +#define __ASM_MACH_OCTEON_IOREMAP_H >> + >> +#include <linux/types.h> >> + >> +/* >> + * Allow physical addresses to be fixed up to help peripherals located >> + * outside the low 32-bit range -- generic pass-through version. 
>> + */ >> +static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, >> + phys_addr_t size) >> +{ >> + return phys_addr; >> +} >> + >> +static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, >> + unsigned long flags) >> +{ >> + return (void __iomem *)(XKPHYS | offset); >> +} >> + >> +static inline int plat_iounmap(const volatile void __iomem *addr) >> +{ >> + return 0; >> +} >> + >> +#define _page_cachable_default _CACHE_CACHABLE_NONCOHERENT >> + >> +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ >> diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S >> new file mode 100644 >> index 0000000000..acb967201a >> --- /dev/null >> +++ b/arch/mips/mach-octeon/start.S >> @@ -0,0 +1,1241 @@ >> +/* SPDX-License-Identifier: GPL-2.0+ */ >> +/* >> + * Startup Code for OCTEON 64-bit CPU-core >> + * >> + * Copyright (c) 2003 Wolfgang Denk <wd at denx.de> >> + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. >> + */ >> + >> +#include <asm-offsets.h> >> +#include <config.h> >> +#include <asm/regdef.h> >> +#include <asm/mipsregs.h> >> +#include <asm/asm.h> >> + >> +#define BOOT_VECTOR_NUM_WORDS 8 >> + >> +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 >> +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 >> + >> +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd >> +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW >> + >> +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 >> + >> +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 >> +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 >> +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 >> +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 >> +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 >> +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 >> +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 >> +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 >> +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 >> +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 >> +#define 
OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 >> +#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 >> +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 >> +#define OCTEON_L2D_FUS3 0x80011800800007B8 >> +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 >> + >> +#define OCTEON_RST 0x8001180006000000 >> +#define OCTEON_RST_BOOT_OFFSET 0x1600 >> +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 >> +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) >> +#define OCTEON_RST_BOOT 0x8001180006001600 >> +#define OCTEON_RST_SOFT_RST 0x8001180006001680 >> +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) >> + >> +#define OCTEON_OCX_COM_NODE 0x8001180011000000 >> +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 >> +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 >> +#define OCTEON_L2C_CTL 0x8001180080800000 >> + >> +#define OCTEON_DBG_DATA 0x80011F00000001E8 >> +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 >> +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 >> +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + (X) * 8) >> +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) >> +#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8) >> +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) >> +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) >> +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 >> +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000) >> + >> +/** PRID for CN56XX */ >> +#define OCTEON_PRID_CN56XX 0x04 >> +/** PRID for CN52XX */ >> +#define OCTEON_PRID_CN52XX 0x07 >> +/** PRID for CN63XX */ >> +#define OCTEON_PRID_CN63XX 0x90 >> +/** PRID for CN68XX */ >> +#define OCTEON_PRID_CN68XX 0x91 >> +/** PRID for CN66XX */ >> +#define OCTEON_PRID_CN66XX 0x92 >> +/** PRID for CN61XX */ >> +#define OCTEON_PRID_CN61XX 0x93 >> +/** PRID for CNF71XX */ >> +#define OCTEON_PRID_CNF71XX 0x94 >> +/** PRID for CN78XX */ >> +#define OCTEON_PRID_CN78XX 0x95 >> +/** PRID for CN70XX */ >> +#define OCTEON_PRID_CN70XX 
0x96 >> +/** PRID for CN73XX */ >> +#define OCTEON_PRID_CN73XX 0x97 >> +/** PRID for CNF75XX */ >> +#define OCTEON_PRID_CNF75XX 0x98 >> + >> +/* func argument is used to create a mark, must be unique */ >> +#define GETOFFSET(reg, func) \ >> + .balign 8; \ >> + bal func ##_mark; \ >> + nop; \ >> + .dword .; \ >> +func ##_mark: \ >> + ld reg, 0(ra); \ >> + dsubu reg, ra, reg; >> + >> +#define JAL(func) \ >> + .balign 8; \ >> + bal func ##_mark; \ >> + nop; \ >> + .dword .; \ >> +func ##_mark: \ >> + ld t8, 0(ra); \ >> + dsubu t8, ra, t8; \ >> + dla t9, func; \ >> + daddu t9, t9, t8; \ >> + jalr t9; \ >> + nop; >> + >> + .set arch=octeon3 >> + .set noreorder >> + >> + .macro uhi_mips_exception >> + move k0, t9 # preserve t9 in k0 >> + move k1, a0 # preserve a0 in k1 >> + li t9, 15 # UHI exception operation >> + li a0, 0 # Use hard register context >> + sdbbp 1 # Invoke UHI operation >> + .endm >> + >> + .macro setup_stack_gd >> + li t0, -16 >> + PTR_LI t1, big_stack_start >> + and sp, t1, t0 # force 16 byte alignment >> + PTR_SUBU \ >> + sp, sp, GD_SIZE # reserve space for gd >> + and sp, sp, t0 # force 16 byte alignment >> + move k0, sp # save gd pointer >> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ >> + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) >> + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) >> + PTR_SUBU \ >> + sp, sp, t2 # reserve space for early malloc >> + and sp, sp, t0 # force 16 byte alignment >> +#endif >> + move fp, sp >> + >> + /* Clear gd */ >> + move t0, k0 >> +1: >> + PTR_S zero, 0(t0) >> + PTR_ADDIU t0, PTRSIZE >> + blt t0, t1, 1b >> + nop >> + >> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ >> + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) >> + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset >> +#endif >> + .endm >> + >> +/* Saved register usage: >> + * s0: not used >> + * s1: not used >> + * s2: Address U-Boot loaded into in L2 cache >> + * s3: Start address >> + * s4: flags >> + * 1: booting from RAM >> + * 2: executing out of cache >> + * 4: booting 
from flash >> + * s5: u-boot size (data end - _start) >> + * s6: offset in flash. >> + * s7: _start physical address >> + * s8: >> + */ >> + >> +ENTRY(_start) >> + /* U-Boot entry point */ >> + b reset >> + >> + /* The above jump instruction/nop are considered part of the >> + * bootloader_header_t structure but are not changed when the header is >> + * updated. >> + */ >> + >> + /* Leave room for bootloader_header_t header at start of binary. This >> + * header is used to identify the board the bootloader is for, what >> + * address it is linked at, failsafe/normal, etc. It also contains a >> + * CRC of the entire image. >> + */ >> + >> +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) >> + /* >> + * Exception vector entry points. When running from ROM, an exception >> + * cannot be handled. Halt execution and transfer control to debugger, >> + * if one is attached. >> + */ >> + .org 0x200 >> + /* TLB refill, 32 bit task */ >> + uhi_mips_exception >> + >> + .org 0x280 >> + /* XTLB refill, 64 bit task */ >> + uhi_mips_exception >> + >> + .org 0x300 >> + /* Cache error exception */ >> + uhi_mips_exception >> + >> + .org 0x380 >> + /* General exception */ >> + uhi_mips_exception >> + >> + .org 0x400 >> + /* Catch interrupt exceptions */ >> + uhi_mips_exception >> + >> + .org 0x480 >> + /* EJTAG debug exception */ >> +1: b 1b >> + nop >> + >> + .org 0x500 >> +#endif >> + >> +/* Reserve extra space so that when we use the boot bus local memory >> + * segment to remap the debug exception vector we don't overwrite >> + * anything useful >> + */ >> + >> +/* Basic exception handler (dump registers) in all ASM. When using the TLB for >> + * mapping u-boot C code, we can't branch to that C code for exception handling >> + * (TLB is disabled for some exceptions. 
>> + */ >> + >> +/* RESET/start here */ >> + .balign 8 >> +reset: >> + nop >> + synci 0(zero) >> + mfc0 k0, CP0_STATUS >> + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ >> + mtc0 k0, CP0_STATUS >> + >> + /* Save the address we're booting from, strip off low bits */ >> + bal 1f >> + nop >> +1: >> + move s3, ra >> + dins s3, zero, 0, 12 >> + >> + /* Disable boot bus moveable regions */ >> + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 >> + sd zero, 0(k0) >> + sd zero, 8(k0) >> + >> + /* Disable the watchdog timer >> + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see >> + * if we use CIU3 or CIU. >> + */ >> + mfc0 t0, CP0_PRID >> + ext t0, t0, 8, 8 >> + /* Assume CIU */ >> + PTR_LI t1, OCTEON_CIU_WDOG(0) >> + PTR_LI t2, OCTEON_CIU_PP_POKE(0) >> + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu >> + nop >> + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu >> + nop >> + /* Use CIU3 */ >> + PTR_LI t1, OCTEON_CIU3_WDOG(0) >> + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) >> +wd_use_ciu: >> + sd zero, 0(t2) /* Pet the dog */ >> + sd zero, 0(t1) /* Disable watchdog timer */ >> + >> + /* Errata: CN76XX has a node ID of 3. change it to zero here. >> + * This needs to be done before we relocate to L2 as addresses change >> + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], >> + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. >> + */ >> + mfc0 a4, CP0_PRID >> + /* Check for 78xx pass 1.x processor ID */ >> + andi a4, 0xffff >> + blt a4, (OCTEON_PRID_CN78XX << 8), 1f >> + nop >> + >> + /* Zero out alternate package for now */ >> + dins a4, zero, 6, 1 >> + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f >> + nop >> + >> + /* 78xx or 76xx here, first check for bug #27141 */ >> + PTR_LI a5, OCTEON_SLI_CTL_STATUS >> + ld a6, 0(a5) >> + andi a7, a4, 0xff >> + andi a6, a6, 0xff >> + >> + beq a6, a7, not_bug27141 >> + nop >> + >> + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ >> + /* We just hit bug #27141. 
Need to reset the chip and try again */ >> + >> + PTR_LI a4, OCTEON_RST_SOFT_RST >> + ori a5, zero, 0x1 /* set the reset bit */ >> + >> +reset_78xx_27141: >> + sync >> + synci 0(zero) >> + cache 9, 0(zero) >> + sd a5, 0(a4) >> + wait >> + b reset_78xx_27141 >> + nop >> + >> +not_bug27141: >> + /* 76XX pass 1.x has the node number set to 3 */ >> + mfc0 a4, CP0_EBASE >> + ext a4, a4, 0, 10 >> + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ >> + nop >> + >> + /* Clear OCX_COM_NODE[ID] */ >> + PTR_LI a5, OCTEON_OCX_COM_NODE >> + ld a4, 0(a5) >> + dins a4, zero, 0, 2 >> + sd a4, 0(a5) >> + ld zero, 0(a5) >> + >> + /* Clear L2C_OCI_CTL[GKSEGNODE] */ >> + PTR_LI a5, OCTEON_L2C_OCI_CTL >> + ld a4, 0(a5) >> + dins a4, zero, 4, 2 >> + sd a4, 0(a5) >> + ld zero, 0(a5) >> + >> + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ >> + dmfc0 a4, CP0_CVMMEMCTL2 >> + dins a4, zero, 12, 2 >> + dmtc0 a4, CP0_CVMMEMCTL2 >> + >> + /* Put the flash address in the start of the EBASE register to >> + * enable our exception handler but only for core 0. >> + */ >> + mfc0 a4, CP0_EBASE >> + dext a4, a4, 0, 10 >> + bnez a4, no_flash >> + /* OK in delay slot */ >> + dext a6, a6, 0, 16 /* Get the base address in flash */ >> + sll a6, a6, 16 >> + mtc0 a6, CP0_EBASE /* Enable exceptions */ >> + >> +no_flash: >> + /* Zero out various registers */ >> + mtc0 zero, CP0_DEPC >> + mtc0 zero, CP0_EPC >> + mtc0 zero, CP0_CAUSE >> + mfc0 a4, CP0_PRID >> + ext a4, a4, 8, 8 >> + mtc0 zero, CP0_DESAVE >> + >> + /* The following are only available on Octeon 2 or later */ >> + mtc0 zero, CP0_KSCRATCH1 >> + mtc0 zero, CP0_KSCRATCH2 >> + mtc0 zero, CP0_KSCRATCH3 >> + mtc0 zero, CP0_USERLOCAL >> + >> + /* Turn off ROMEN bit to disable ROM */ >> + PTR_LI a1, OCTEON_MIO_RST_BOOT >> + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. >> + * The difference is bits 24-26 are 6 instead of 0 for the address. 
>> + */ >> + /* For Octeon 2 and CN70XX we can ignore the watchdog */ >> + blt a4, OCTEON_PRID_CN78XX, watchdog_ok >> + nop >> + >> + PTR_LI a1, OCTEON_RST_BOOT >> + >> + beq a4, OCTEON_PRID_CN70XX, watchdog_ok >> + nop >> + >> + ld a2, 0(a1) >> + /* There is a bug where some registers don't get properly reset when >> + * the watchdog timer causes a reset. In this case we need to force >> + * a reset. >> + */ >> + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ >> + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ >> + /* Clear bit indicating reset due to watchdog */ >> + ori a2, 1 << 11 >> + sd a2, 0(a1) >> + >> + /* Disable watchdog */ >> + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) >> + sd zero, 0(a1) >> + PTR_LI a1, OCTEON_CIU3_WDOG(0) >> + sd zero, 0(a1) >> + >> + /* Record this in the GSER0_SCRATCH register in bit 11 */ >> + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) >> + ld a2, 0(a1) >> + ori a2, 1 << 11 >> + sd a2, 0(a1) >> + >> + PTR_LI a1, OCTEON_RST_SOFT_RST >> + li a2, 1 >> + sd a2, 0(a1) >> + wait >> + >> + /* We should never get here */ >> + >> +watchdog_ok: >> + ld a2, 0(a1) >> + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ >> + dins a2, zero, 2, 18 >> + dins a2, zero, 60, 1 /* Clear ROMEN bit */ >> + sd a2, 0(a1) >> + >> + /* Start of Octeon setup */ >> + >> + /* Check what core we are - if core 0, branch to init tlb >> + * loop in flash. Otherwise, look up address of init tlb >> + * loop that was saved in the boot vector block. >> + */ >> + mfc0 a0, CP0_EBASE >> + andi a0, EBASE_CPUNUM /* get core */ >> + beqz a0, InitTLBStart_local >> + nop >> + >> + break >> + /* We should never get here - non-zero cores now go directly to >> + * tlb init from the boot stub in movable region. >> + */ >> + >> + .globl InitTLBStart >> +InitTLBStart: >> +InitTLBStart_local: >> + /* If we don't have working memory yet configure a bunch of >> + * scratch memory, and set the stack pointer to the top >> + * of it. 
This allows us to go to C code without having >> + * memory set up >> + * >> + * Warning: do not change SCRATCH_STACK_LINES as this can impact the >> + * transition from start.S to crti.asm. crti requires 590 bytes of >> + * stack space. >> + */ >> + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + rdhwr v0, $0 >> + bnez v0, 1f >> + nop >> + PTR_LA sp, big_stack_start - 16 >> + b stack_clear_done >> + nop >> +1: >> +#endif >> +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ >> + dmfc0 v0, CP0_CVMMEMCTL >> + dins v0, zero, 0, 9 >> + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ >> + ori v0, 0x100 | SCRATCH_STACK_LINES >> + dmtc0 v0, CP0_CVMMEMCTL >> + /* set stack to top of scratch memory */ >> + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) >> + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ >> + li t0, 0xffffffffffff8000 >> +clear_scratch: >> + sd zero, 0(t0) >> + addiu t0, 8 >> + bne t0, sp, clear_scratch >> + nop >> + >> + /* This code run on all cores - core 0 from flash, >> + * the rest from DRAM. When booting from PCI, non-zero cores >> + * come directly here from the boot vector - no earlier code in this >> + * file is executed. >> + */ >> + >> + /* Some generic initialization is done here as well, as we need this >> + * done on all cores even when booting from PCI >> + */ >> +stack_clear_done: >> + /* Clear watch registers. 
*/ >> + mtc0 zero, CP0_WATCHLO >> + mtc0 zero, CP0_WATCHHI >> + >> + /* STATUS register */ >> + mfc0 k0, CP0_STATUS >> + li k1, ~ST0_IE >> + and k0, k1 >> + mtc0 k0, CP0_STATUS >> + >> + /* CAUSE register */ >> + mtc0 zero, CP0_CAUSE >> + >> + /* Init Timer */ >> + dmtc0 zero, CP0_COUNT >> + dmtc0 zero, CP0_COMPARE >> + >> + >> + mfc0 a5, CP0_STATUS >> + li v0, 0xE0 /* enable 64 bit mode for CSR access */ >> + or v0, v0, a5 >> + mtc0 v0, CP0_STATUS >> + >> + >> + dli v0, 1 << 29 /* Enable large physical address support in TLB */ >> + mtc0 v0, CP0_PAGEGRAIN >> + >> +InitTLB: >> + dmtc0 zero, CP0_ENTRYLO0 >> + dmtc0 zero, CP0_ENTRYLO1 >> + mtc0 zero, CP0_PAGEMASK >> + dmtc0 zero, CP0_CONTEXT >> + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy >> + * TLB clearing >> + */ >> + PTR_LI v0, 0xFFFFFFFF90000000 >> + mfc0 a0, CP0_CONFIG1 >> + srl a0, a0, 25 >> + /* Check if config4 reg present */ >> + mfc0 a1, CP0_CONFIG3 >> + bbit0 a1, 31, 2f >> + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ >> + mfc0 a1, CP0_CONFIG4 >> + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ >> + nop >> + /* append config4[MMUSizeExt] to most significant bit of >> + * config1[MMUSize-1] >> + */ >> + ins a0, a1, 6, 8 >> + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ >> +2: >> + dmtc0 zero, CP0_XCONTEXT >> + mtc0 zero, CP0_WIRED >> + >> +InitTLBloop: >> + dmtc0 v0, CP0_ENTRYHI >> + tlbp >> + mfc0 v1, CP0_INDEX >> + daddiu v0, v0, 1<<13 >> + bgez v1, InitTLBloop >> + >> + mtc0 a0, CP0_INDEX >> + tlbwi >> + bnez a0, InitTLBloop >> + daddiu a0, -1 >> + >> + mthi zero >> + mtlo zero >> + >> + /* Set up status register */ >> + mfc0 v0, CP0_STATUS >> + /* Enable COP0 and COP2 access */ >> + li a4, (1 << 28) | (1 << 30) >> + or v0, a4 >> + >> + /* Must leave BEV set here, as DRAM is not configured for core 0. >> + * Also, BEV must be 1 later on when the exception base address is set. 
>> + */ >> + >> + /* Mask all interrupts */ >> + ins v0, zero, 0, 16 >> + /* Clear NMI (used to start cores other than core 0) */ >> + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ >> + mtc0 v0, CP0_STATUS >> + >> + dli v0,0xE000000F /* enable all readhw locations */ >> + mtc0 v0, CP0_HWRENA >> + >> + dmfc0 v0, CP0_CVMCTL >> + ori v0, 1<<14 /* enable fixup of unaligned mem access */ >> + dmtc0 v0, CP0_CVMCTL >> + >> + /* Setup scratch memory. This is also done in >> + * cvmx_user_app_init, and this code will be removed >> + * from the bootloader in the near future. >> + */ >> + >> + /* Set L2C_LAD_CTL[MAXLFB] = 0 on CN73XX */ >> + mfc0 a4, CP0_PRID >> + ext a4, a4, 8, 8 >> + blt a4, OCTEON_PRID_CN73XX, 72f >> + nop >> + PTR_LI v0, OCTEON_L2C_TAD_CTL >> + ld t1, 0(v0) >> + dins t1, zero, 0, 4 >> + sd t1, 0(v0) >> + ld zero, 0(v0) >> + >> +72: >> + >> + /* clear these to avoid immediate interrupt in noperf mode */ >> + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ >> + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ >> + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ >> + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ >> + dmtc0 zero, CP0_PERF_CNT2 >> + dmtc0 zero, CP0_PERF_CNT3 >> + >> + /* If we're running on a node other than 0 then we need to set KSEGNODE >> + * to 0. The nice thing with this code is that it also autodetects if >> + * we're running on a processor that supports CVMMEMCTL2 or not since >> + * only processors that have this will have a non-zero node ID. Because >> + * of this there's no need to check if we're running on a 78XX. >> + */ >> + mfc0 t1, CP0_EBASE >> + dext t1, t1, 7, 3 /* Extract node number */ >> + beqz t1, is_node0 /* If non-zero then we're not node 0 */ >> + nop >> + dmfc0 t1, CP0_CVMMEMCTL2 >> + dins t1, zero, 12, 4 >> + dmtc0 t1, CP0_CVMMEMCTL2 >> +is_node0: >> + >> + /* Set up TLB mappings for u-boot code in flash. */ >> + >> + /* Use a bal to get the current PC into ra. 
Since this bal is to >> + * the address immediately following the delay slot, the ra is >> + * the address of the label. We then use this to get the actual >> + * address that we are executing from. >> + */ >> + bal __dummy >> + nop >> + >> +__dummy: >> + /* Get the actual address that we are running at */ >> + PTR_LA a6, _start /* Linked address of _start */ >> + PTR_LA a7, __dummy >> + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ >> + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ >> + >> + /* Save actual _start address in s7. This is where we >> + * are executing from, as opposed to where the code is >> + * linked. >> + */ >> + move s7, a7 >> + move s4, zero >> + >> + /* s7 has actual address of _start. If this is >> + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. >> + * If it is on the boot bus, use 0xBFC00000 as the physical address >> + * for the TLB mapping, as we will be adjusting the boot bus >> + * to make this adjustment. >> + * If we are running from DRAM (remote-boot), then we want to use the >> + * real address in DRAM. >> + */ >> + >> + /* Check to see if we are running from flash - we expect that to >> + * be 0xffffffffb0000000-0xffffffffbfffffff >> + * (0x10000000-0x1fffffff, unmapped/uncached) >> + */ >> + dli t2, 0xffffffffb0000000 >> + dsubu t2, s7 >> + slt s4, s7, t2 >> + bltz t2, uboot_in_flash >> + nop >> + >> + /* If we're not core 0 then we don't care about cache */ >> + mfc0 t2, CP0_EBASE >> + andi t2, EBASE_CPUNUM >> + bnez t2, uboot_in_ram >> + nop >> + >> + /* Find out if we're OCTEON I or OCTEON + which don't support running >> + * out of cache. >> + */ >> + mfc0 t2, CP0_PRID >> + ext t2, t2, 8, 8 >> + li s4, 1 >> + blt t2, 0x90, uboot_in_ram >> + nop >> + >> + /* U-Boot can be executing either in RAM or L2 cache. Now we need to >> + * check if DRAM is initialized.
The way we do that is to look at >> + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) >> + */ >> + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL >> + ld t2, 0(t2) >> + bbit1 t2, 7, uboot_in_ram >> + nop >> + >> + /* We must be executing out of cache */ >> + b uboot_in_ram >> + li s4, 2 >> + >> +uboot_in_flash: >> + /* Set s4 to 4 to indicate we're running in FLASH */ >> + li s4, 4 >> + >> +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) >> + /* By default, L2C index aliasing is enabled. In some cases it may >> + * need to be disabled. The L2C index aliasing can only be disabled >> + * if U-Boot is running out of L2 cache and the L2 cache has not been >> + * used to store anything. >> + */ >> + PTR_LI t1, OCTEON_L2C_CTL >> + ld t2, 0(t1) >> + ori t2, 1 >> + sd t2, 0(t1) >> +#endif >> + >> + /* Use BFC00000 as physical address for TLB mappings when booting >> + * from flash, as we will adjust the boot bus mappings to make this >> + * mapping correct. >> + */ >> + dli a7, 0xFFFFFFFFBFC00000 >> + dsubu s6, s7, a7 /* Save flash offset in s6 */ >> + >> +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) >> + /* For OCTEON II we check to see if the L2 cache is big enough to hold >> + * U-Boot. If it is big enough then we copy ourself from flash to the >> + * L2 cache in order to speed up execution. 
>> + */ >> + >> + /* Check for OCTEON 2 */ >> + mfc0 t1, CP0_PRID >> + ext t1, t1, 8, 8 >> + /* Get number of L2 cache sets */ >> + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ >> + li t2, 1 << 9 >> + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ >> + li t2, 1 << 13 >> + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ >> + li t2, 1 << 10 >> + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ >> + li t2, 1 << 11 >> + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ >> + li t2, 1 << 11 >> + b l2_cache_too_small /* Unknown OCTEON model */ >> + nop >> + >> +got_l2_sets: >> + /* Get number of associations */ >> + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 >> + ld t0, 0(t0) >> + dext t0, t0, 32, 3 >> + >> + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets >> + nop >> + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ >> + beqz t0, got_l2_ways >> + li t3, 16 >> + beq t0, 1, got_l2_ways >> + li t3, 12 >> + beq t0, 2, got_l2_ways >> + li t3, 8 >> + beq t0, 3, got_l2_ways >> + li t3, 4 >> + b l2_cache_too_small >> + nop >> + >> +process_70xx_l2sets: >> + /* For 70XX, the number of ways is defined as: >> + * 0 - full cache (4-way) 512K >> + * 1 - 3/4 ways (3-way) 384K >> + * 2 - 1/2 ways (2-way) 256K >> + * 3 - 1/4 ways (1-way) 128K >> + * 4-7 illegal (aliased to 0-3) >> + */ >> + andi t0, 3 >> + beqz t0, got_l2_ways >> + li t3, 4 >> + beq t0, 1, got_l2_ways >> + li t3, 3 >> + beq t0, 2, got_l2_ways >> + li t3, 2 >> + li t3, 1 >> + >> +got_l2_ways: >> + dmul a1, t2, t3 /* Calculate cache size */ >> + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ >> + daddiu a1, a1, -128 /* Adjust cache size for copy code */ >> + >> + /* Calculate size of U-Boot image */ >> + /* >> + * "uboot_end - _start" is not correct, as the image also >> + * includes the DTB appended to the end (OF_EMBED is deprecated). >> + * Lets use a defined max for now here. 
>> + */ >> + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT >> + >> + daddu t2, s5, s7 /* t2 = end address */ >> + daddiu t2, t2, 127 >> + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ >> + >> + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ >> + bnez t1, l2_cache_too_small >> + nop >> + /* Address we plan to load at in the L2 cache */ >> + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR >> +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE >> + /* Enable all ways for PP0. Authentik ROM may have disabled these */ >> + PTR_LI a1, OCTEON_L2C_WPAR_PP0 >> + sd zero, 0(a1) >> + >> + /* Address to place our memcpy code */ >> + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR >> + /* The following code writes a simple memcpy routine into the cache >> + * to copy ourself from flash into the L2 cache. This makes the >> + * memcpy routine a lot faster since each instruction can potentially >> + * require four read cycles to flash over the boot bus. >> + */ >> + /* Zero cache line in the L2 cache */ >> + zcb (a0) >> + synci 0(zero) >> + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ >> + sd a1, 0(a0) >> + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ >> + sd a1, 8(a0) >> + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ >> + sd a1, 16(a0) >> + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ >> + sd a1, 24(a0) >> + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ >> + sd a1, 32(a0) >> + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ >> + sd a1, 40(a0) >> + sd zero, 48(a0) /* nop; nop */ >> + >> + /* Synchronize the caches */ >> + sync >> + synci 0(zero) >> + >> + move t0, s7 >> + move t1, t9 >> + >> + /* Do the memcpy operation in L2 cache to copy ourself from flash >> + * to the L2 cache. 
>> + */ >> + jalr a0 >> + nop >> + >> +# else >> + /* Copy ourself to the L2 cache from flash, 32 bytes at a time */ >> + /* This code is now written to the L2 cache using the code above */ >> +1: >> + ld a0, 0(t0) >> + ld a1, 8(t0) >> + ld a2, 16(t0) >> + ld a3, 24(t0) >> + sd a0, 0(t1) >> + sd a1, 8(t1) >> + sd a2, 16(t1) >> + sd a3, 24(t1) >> + addiu t0, 32 >> + bne t0, t2, 1b >> + addiu t1, 32 >> +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ >> + >> + /* Adjust the start address of U-Boot and the global pointer */ >> + subu t0, s7, t9 /* t0 = address difference */ >> + move s7, t9 /* Update physical address */ >> + move s2, t9 >> + sync >> + synci 0(zero) >> + >> + /* Now we branch to the L2 cache. We first get our PC then adjust it >> + */ >> + bal 3f >> + nop >> +3: >> + /* Don't add any instructions here! */ >> + subu t9, ra, t0 >> + /* Give ourself 16 bytes */ >> + addiu t9, 0x10 >> + >> + jal t9 /* Branch to address in L2 cache */ >> + >> + nop >> + nop >> + /* Add instructions after here */ >> + >> + move a7, s7 >> + >> + b uboot_in_ram >> + ori s4, 2 /* Running out of L2 cache */ >> + >> +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ >> +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ >> + >> + /* This code is only executed if booting from flash. */ >> + /* For flash boot (_not_ RAM boot), we do a workaround for >> + * an LLM errata on CN38XX and CN58XX parts. >> + */ >> + >> +uboot_in_ram: >> + /* U-boot address is now in reg a7, and is 4 MByte aligned. >> + * (boot bus addressing has been adjusted to make this happen for flash, >> + * and for DRAM this alignment must be provided by the remote boot >> + * utility. >> + */ >> + /* See if we're in KSEG0 range, if so set EBASE register to handle >> + * exceptions. 
>> + */ >> + dli a1, 0x20000000 >> + bge a7, a1, 1f >> + nop >> + /* Convert our physical address to KSEG0 */ >> + PTR_LI a1, 0xffffffff80000000 >> + or a1, a1, a7 >> + mtc0 a1, CP0_EBASE >> +1: >> + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping >> + * to map u-boot. >> + */ >> + move a0, a6 /* Virtual addr in a0 */ >> + dins a0, zero, 0, 16 /* Zero out offset bits */ >> + move a1, a7 /* Physical addr in a1 */ >> + >> + /* Now we need to remove the MIPS address space bits. For this we >> + * need to determine if it is a 32 bit compatibility address or not. >> + */ >> + >> + /* 'lowest' address in compatibility space */ >> + PTR_LI t0, 0xffffffff80000000 >> + dsubu t0, t0, a1 >> + bltz t0, compat_space >> + nop >> + >> + /* We have a xkphys address, so strip off top bit */ >> + b addr_fixup_done >> + dins a1, zero, 63, 1 >> + >> +compat_space: >> + PTR_LI a2, 0x1fffffff >> + and a1, a1, a2 /* Mask phy addr to remove address space bits */ >> + >> +addr_fixup_done: >> + /* Currenty the u-boot image size is limited to 4 MBytes. In order to >> + * support larger images the flash mapping will need to be changed to >> + * be able to access more than that before C code is run. Until that >> + * is done, we just use a 4 MByte mapping for the secondary cores as >> + * well. >> + */ >> + /* page size (only support 4 Meg binary size for now for core 0) >> + * This limitation is due to the fact that the boot vector is >> + * 0xBFC00000 which only makes 4MB available. Later more flash >> + * address space will be available after U-Boot has been copied to >> + * RAM. For now assume that it is in flash. 
>> + */ >> + li a2, 2*1024*1024 >> + >> + mfc0 a4, CP0_EBASE >> + andi a4, EBASE_CPUNUM /* get core */ >> + beqz a4, core_0_tlb >> + nop >> + >> + /* Now determine how big a mapping to use for secondary cores, >> + * which need to map all of u-boot + heap in DRAM >> + */ >> + /* Here we look at the alignment of the the physical address, >> + * and use the largest page size possible. In some cases >> + * this can result in an oversize mapping, but for secondary cores >> + * this mapping is very short lived. >> + */ >> + >> + /* Physical address in a1 */ >> + li a2, 1 >> +1: >> + sll a2, 1 >> + and a5, a1, a2 >> + beqz a5, 1b >> + nop >> + >> + /* a2 now contains largest page size we can use */ >> +core_0_tlb: >> + JAL(single_tlb_setup) >> + >> + /* Check if we're running from cache */ >> + bbit1 s4, 1, uboot_in_cache >> + nop >> + >> + /* If we are already running from ram, we don't need to muck >> + * with boot bus mappings. >> + */ >> + PTR_LI t2, 0xffffffffb0000000 >> + dsubu t2, s7 >> + /* See if our starting address is lower than the boot bus */ >> + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ >> + nop >> + >> +uboot_in_cache: >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + /* The large stack is only for core 0. For all other cores we need to >> + * use the L1 cache otherwise the other cores will stomp on top of each >> + * other unless even more space is reserved for the stack space for >> + * each core. With potentially 96 cores this gets excessive. >> + */ >> + mfc0 v0, CP0_EBASE >> + andi a0, EBASE_CPUNUM >> + bnez a0, no_big_stack >> + nop >> + PTR_LA sp, big_stack_start >> + daddiu sp, -16 >> + >> +no_big_stack: >> +#endif >> + /* We now have the TLB set up, so we need to remap the boot bus. >> + * This is tricky, as we are running from flash, and will be changing >> + * the addressing of the flash. 
>> + */ >> + /* Enable movable boot bus region 0, at address 0x10000000 */ >> + PTR_LI a4, OCTEON_MIO_BOOT_BASE >> + dli a5, 0x81000000 /* EN + base address 0x11000000 */ >> + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) >> + >> + /* Copy code to that remaps the boot bus to movable region */ >> + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + >> + PTR_LA a6, change_boot_mappings >> + GETOFFSET(a5, change_boot_mappings); >> + daddu a5, a5, a6 >> + >> + /* The code is 16 bytes (2 DWORDS) */ >> + ld a7, 0(a5) >> + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + ld a7, 8(a5) >> + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + >> + /* Read from an RML register to ensure that the previous writes have >> + * completed before we branch to the movable region. >> + */ >> + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) >> + >> + /* Compute value for boot bus configuration register */ >> + /* Read region 0 config so we can _modify_ the base address field */ >> + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ >> + ld a0, 0(a4) >> + dli a4, 0xf0000000 /* Mask off bits we want to save */ >> + and a4, a4, a0 >> + dli a0, 0x0fff0000 /* Force size to max */ >> + or a4, a4, a0 >> + >> + move a5, s6 >> + /* Convert to 64k blocks, as used by boot bus config */ >> + srl a5, 16 >> + li a6, 0x1fc0 /* 'normal' boot bus base config value */ >> + subu a6, a6, a5 /* Subtract offset */ >> + /* combine into register value to pass to boot bus routine */ >> + or a0, a4, a6 >> + >> + /* Branch there */ >> + PTR_LA a1, __mapped_continue_label >> + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 >> + /* If region 0 is not enabled we can skip it */ >> + ld a4, 0(a2) >> + bbit0 a4, 31, __mapped_continue_label >> + nop >> + li a4, 0x10000000 >> + j a4 >> + synci 0(zero) >> + >> + /* We never get here, as we go directly to __mapped_continue_label */ >> + break >> + >> + >> +uboot_in_ram2: >> + >> + /* Now jump to address in TLB mapped memory to continue execution */ >> + PTR_LA a4, __mapped_continue_label >> + synci 
0(a4) >> + j a4 >> + nop >> + >> +__mapped_continue_label: >> + /* Check if we are core 0, if we are not then we need >> + * to vector to code in DRAM to do application setup, and >> + * skip the rest of the bootloader. Only core 0 runs the bootloader >> + * and sets up the tables that the other cores will use for >> + * configuration. >> + */ >> + mfc0 a0, CP0_EBASE >> + andi a0, EBASE_CPUNUM /* get core */ >> + /* if (__all_cores_are_equal==0 && core==0), >> + * then jump to execute BL on core 0; else 'go to next line' >> + * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID)) >> + */ >> + lw t0, __all_cores_are_equal >> + beq a0, t0, core_0_cont1 >> + nop >> + >> + /* other cores look up addr from dram */ >> + /* DRAM controller already set up by first core */ >> + li a1, (BOOT_VECTOR_NUM_WORDS * 4) >> + mul a0, a0, a1 >> + >> + /* Now find out the boot vector base address from the moveable boot >> + * bus region. >> + */ >> + >> + /* Get the address of the boot bus moveable region */ >> + PTR_LI t8, OCTEON_MIO_BOOT_BASE >> + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) >> + /* Make sure it's enabled */ >> + bbit0 t9, 31, invalid_boot_vector >> + dext t9, t9, 3, 24 >> + dsll t9, t9, 7 >> + /* Make address XKPHYS */ >> + li t0, 1 >> + dins t9, t0, 63, 1 >> + >> + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) >> + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 >> + bne t0, t1, invalid_boot_vector >> + nop >> + >> + /* Load base address of boot vector table */ >> + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) >> + /* Add offset for core */ >> + daddu a1, t0, a0 >> + >> + mfc0 v0, CP0_STATUS >> + move v1, v0 >> + ins v1, zero, 19, 1 /* Clear NMI bit */ >> + mtc0 v1, CP0_STATUS >> + >> + /* Get app start function address */ >> + lw t9, 8(a1) >> + beqz t9, invalid_boot_vector >> + nop >> + >> + j t9 >> + lw k0, 12(a1) /* Load global data (deprecated) */ >> + >> +invalid_boot_vector: >> + wait >> + b invalid_boot_vector >> + nop >> + >> +__all_cores_are_equal: >> + /* The following 
.word tell if 'all_cores_are_equal' or core0 is special >> + * By default (for the first execution) the core0 should be special, >> + * in order to behave like the old(existing not-modified) bootloader >> + * and run the bootloader on core 0 to follow the existing design. >> + * However after that we make 'all_cores_equal' which allows to run SE >> + * applications on core0 like on any other core. NOTE that value written >> + * to '__all_cores_are_equal' should not match any core ID. >> + */ >> + .word 0 >> + >> +core_0_cont1: >> + li t0, 0xffffffff >> + sw t0, __all_cores_are_equal >> + /* From here on, only core 0 runs, other cores have branched >> + * away. >> + */ >> +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM >> + /* Set up initial stack and global data */ >> + setup_stack_gd >> +# ifdef CONFIG_DEBUG_UART >> + PTR_LA t9, debug_uart_init >> + jalr t9 >> + nop >> +# endif >> +#endif >> + move a0, zero # a0 <-- boot_flags = 0 >> + PTR_LA t9, board_init_f >> + >> + jr t9 >> + move ra, zero >> + END(_start) >> + >> + .balign 8 >> + .globl single_tlb_setup >> + .ent single_tlb_setup >> + /* Sets up a single TLB entry. Virtual/physical addresses >> + * must be properly aligned. >> + * a0 Virtual address >> + * a1 Physical address >> + * a2 page (_not_ mapping) size >> + */ >> +single_tlb_setup: >> + /* Determine the number of TLB entries available, and >> + * use the top one. 
>> + */ >> + mfc0 a3, CP0_CONFIG1 >> + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ >> + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ >> + bbit0 a5, 31, single_tlb_setup_cont >> + nop >> + mfc0 a5, CP0_CONFIG4 >> + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ >> + nop >> + /* append config4[MMUSizeExt] to most significant bit of >> + * config1[MMUSize-1] >> + */ >> + dins a3, a5, 6, 8 >> + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ >> + >> +single_tlb_setup_cont: >> + >> + /* Format physical address for entry low */ >> + nop >> + dsrl a1, a1, 12 >> + dsll a1, a1, 6 >> + ori a1, a1, 0x7 /* set DVG bits */ >> + >> + move a4, a2 >> + daddu a5, a4, a4 /* mapping size */ >> + dsll a6, a4, 1 >> + daddiu a6, a6, -1 /* pagemask */ >> + dsrl a4, a4, 6 /* adjust for adding with entrylo */ >> + >> + /* Now set up mapping */ >> + mtc0 a6, CP0_PAGEMASK >> + mtc0 a3, CP0_INDEX >> + >> + dmtc0 a1, CP0_ENTRYLO0 >> + daddu a1, a1, a4 >> + >> + dmtc0 a1, CP0_ENTRYLO1 >> + daddu a1, a1, a4 >> + >> + dmtc0 a0, CP0_ENTRYHI >> + daddu a0, a0, a5 >> + >> + ehb >> + tlbwi >> + jr ra >> + nop >> + .end single_tlb_setup >> + >> + >> +/** >> + * This code is moved to a movable boot bus region, >> + * and it is responsible for changing the flash mappings and >> + * jumping to run from the TLB mapped address. >> + * >> + * @param a0 New address for boot bus region 0 >> + * @param a1 Address to branch to afterwards >> + * @param a2 Address of MIO_BOOT_REG_CFG0 >> + */ >> + .balign 8 >> +change_boot_mappings: >> + sd a0, 0(a2) >> + sync >> + j a1 /* Jump to new TLB mapped location */ >> + synci 0(zero) >> + >> +/* If we need a large stack, allocate it here. */ >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + /* Allocate the stack here so it's in L2 cache or DRAM */ >> + .balign 16 >> +big_stack_end: >> + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 >> +big_stack_start: >> + .dword 0 >> +#endif >> > Viele Grüße, Stefan
On 14.05.20 01:43, Daniel Schwierzeck wrote: > > > Am 02.05.20 um 10:59 schrieb Stefan Roese: >> From: Aaron Williams <awilliams at marvell.com> >> >> This patch adds very basic support for the Octeon III SoCs. Only >> CFI parallel NOR flash and UART is supported for now. >> >> Please note that the basic Octeon port does not include the DDR3/4 >> initialization yet. This will be added in some follow-up patches >> later. To still use U-Boot with this port, the L2 cache (4MiB on >> Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the >> prompt on such boards. >> >> Signed-off-by: Aaron Williams <awilliams at marvell.com> >> Signed-off-by: Stefan Roese <sr at denx.de> >> --- >> >> MAINTAINERS | 6 + >> arch/Kconfig | 1 + >> arch/mips/Kconfig | 49 +- >> arch/mips/Makefile | 7 + >> arch/mips/cpu/Makefile | 4 +- >> arch/mips/include/asm/arch-octeon/cavm-reg.h | 42 + >> arch/mips/include/asm/arch-octeon/clock.h | 24 + >> arch/mips/mach-octeon/Kconfig | 92 ++ >> arch/mips/mach-octeon/Makefile | 10 + >> arch/mips/mach-octeon/clock.c | 22 + >> arch/mips/mach-octeon/cpu.c | 55 + >> arch/mips/mach-octeon/dram.c | 27 + >> arch/mips/mach-octeon/include/ioremap.h | 30 + >> arch/mips/mach-octeon/start.S | 1241 ++++++++++++++++++ >> 14 files changed, 1608 insertions(+), 2 deletions(-) >> create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h >> create mode 100644 arch/mips/include/asm/arch-octeon/clock.h >> create mode 100644 arch/mips/mach-octeon/Kconfig >> create mode 100644 arch/mips/mach-octeon/Makefile >> create mode 100644 arch/mips/mach-octeon/clock.c >> create mode 100644 arch/mips/mach-octeon/cpu.c >> create mode 100644 arch/mips/mach-octeon/dram.c >> create mode 100644 arch/mips/mach-octeon/include/ioremap.h >> create mode 100644 arch/mips/mach-octeon/start.S >> > > I couldn't completely understand the start.S. There is too much stuff in > it for an initial merge. But I don't see a hard reason against using the > generic start.S.
So the first patch series should only implement the > bare minimum needed to boot from flash, init the boot CPU core, maybe > suspend all other cores and relocate to L2 cache. I already worked on using the common start.S with minimal custom additions for Octeon. This will be included in v2 of the base Octeon patchset. > I know the current start.S is not really suited yet but I'm working on a > refactoring to add some more hooks which a SoC/CPU can implement. Once > we have your initial patch series and the refactoring in mainline, it > should be possible to gradually add more Octeon stuff like memory init. > > Basic idea for refactoring is something like this: > > reset: > - mips_cpu_early_init() # custom early init, fix errata > - init CP0 registers, Watch registers > - mips_cache_disable() # set K0 CCA to uncached > - mips_cpu_core_init() # per CPU core init > # -> generic code issues wait instr. > # -> custom code can do custom init > # or custom boot protocols > - mips_cm_map() # init CM if available > - mips_cache_init() # init caches, set K0 CCA to non-coh. > - mips_sram_init() # init SRAM, Scratch RAM if avail > - setup initial stack and global_data > - debug_uart_init() > - mips_mem_init() # init external memory, C env avail. > - init malloc_f > - board_init_f() Thanks Daniel, this sounds like a very good approach. I'll send v2 later today (as it's already finished). We can then work on how to integrate it, either by using the currently available functions like mips_sram_init(), or by extending start.S (and the Octeon custom code) with some other, newly introduced functions.
Thanks, Stefan >> + >> +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ >> diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S >> new file mode 100644 >> index 0000000000..acb967201a >> --- /dev/null >> +++ b/arch/mips/mach-octeon/start.S >> @@ -0,0 +1,1241 @@ >> +/* SPDX-License-Identifier: GPL-2.0+ */ >> +/* >> + * Startup Code for OCTEON 64-bit CPU-core >> + * >> + * Copyright (c) 2003 Wolfgang Denk <wd at denx.de> >> + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. >> + */ >> + >> +#include <asm-offsets.h> >> +#include <config.h> >> +#include <asm/regdef.h> >> +#include <asm/mipsregs.h> >> +#include <asm/asm.h> >> + >> +#define BOOT_VECTOR_NUM_WORDS 8 >> + >> +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 >> +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 >> + >> +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd >> +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW >> + >> +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 >> + >> +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 >> +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 >> +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 >> +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 >> +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 >> +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 >> +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 >> +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 >> +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 >> +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 >> +#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 >> +#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 >> +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 >> +#define OCTEON_L2D_FUS3 0x80011800800007B8 >> +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 >> + >> +#define OCTEON_RST 0x8001180006000000 >> +#define OCTEON_RST_BOOT_OFFSET 0x1600 >> +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 >> +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) >> +#define OCTEON_RST_BOOT 
0x8001180006001600 >> +#define OCTEON_RST_SOFT_RST 0x8001180006001680 >> +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) >> + >> +#define OCTEON_OCX_COM_NODE 0x8001180011000000 >> +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 >> +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 >> +#define OCTEON_L2C_CTL 0x8001180080800000 >> + >> +#define OCTEON_DBG_DATA 0x80011F00000001E8 >> +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 >> +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 >> +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + (X) * 8) >> +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) >> +#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8) >> +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) >> +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) >> +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 >> +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000) >> + >> +/** PRID for CN56XX */ >> +#define OCTEON_PRID_CN56XX 0x04 >> +/** PRID for CN52XX */ >> +#define OCTEON_PRID_CN52XX 0x07 >> +/** PRID for CN63XX */ >> +#define OCTEON_PRID_CN63XX 0x90 >> +/** PRID for CN68XX */ >> +#define OCTEON_PRID_CN68XX 0x91 >> +/** PRID for CN66XX */ >> +#define OCTEON_PRID_CN66XX 0x92 >> +/** PRID for CN61XX */ >> +#define OCTEON_PRID_CN61XX 0x93 >> +/** PRID for CNF71XX */ >> +#define OCTEON_PRID_CNF71XX 0x94 >> +/** PRID for CN78XX */ >> +#define OCTEON_PRID_CN78XX 0x95 >> +/** PRID for CN70XX */ >> +#define OCTEON_PRID_CN70XX 0x96 >> +/** PRID for CN73XX */ >> +#define OCTEON_PRID_CN73XX 0x97 >> +/** PRID for CNF75XX */ >> +#define OCTEON_PRID_CNF75XX 0x98 >> + >> +/* func argument is used to create a mark, must be unique */ >> +#define GETOFFSET(reg, func) \ >> + .balign 8; \ >> + bal func ##_mark; \ >> + nop; \ >> + .dword .; \ >> +func ##_mark: \ >> + ld reg, 0(ra); \ >> + dsubu reg, ra, reg; >> + >> +#define JAL(func) \ >> + .balign 8; \ >> + bal func ##_mark; \ >> + nop; \ >> + .dword .; \ 
>> +func ##_mark: \ >> + ld t8, 0(ra); \ >> + dsubu t8, ra, t8; \ >> + dla t9, func; \ >> + daddu t9, t9, t8; \ >> + jalr t9; \ >> + nop; >> + >> + .set arch=octeon3 >> + .set noreorder >> + >> + .macro uhi_mips_exception >> + move k0, t9 # preserve t9 in k0 >> + move k1, a0 # preserve a0 in k1 >> + li t9, 15 # UHI exception operation >> + li a0, 0 # Use hard register context >> + sdbbp 1 # Invoke UHI operation >> + .endm >> + >> + .macro setup_stack_gd >> + li t0, -16 >> + PTR_LI t1, big_stack_start >> + and sp, t1, t0 # force 16 byte alignment >> + PTR_SUBU \ >> + sp, sp, GD_SIZE # reserve space for gd >> + and sp, sp, t0 # force 16 byte alignment >> + move k0, sp # save gd pointer >> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ >> + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) >> + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) >> + PTR_SUBU \ >> + sp, sp, t2 # reserve space for early malloc >> + and sp, sp, t0 # force 16 byte alignment >> +#endif >> + move fp, sp >> + >> + /* Clear gd */ >> + move t0, k0 >> +1: >> + PTR_S zero, 0(t0) >> + PTR_ADDIU t0, PTRSIZE >> + blt t0, t1, 1b >> + nop >> + >> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ >> + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) >> + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset >> +#endif >> + .endm >> + >> +/* Saved register usage: >> + * s0: not used >> + * s1: not used >> + * s2: Address U-Boot loaded into in L2 cache >> + * s3: Start address >> + * s4: flags >> + * 1: booting from RAM >> + * 2: executing out of cache >> + * 4: booting from flash >> + * s5: u-boot size (data end - _start) >> + * s6: offset in flash. >> + * s7: _start physical address >> + * s8: >> + */ >> + >> +ENTRY(_start) >> + /* U-Boot entry point */ >> + b reset >> + >> + /* The above jump instruction/nop are considered part of the >> + * bootloader_header_t structure but are not changed when the header is >> + * updated. >> + */ >> + >> + /* Leave room for bootloader_header_t header at start of binary. 
This >> + * header is used to identify the board the bootloader is for, what >> + * address it is linked at, failsafe/normal, etc. It also contains a >> + * CRC of the entire image. >> + */ >> + >> +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) >> + /* >> + * Exception vector entry points. When running from ROM, an exception >> + * cannot be handled. Halt execution and transfer control to debugger, >> + * if one is attached. >> + */ >> + .org 0x200 >> + /* TLB refill, 32 bit task */ >> + uhi_mips_exception >> + >> + .org 0x280 >> + /* XTLB refill, 64 bit task */ >> + uhi_mips_exception >> + >> + .org 0x300 >> + /* Cache error exception */ >> + uhi_mips_exception >> + >> + .org 0x380 >> + /* General exception */ >> + uhi_mips_exception >> + >> + .org 0x400 >> + /* Catch interrupt exceptions */ >> + uhi_mips_exception >> + >> + .org 0x480 >> + /* EJTAG debug exception */ >> +1: b 1b >> + nop >> + >> + .org 0x500 >> +#endif >> + >> +/* Reserve extra space so that when we use the boot bus local memory >> + * segment to remap the debug exception vector we don't overwrite >> + * anything useful >> + */ >> + >> +/* Basic exception handler (dump registers) in all ASM. When using the TLB for >> + * mapping u-boot C code, we can't branch to that C code for exception handling >> + * (TLB is disabled for some exceptions. >> + */ >> + >> +/* RESET/start here */ >> + .balign 8 >> +reset: >> + nop >> + synci 0(zero) >> + mfc0 k0, CP0_STATUS >> + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ >> + mtc0 k0, CP0_STATUS >> + >> + /* Save the address we're booting from, strip off low bits */ >> + bal 1f >> + nop >> +1: >> + move s3, ra >> + dins s3, zero, 0, 12 >> + >> + /* Disable boot bus moveable regions */ >> + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 >> + sd zero, 0(k0) >> + sd zero, 8(k0) >> + >> + /* Disable the watchdog timer >> + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see >> + * if we use CIU3 or CIU. 
>> + */ >> + mfc0 t0, CP0_PRID >> + ext t0, t0, 8, 8 >> + /* Assume CIU */ >> + PTR_LI t1, OCTEON_CIU_WDOG(0) >> + PTR_LI t2, OCTEON_CIU_PP_POKE(0) >> + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu >> + nop >> + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu >> + nop >> + /* Use CIU3 */ >> + PTR_LI t1, OCTEON_CIU3_WDOG(0) >> + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) >> +wd_use_ciu: >> + sd zero, 0(t2) /* Pet the dog */ >> + sd zero, 0(t1) /* Disable watchdog timer */ >> + >> + /* Errata: CN76XX has a node ID of 3. change it to zero here. >> + * This needs to be done before we relocate to L2 as addresses change >> + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], >> + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. >> + */ >> + mfc0 a4, CP0_PRID >> + /* Check for 78xx pass 1.x processor ID */ >> + andi a4, 0xffff >> + blt a4, (OCTEON_PRID_CN78XX << 8), 1f >> + nop >> + >> + /* Zero out alternate package for now */ >> + dins a4, zero, 6, 1 >> + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f >> + nop >> + >> + /* 78xx or 76xx here, first check for bug #27141 */ >> + PTR_LI a5, OCTEON_SLI_CTL_STATUS >> + ld a6, 0(a5) >> + andi a7, a4, 0xff >> + andi a6, a6, 0xff >> + >> + beq a6, a7, not_bug27141 >> + nop >> + >> + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ >> + /* We just hit bug #27141. 
Need to reset the chip and try again */ >> + >> + PTR_LI a4, OCTEON_RST_SOFT_RST >> + ori a5, zero, 0x1 /* set the reset bit */ >> + >> +reset_78xx_27141: >> + sync >> + synci 0(zero) >> + cache 9, 0(zero) >> + sd a5, 0(a4) >> + wait >> + b reset_78xx_27141 >> + nop >> + >> +not_bug27141: >> + /* 76XX pass 1.x has the node number set to 3 */ >> + mfc0 a4, CP0_EBASE >> + ext a4, a4, 0, 10 >> + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ >> + nop >> + >> + /* Clear OCX_COM_NODE[ID] */ >> + PTR_LI a5, OCTEON_OCX_COM_NODE >> + ld a4, 0(a5) >> + dins a4, zero, 0, 2 >> + sd a4, 0(a5) >> + ld zero, 0(a5) >> + >> + /* Clear L2C_OCI_CTL[GKSEGNODE] */ >> + PTR_LI a5, OCTEON_L2C_OCI_CTL >> + ld a4, 0(a5) >> + dins a4, zero, 4, 2 >> + sd a4, 0(a5) >> + ld zero, 0(a5) >> + >> + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ >> + dmfc0 a4, CP0_CVMMEMCTL2 >> + dins a4, zero, 12, 2 >> + dmtc0 a4, CP0_CVMMEMCTL2 >> + >> + /* Put the flash address in the start of the EBASE register to >> + * enable our exception handler but only for core 0. >> + */ >> + mfc0 a4, CP0_EBASE >> + dext a4, a4, 0, 10 >> + bnez a4, no_flash >> + /* OK in delay slot */ >> + dext a6, a6, 0, 16 /* Get the base address in flash */ >> + sll a6, a6, 16 >> + mtc0 a6, CP0_EBASE /* Enable exceptions */ >> + >> +no_flash: >> + /* Zero out various registers */ >> + mtc0 zero, CP0_DEPC >> + mtc0 zero, CP0_EPC >> + mtc0 zero, CP0_CAUSE >> + mfc0 a4, CP0_PRID >> + ext a4, a4, 8, 8 >> + mtc0 zero, CP0_DESAVE >> + >> + /* The following are only available on Octeon 2 or later */ >> + mtc0 zero, CP0_KSCRATCH1 >> + mtc0 zero, CP0_KSCRATCH2 >> + mtc0 zero, CP0_KSCRATCH3 >> + mtc0 zero, CP0_USERLOCAL >> + >> + /* Turn off ROMEN bit to disable ROM */ >> + PTR_LI a1, OCTEON_MIO_RST_BOOT >> + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. >> + * The difference is bits 24-26 are 6 instead of 0 for the address. 
>> + */ >> + /* For Octeon 2 and CN70XX we can ignore the watchdog */ >> + blt a4, OCTEON_PRID_CN78XX, watchdog_ok >> + nop >> + >> + PTR_LI a1, OCTEON_RST_BOOT >> + >> + beq a4, OCTEON_PRID_CN70XX, watchdog_ok >> + nop >> + >> + ld a2, 0(a1) >> + /* There is a bug where some registers don't get properly reset when >> + * the watchdog timer causes a reset. In this case we need to force >> + * a reset. >> + */ >> + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ >> + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ >> + /* Clear bit indicating reset due to watchdog */ >> + ori a2, 1 << 11 >> + sd a2, 0(a1) >> + >> + /* Disable watchdog */ >> + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) >> + sd zero, 0(a1) >> + PTR_LI a1, OCTEON_CIU3_WDOG(0) >> + sd zero, 0(a1) >> + >> + /* Record this in the GSER0_SCRATCH register in bit 11 */ >> + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) >> + ld a2, 0(a1) >> + ori a2, 1 << 11 >> + sd a2, 0(a1) >> + >> + PTR_LI a1, OCTEON_RST_SOFT_RST >> + li a2, 1 >> + sd a2, 0(a1) >> + wait >> + >> + /* We should never get here */ >> + >> +watchdog_ok: >> + ld a2, 0(a1) >> + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ >> + dins a2, zero, 2, 18 >> + dins a2, zero, 60, 1 /* Clear ROMEN bit */ >> + sd a2, 0(a1) >> + >> + /* Start of Octeon setup */ >> + >> + /* Check what core we are - if core 0, branch to init tlb >> + * loop in flash. Otherwise, look up address of init tlb >> + * loop that was saved in the boot vector block. >> + */ >> + mfc0 a0, CP0_EBASE >> + andi a0, EBASE_CPUNUM /* get core */ >> + beqz a0, InitTLBStart_local >> + nop >> + >> + break >> + /* We should never get here - non-zero cores now go directly to >> + * tlb init from the boot stub in movable region. >> + */ >> + >> + .globl InitTLBStart >> +InitTLBStart: >> +InitTLBStart_local: >> + /* If we don't have working memory yet configure a bunch of >> + * scratch memory, and set the stack pointer to the top >> + * of it. 
This allows us to go to C code without having >> + * memory set up >> + * >> + * Warning: do not change SCRATCH_STACK_LINES as this can impact the >> + * transition from start.S to crti.asm. crti requires 590 bytes of >> + * stack space. >> + */ >> + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + rdhwr v0, $0 >> + bnez v0, 1f >> + nop >> + PTR_LA sp, big_stack_start - 16 >> + b stack_clear_done >> + nop >> +1: >> +#endif >> +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ >> + dmfc0 v0, CP0_CVMMEMCTL >> + dins v0, zero, 0, 9 >> + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ >> + ori v0, 0x100 | SCRATCH_STACK_LINES >> + dmtc0 v0, CP0_CVMMEMCTL >> + /* set stack to top of scratch memory */ >> + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) >> + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ >> + li t0, 0xffffffffffff8000 >> +clear_scratch: >> + sd zero, 0(t0) >> + addiu t0, 8 >> + bne t0, sp, clear_scratch >> + nop >> + >> + /* This code runs on all cores - core 0 from flash, >> + * the rest from DRAM. When booting from PCI, non-zero cores >> + * come directly here from the boot vector - no earlier code in this >> + * file is executed. >> + */ >> + >> + /* Some generic initialization is done here as well, as we need this >> + * done on all cores even when booting from PCI >> + */ >> +stack_clear_done: >> + /* Clear watch registers.
*/ >> + mtc0 zero, CP0_WATCHLO >> + mtc0 zero, CP0_WATCHHI >> + >> + /* STATUS register */ >> + mfc0 k0, CP0_STATUS >> + li k1, ~ST0_IE >> + and k0, k1 >> + mtc0 k0, CP0_STATUS >> + >> + /* CAUSE register */ >> + mtc0 zero, CP0_CAUSE >> + >> + /* Init Timer */ >> + dmtc0 zero, CP0_COUNT >> + dmtc0 zero, CP0_COMPARE >> + >> + >> + mfc0 a5, CP0_STATUS >> + li v0, 0xE0 /* enable 64 bit mode for CSR access */ >> + or v0, v0, a5 >> + mtc0 v0, CP0_STATUS >> + >> + >> + dli v0, 1 << 29 /* Enable large physical address support in TLB */ >> + mtc0 v0, CP0_PAGEGRAIN >> + >> +InitTLB: >> + dmtc0 zero, CP0_ENTRYLO0 >> + dmtc0 zero, CP0_ENTRYLO1 >> + mtc0 zero, CP0_PAGEMASK >> + dmtc0 zero, CP0_CONTEXT >> + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy >> + * TLB clearing >> + */ >> + PTR_LI v0, 0xFFFFFFFF90000000 >> + mfc0 a0, CP0_CONFIG1 >> + srl a0, a0, 25 >> + /* Check if config4 reg present */ >> + mfc0 a1, CP0_CONFIG3 >> + bbit0 a1, 31, 2f >> + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ >> + mfc0 a1, CP0_CONFIG4 >> + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ >> + nop >> + /* append config4[MMUSizeExt] to most significant bit of >> + * config1[MMUSize-1] >> + */ >> + ins a0, a1, 6, 8 >> + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ >> +2: >> + dmtc0 zero, CP0_XCONTEXT >> + mtc0 zero, CP0_WIRED >> + >> +InitTLBloop: >> + dmtc0 v0, CP0_ENTRYHI >> + tlbp >> + mfc0 v1, CP0_INDEX >> + daddiu v0, v0, 1<<13 >> + bgez v1, InitTLBloop >> + >> + mtc0 a0, CP0_INDEX >> + tlbwi >> + bnez a0, InitTLBloop >> + daddiu a0, -1 >> + >> + mthi zero >> + mtlo zero >> + >> + /* Set up status register */ >> + mfc0 v0, CP0_STATUS >> + /* Enable COP0 and COP2 access */ >> + li a4, (1 << 28) | (1 << 30) >> + or v0, a4 >> + >> + /* Must leave BEV set here, as DRAM is not configured for core 0. >> + * Also, BEV must be 1 later on when the exception base address is set. 
>> + */ >> + >> + /* Mask all interrupts */ >> + ins v0, zero, 0, 16 >> + /* Clear NMI (used to start cores other than core 0) */ >> + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ >> + mtc0 v0, CP0_STATUS >> + >> + dli v0,0xE000000F /* enable all readhw locations */ >> + mtc0 v0, CP0_HWRENA >> + >> + dmfc0 v0, CP0_CVMCTL >> + ori v0, 1<<14 /* enable fixup of unaligned mem access */ >> + dmtc0 v0, CP0_CVMCTL >> + >> + /* Setup scratch memory. This is also done in >> + * cvmx_user_app_init, and this code will be removed >> + * from the bootloader in the near future. >> + */ >> + >> + /* Set L2C_TAD_CTL[MAXLFB] = 0 on CN73XX */ >> + mfc0 a4, CP0_PRID >> + ext a4, a4, 8, 8 >> + blt a4, OCTEON_PRID_CN73XX, 72f >> + nop >> + PTR_LI v0, OCTEON_L2C_TAD_CTL >> + ld t1, 0(v0) >> + dins t1, zero, 0, 4 >> + sd t1, 0(v0) >> + ld zero, 0(v0) >> + >> +72: >> + >> + /* clear these to avoid immediate interrupt in noperf mode */ >> + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ >> + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ >> + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ >> + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ >> + dmtc0 zero, CP0_PERF_CNT2 >> + dmtc0 zero, CP0_PERF_CNT3 >> + >> + /* If we're running on a node other than 0 then we need to set KSEGNODE >> + * to 0. The nice thing with this code is that it also autodetects if >> + * we're running on a processor that supports CVMMEMCTL2 or not since >> + * only processors that have this will have a non-zero node ID. Because >> + * of this there's no need to check if we're running on a 78XX. >> + */ >> + mfc0 t1, CP0_EBASE >> + dext t1, t1, 7, 3 /* Extract node number */ >> + beqz t1, is_node0 /* If non-zero then we're not node 0 */ >> + nop >> + dmfc0 t1, CP0_CVMMEMCTL2 >> + dins t1, zero, 12, 4 >> + dmtc0 t1, CP0_CVMMEMCTL2 >> +is_node0: >> + >> + /* Set up TLB mappings for u-boot code in flash. */ >> + >> + /* Use a bal to get the current PC into ra.
Since this bal is to >> + * the address immediately following the delay slot, the ra is >> + * the address of the label. We then use this to get the actual >> + * address that we are executing from. >> + */ >> + bal __dummy >> + nop >> + >> +__dummy: >> + /* Get the actual address that we are running at */ >> + PTR_LA a6, _start /* Linked address of _start */ >> + PTR_LA a7, __dummy >> + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ >> + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ >> + >> + /* Save actual _start address in s7. This is where we >> + * are executing from, as opposed to where the code is >> + * linked. >> + */ >> + move s7, a7 >> + move s4, zero >> + >> + /* s7 has actual address of _start. If this is >> + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. >> + * If it is on the boot bus, use 0xBFC00000 as the physical address >> + * for the TLB mapping, as we will be adjusting the boot bus >> + * to make this adjustment. >> + * If we are running from DRAM (remote-boot), then we want to use the >> + * real address in DRAM. >> + */ >> + >> + /* Check to see if we are running from flash - we expect that to >> + * be 0xffffffffb0000000-0xffffffffbfffffff >> + * (0x10000000-0x1fffffff, unmapped/uncached) >> + */ >> + dli t2, 0xffffffffb0000000 >> + dsubu t2, s7 >> + slt s4, s7, t2 >> + bltz t2, uboot_in_flash >> + nop >> + >> + /* If we're not core 0 then we don't care about cache */ >> + mfc0 t2, CP0_EBASE >> + andi t2, EBASE_CPUNUM >> + bnez t2, uboot_in_ram >> + nop >> + >> + /* Find out if we're OCTEON I or OCTEON + which don't support running >> + * out of cache. >> + */ >> + mfc0 t2, CP0_PRID >> + ext t2, t2, 8, 8 >> + li s4, 1 >> + blt t2, 0x90, uboot_in_ram >> + nop >> + >> + /* U-Boot can be executing either in RAM or L2 cache. Now we need to >> + * check if DRAM is initialized.
The way we do that is to look at >> + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) >> + */ >> + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL >> + ld t2, 0(t2) >> + bbit1 t2, 7, uboot_in_ram >> + nop >> + >> + /* We must be executing out of cache */ >> + b uboot_in_ram >> + li s4, 2 >> + >> +uboot_in_flash: >> + /* Set s4 to 4 to indicate we're running in FLASH */ >> + li s4, 4 >> + >> +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) >> + /* By default, L2C index aliasing is enabled. In some cases it may >> + * need to be disabled. The L2C index aliasing can only be disabled >> + * if U-Boot is running out of L2 cache and the L2 cache has not been >> + * used to store anything. >> + */ >> + PTR_LI t1, OCTEON_L2C_CTL >> + ld t2, 0(t1) >> + ori t2, 1 >> + sd t2, 0(t1) >> +#endif >> + >> + /* Use BFC00000 as physical address for TLB mappings when booting >> + * from flash, as we will adjust the boot bus mappings to make this >> + * mapping correct. >> + */ >> + dli a7, 0xFFFFFFFFBFC00000 >> + dsubu s6, s7, a7 /* Save flash offset in s6 */ >> + >> +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) >> + /* For OCTEON II we check to see if the L2 cache is big enough to hold >> + * U-Boot. If it is big enough then we copy ourself from flash to the >> + * L2 cache in order to speed up execution. 
>> + */ >> + >> + /* Check for OCTEON 2 */ >> + mfc0 t1, CP0_PRID >> + ext t1, t1, 8, 8 >> + /* Get number of L2 cache sets */ >> + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ >> + li t2, 1 << 9 >> + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ >> + li t2, 1 << 13 >> + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ >> + li t2, 1 << 10 >> + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ >> + li t2, 1 << 11 >> + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ >> + li t2, 1 << 11 >> + b l2_cache_too_small /* Unknown OCTEON model */ >> + nop >> + >> +got_l2_sets: >> + /* Get number of associations */ >> + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 >> + ld t0, 0(t0) >> + dext t0, t0, 32, 3 >> + >> + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets >> + nop >> + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ >> + beqz t0, got_l2_ways >> + li t3, 16 >> + beq t0, 1, got_l2_ways >> + li t3, 12 >> + beq t0, 2, got_l2_ways >> + li t3, 8 >> + beq t0, 3, got_l2_ways >> + li t3, 4 >> + b l2_cache_too_small >> + nop >> + >> +process_70xx_l2sets: >> + /* For 70XX, the number of ways is defined as: >> + * 0 - full cache (4-way) 512K >> + * 1 - 3/4 ways (3-way) 384K >> + * 2 - 1/2 ways (2-way) 256K >> + * 3 - 1/4 ways (1-way) 128K >> + * 4-7 illegal (aliased to 0-3) >> + */ >> + andi t0, 3 >> + beqz t0, got_l2_ways >> + li t3, 4 >> + beq t0, 1, got_l2_ways >> + li t3, 3 >> + beq t0, 2, got_l2_ways >> + li t3, 2 >> + li t3, 1 >> + >> +got_l2_ways: >> + dmul a1, t2, t3 /* Calculate cache size */ >> + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ >> + daddiu a1, a1, -128 /* Adjust cache size for copy code */ >> + >> + /* Calculate size of U-Boot image */ >> + /* >> + * "uboot_end - _start" is not correct, as the image also >> + * includes the DTB appended to the end (OF_EMBED is deprecated). >> + * Lets use a defined max for now here. 
>> + */ >> + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT >> + >> + daddu t2, s5, s7 /* t2 = end address */ >> + daddiu t2, t2, 127 >> + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ >> + >> + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ >> + bnez t1, l2_cache_too_small >> + nop >> + /* Address we plan to load at in the L2 cache */ >> + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR >> +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE >> + /* Enable all ways for PP0. Authentik ROM may have disabled these */ >> + PTR_LI a1, OCTEON_L2C_WPAR_PP0 >> + sd zero, 0(a1) >> + >> + /* Address to place our memcpy code */ >> + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR >> + /* The following code writes a simple memcpy routine into the cache >> + * to copy ourself from flash into the L2 cache. This makes the >> + * memcpy routine a lot faster since each instruction can potentially >> + * require four read cycles to flash over the boot bus. >> + */ >> + /* Zero cache line in the L2 cache */ >> + zcb (a0) >> + synci 0(zero) >> + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ >> + sd a1, 0(a0) >> + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ >> + sd a1, 8(a0) >> + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ >> + sd a1, 16(a0) >> + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ >> + sd a1, 24(a0) >> + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ >> + sd a1, 32(a0) >> + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ >> + sd a1, 40(a0) >> + sd zero, 48(a0) /* nop; nop */ >> + >> + /* Synchronize the caches */ >> + sync >> + synci 0(zero) >> + >> + move t0, s7 >> + move t1, t9 >> + >> + /* Do the memcpy operation in L2 cache to copy ourself from flash >> + * to the L2 cache. 
>> + */ >> + jalr a0 >> + nop >> + >> +# else >> + /* Copy ourself to the L2 cache from flash, 32 bytes at a time */ >> + /* This code is now written to the L2 cache using the code above */ >> +1: >> + ld a0, 0(t0) >> + ld a1, 8(t0) >> + ld a2, 16(t0) >> + ld a3, 24(t0) >> + sd a0, 0(t1) >> + sd a1, 8(t1) >> + sd a2, 16(t1) >> + sd a3, 24(t1) >> + addiu t0, 32 >> + bne t0, t2, 1b >> + addiu t1, 32 >> +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ >> + >> + /* Adjust the start address of U-Boot and the global pointer */ >> + subu t0, s7, t9 /* t0 = address difference */ >> + move s7, t9 /* Update physical address */ >> + move s2, t9 >> + sync >> + synci 0(zero) >> + >> + /* Now we branch to the L2 cache. We first get our PC then adjust it >> + */ >> + bal 3f >> + nop >> +3: >> + /* Don't add any instructions here! */ >> + subu t9, ra, t0 >> + /* Give ourself 16 bytes */ >> + addiu t9, 0x10 >> + >> + jal t9 /* Branch to address in L2 cache */ >> + >> + nop >> + nop >> + /* Add instructions after here */ >> + >> + move a7, s7 >> + >> + b uboot_in_ram >> + ori s4, 2 /* Running out of L2 cache */ >> + >> +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ >> +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ >> + >> + /* This code is only executed if booting from flash. */ >> + /* For flash boot (_not_ RAM boot), we do a workaround for >> + * an LLM errata on CN38XX and CN58XX parts. >> + */ >> + >> +uboot_in_ram: >> + /* U-boot address is now in reg a7, and is 4 MByte aligned. >> + * (boot bus addressing has been adjusted to make this happen for flash, >> + * and for DRAM this alignment must be provided by the remote boot >> + * utility. >> + */ >> + /* See if we're in KSEG0 range, if so set EBASE register to handle >> + * exceptions. 
>> + */ >> + dli a1, 0x20000000 >> + bge a7, a1, 1f >> + nop >> + /* Convert our physical address to KSEG0 */ >> + PTR_LI a1, 0xffffffff80000000 >> + or a1, a1, a7 >> + mtc0 a1, CP0_EBASE >> +1: >> + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping >> + * to map u-boot. >> + */ >> + move a0, a6 /* Virtual addr in a0 */ >> + dins a0, zero, 0, 16 /* Zero out offset bits */ >> + move a1, a7 /* Physical addr in a1 */ >> + >> + /* Now we need to remove the MIPS address space bits. For this we >> + * need to determine if it is a 32 bit compatibility address or not. >> + */ >> + >> + /* 'lowest' address in compatibility space */ >> + PTR_LI t0, 0xffffffff80000000 >> + dsubu t0, t0, a1 >> + bltz t0, compat_space >> + nop >> + >> + /* We have a xkphys address, so strip off top bit */ >> + b addr_fixup_done >> + dins a1, zero, 63, 1 >> + >> +compat_space: >> + PTR_LI a2, 0x1fffffff >> + and a1, a1, a2 /* Mask phy addr to remove address space bits */ >> + >> +addr_fixup_done: >> + /* Currently the u-boot image size is limited to 4 MBytes. In order to >> + * support larger images the flash mapping will need to be changed to >> + * be able to access more than that before C code is run. Until that >> + * is done, we just use a 4 MByte mapping for the secondary cores as >> + * well. >> + */ >> + /* page size (only support 4 Meg binary size for now for core 0) >> + * This limitation is due to the fact that the boot vector is >> + * 0xBFC00000 which only makes 4MB available. Later more flash >> + * address space will be available after U-Boot has been copied to >> + * RAM. For now assume that it is in flash.
>> + */ >> + li a2, 2*1024*1024 >> + >> + mfc0 a4, CP0_EBASE >> + andi a4, EBASE_CPUNUM /* get core */ >> + beqz a4, core_0_tlb >> + nop >> + >> + /* Now determine how big a mapping to use for secondary cores, >> + * which need to map all of u-boot + heap in DRAM >> + */ >> + /* Here we look at the alignment of the physical address, >> + * and use the largest page size possible. In some cases >> + * this can result in an oversize mapping, but for secondary cores >> + * this mapping is very short lived. >> + */ >> + >> + /* Physical address in a1 */ >> + li a2, 1 >> +1: >> + sll a2, 1 >> + and a5, a1, a2 >> + beqz a5, 1b >> + nop >> + >> + /* a2 now contains largest page size we can use */ >> +core_0_tlb: >> + JAL(single_tlb_setup) >> + >> + /* Check if we're running from cache */ >> + bbit1 s4, 1, uboot_in_cache >> + nop >> + >> + /* If we are already running from ram, we don't need to muck >> + * with boot bus mappings. >> + */ >> + PTR_LI t2, 0xffffffffb0000000 >> + dsubu t2, s7 >> + /* See if our starting address is lower than the boot bus */ >> + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ >> + nop >> + >> +uboot_in_cache: >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + /* The large stack is only for core 0. For all other cores we need to >> + * use the L1 cache otherwise the other cores will stomp on top of each >> + * other unless even more space is reserved for the stack space for >> + * each core. With potentially 96 cores this gets excessive. >> + */ >> + mfc0 v0, CP0_EBASE >> + andi a0, EBASE_CPUNUM >> + bnez a0, no_big_stack >> + nop >> + PTR_LA sp, big_stack_start >> + daddiu sp, -16 >> + >> +no_big_stack: >> +#endif >> + /* We now have the TLB set up, so we need to remap the boot bus. >> + * This is tricky, as we are running from flash, and will be changing >> + * the addressing of the flash.
>> + */ >> + /* Enable movable boot bus region 0, at address 0x10000000 */ >> + PTR_LI a4, OCTEON_MIO_BOOT_BASE >> + dli a5, 0x81000000 /* EN + base address 0x11000000 */ >> + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) >> + >> + /* Copy the code that remaps the boot bus to the movable region */ >> + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + >> + PTR_LA a6, change_boot_mappings >> + GETOFFSET(a5, change_boot_mappings); >> + daddu a5, a5, a6 >> + >> + /* The code is 16 bytes (2 DWORDS) */ >> + ld a7, 0(a5) >> + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + ld a7, 8(a5) >> + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) >> + >> + /* Read from an RML register to ensure that the previous writes have >> + * completed before we branch to the movable region. >> + */ >> + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) >> + >> + /* Compute value for boot bus configuration register */ >> + /* Read region 0 config so we can _modify_ the base address field */ >> + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ >> + ld a0, 0(a4) >> + dli a4, 0xf0000000 /* Mask off bits we want to save */ >> + and a4, a4, a0 >> + dli a0, 0x0fff0000 /* Force size to max */ >> + or a4, a4, a0 >> + >> + move a5, s6 >> + /* Convert to 64k blocks, as used by boot bus config */ >> + srl a5, 16 >> + li a6, 0x1fc0 /* 'normal' boot bus base config value */ >> + subu a6, a6, a5 /* Subtract offset */ >> + /* combine into register value to pass to boot bus routine */ >> + or a0, a4, a6 >> + >> + /* Branch there */ >> + PTR_LA a1, __mapped_continue_label >> + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 >> + /* If region 0 is not enabled we can skip it */ >> + ld a4, 0(a2) >> + bbit0 a4, 31, __mapped_continue_label >> + nop >> + li a4, 0x10000000 >> + j a4 >> + synci 0(zero) >> + >> + /* We never get here, as we go directly to __mapped_continue_label */ >> + break >> + >> + >> +uboot_in_ram2: >> + >> + /* Now jump to address in TLB mapped memory to continue execution */ >> + PTR_LA a4, __mapped_continue_label >> + synci
0(a4) >> + j a4 >> + nop >> + >> +__mapped_continue_label: >> + /* Check if we are core 0, if we are not then we need >> + * to vector to code in DRAM to do application setup, and >> + * skip the rest of the bootloader. Only core 0 runs the bootloader >> + * and sets up the tables that the other cores will use for >> + * configuration. >> + */ >> + mfc0 a0, CP0_EBASE >> + andi a0, EBASE_CPUNUM /* get core */ >> + /* if (__all_cores_are_equal==0 && core==0), >> + * then jump to execute BL on core 0; else 'go to next line' >> + * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID)) >> + */ >> + lw t0, __all_cores_are_equal >> + beq a0, t0, core_0_cont1 >> + nop >> + >> + /* other cores look up addr from dram */ >> + /* DRAM controller already set up by first core */ >> + li a1, (BOOT_VECTOR_NUM_WORDS * 4) >> + mul a0, a0, a1 >> + >> + /* Now find out the boot vector base address from the moveable boot >> + * bus region. >> + */ >> + >> + /* Get the address of the boot bus moveable region */ >> + PTR_LI t8, OCTEON_MIO_BOOT_BASE >> + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) >> + /* Make sure it's enabled */ >> + bbit0 t9, 31, invalid_boot_vector >> + dext t9, t9, 3, 24 >> + dsll t9, t9, 7 >> + /* Make address XKPHYS */ >> + li t0, 1 >> + dins t9, t0, 63, 1 >> + >> + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) >> + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 >> + bne t0, t1, invalid_boot_vector >> + nop >> + >> + /* Load base address of boot vector table */ >> + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) >> + /* Add offset for core */ >> + daddu a1, t0, a0 >> + >> + mfc0 v0, CP0_STATUS >> + move v1, v0 >> + ins v1, zero, 19, 1 /* Clear NMI bit */ >> + mtc0 v1, CP0_STATUS >> + >> + /* Get app start function address */ >> + lw t9, 8(a1) >> + beqz t9, invalid_boot_vector >> + nop >> + >> + j t9 >> + lw k0, 12(a1) /* Load global data (deprecated) */ >> + >> +invalid_boot_vector: >> + wait >> + b invalid_boot_vector >> + nop >> + >> +__all_cores_are_equal: >> + /* The following 
.word tell if 'all_cores_are_equal' or core0 is special >> + * By default (for the first execution) the core0 should be special, >> + * in order to behave like the old(existing not-modified) bootloader >> + * and run the bootloader on core 0 to follow the existing design. >> + * However after that we make 'all_cores_equal' which allows to run SE >> + * applications on core0 like on any other core. NOTE that value written >> + * to '__all_cores_are_equal' should not match any core ID. >> + */ >> + .word 0 >> + >> +core_0_cont1: >> + li t0, 0xffffffff >> + sw t0, __all_cores_are_equal >> + /* From here on, only core 0 runs, other cores have branched >> + * away. >> + */ >> +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM >> + /* Set up initial stack and global data */ >> + setup_stack_gd >> +# ifdef CONFIG_DEBUG_UART >> + PTR_LA t9, debug_uart_init >> + jalr t9 >> + nop >> +# endif >> +#endif >> + move a0, zero # a0 <-- boot_flags = 0 >> + PTR_LA t9, board_init_f >> + >> + jr t9 >> + move ra, zero >> + END(_start) >> + >> + .balign 8 >> + .globl single_tlb_setup >> + .ent single_tlb_setup >> + /* Sets up a single TLB entry. Virtual/physical addresses >> + * must be properly aligned. >> + * a0 Virtual address >> + * a1 Physical address >> + * a2 page (_not_ mapping) size >> + */ >> +single_tlb_setup: >> + /* Determine the number of TLB entries available, and >> + * use the top one. 
>> + */ >> + mfc0 a3, CP0_CONFIG1 >> + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ >> + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ >> + bbit0 a5, 31, single_tlb_setup_cont >> + nop >> + mfc0 a5, CP0_CONFIG4 >> + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ >> + nop >> + /* append config4[MMUSizeExt] to most significant bit of >> + * config1[MMUSize-1] >> + */ >> + dins a3, a5, 6, 8 >> + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ >> + >> +single_tlb_setup_cont: >> + >> + /* Format physical address for entry low */ >> + nop >> + dsrl a1, a1, 12 >> + dsll a1, a1, 6 >> + ori a1, a1, 0x7 /* set DVG bits */ >> + >> + move a4, a2 >> + daddu a5, a4, a4 /* mapping size */ >> + dsll a6, a4, 1 >> + daddiu a6, a6, -1 /* pagemask */ >> + dsrl a4, a4, 6 /* adjust for adding with entrylo */ >> + >> + /* Now set up mapping */ >> + mtc0 a6, CP0_PAGEMASK >> + mtc0 a3, CP0_INDEX >> + >> + dmtc0 a1, CP0_ENTRYLO0 >> + daddu a1, a1, a4 >> + >> + dmtc0 a1, CP0_ENTRYLO1 >> + daddu a1, a1, a4 >> + >> + dmtc0 a0, CP0_ENTRYHI >> + daddu a0, a0, a5 >> + >> + ehb >> + tlbwi >> + jr ra >> + nop >> + .end single_tlb_setup >> + >> + >> +/** >> + * This code is moved to a movable boot bus region, >> + * and it is responsible for changing the flash mappings and >> + * jumping to run from the TLB mapped address. >> + * >> + * @param a0 New address for boot bus region 0 >> + * @param a1 Address to branch to afterwards >> + * @param a2 Address of MIO_BOOT_REG_CFG0 >> + */ >> + .balign 8 >> +change_boot_mappings: >> + sd a0, 0(a2) >> + sync >> + j a1 /* Jump to new TLB mapped location */ >> + synci 0(zero) >> + >> +/* If we need a large stack, allocate it here. */ >> +#if CONFIG_OCTEON_BIG_STACK_SIZE >> + /* Allocate the stack here so it's in L2 cache or DRAM */ >> + .balign 16 >> +big_stack_end: >> + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 >> +big_stack_start: >> + .dword 0 >> +#endif >> > Viele Grüße, Stefan
diff --git a/MAINTAINERS b/MAINTAINERS index 66f0b07263..29f2d7328c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -749,6 +749,12 @@ M: Ezequiel Garcia <ezequiel at collabora.com> S: Maintained F: arch/mips/mach-jz47xx/ +MIPS Octeon +M: Aaron Williams <awilliams at marvell.com> +S: Maintained +F: arch/mips/mach-octeon/ +F: arch/mips/include/asm/arch-octeon/ + MMC M: Peng Fan <peng.fan at nxp.com> S: Maintained diff --git a/arch/Kconfig b/arch/Kconfig index 91e049b322..1cd3e1dc0b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -37,6 +37,7 @@ config MICROBLAZE config MIPS bool "MIPS architecture" + select CREATE_ARCH_SYMLINK select HAVE_ARCH_IOREMAP select HAVE_PRIVATE_LIBGCC select SUPPORT_OF_CONTROL diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 48e754cc46..3c7f3eb94f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -106,6 +106,24 @@ config ARCH_JZ47XX select OF_CONTROL select DM +config ARCH_OCTEON + bool "Support Marvell Octeon CN7xxx platforms" + select DISPLAY_CPUINFO + select DMA_ADDR_T_64BIT + select DM + select DM_SERIAL + select MIPS_CACHE_COHERENT + select MIPS_INIT_STACK_IN_SRAM + select MIPS_L2_CACHE + select MIPS_TUNE_OCTEON3 + select ROM_EXCEPTION_VECTORS + select SUPPORTS_BIG_ENDIAN + select SUPPORTS_CPU_MIPS64_OCTEON + select PHYS_64BIT + select OF_CONTROL + select OF_LIVE + imply CMD_DM + config MACH_PIC32 bool "Support Microchip PIC32" select DM @@ -160,6 +178,7 @@ source "arch/mips/mach-bmips/Kconfig" source "arch/mips/mach-jz47xx/Kconfig" source "arch/mips/mach-pic32/Kconfig" source "arch/mips/mach-mtmips/Kconfig" +source "arch/mips/mach-octeon/Kconfig" if MIPS @@ -233,6 +252,14 @@ config CPU_MIPS64_R6 Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. +config CPU_MIPS64_OCTEON + bool "Marvell Octeon series of CPUs" + depends on SUPPORTS_CPU_MIPS64_OCTEON + select 64BIT + help + Choose this option for Marvell Octeon CPUs. These CPUs are between + MIPS64 R5 and R6 with other extensions. 
+ endchoice menu "General setup" @@ -261,7 +288,7 @@ config MIPS_CM_BASE config MIPS_CACHE_INDEX_BASE hex "Index base address for cache initialisation" default 0x80000000 if CPU_MIPS32 - default 0xffffffff80000000 if CPU_MIPS64 + default 0xFFFFFFFFC0000000 if ARCH_OCTEON help This is the base address for a memory block, which is used for initialising the cache lines. This is also the base address of a memory @@ -342,6 +369,14 @@ config SPL_LOADER_SUPPORT help Enable this option if you want to use SPL loaders without DM enabled. +config MIPS_CACHE_COHERENT + bool "Set if MIPS processor is cache coherent" + help + Enable this if the MIPS architecture is cache coherent like the + Marvell Octeon series of SoCs. When this is set, cache flushes + and invalidates only flush the write buffer since the hardware + maintains cache coherency. + endmenu menu "OS boot interface" @@ -398,6 +433,9 @@ config SUPPORTS_CPU_MIPS64_R2 config SUPPORTS_CPU_MIPS64_R6 bool +config SUPPORTS_CPU_MIPS64_OCTEON + bool + config CPU_MIPS32 bool default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 @@ -405,6 +443,7 @@ config CPU_MIPS32 config CPU_MIPS64 bool default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 + default y if CPU_MIPS64_OCTEON config MIPS_TUNE_4KC bool @@ -421,6 +460,9 @@ config MIPS_TUNE_34KC config MIPS_TUNE_74KC bool +config MIPS_TUNE_OCTEON3 + bool + config 32BIT bool @@ -453,6 +495,11 @@ config MIPS_SRAM_INIT before it can be used. If enabled, a function mips_sram_init() will be called just before setup_stack_gd. 
+config DMA_ADDR_T_64BIT + bool + help + Select this to enable 64-bit DMA addressing + config SYS_DCACHE_SIZE int default 0 diff --git a/arch/mips/Makefile b/arch/mips/Makefile index af3f227436..fa1ba7855a 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0+ +ifneq ($(CONFIG_ARCH_OCTEON),y) head-y := arch/mips/cpu/start.o +else +head-y := arch/mips/mach-octeon/start.o +endif ifeq ($(CONFIG_SPL_BUILD),y) ifneq ($(CONFIG_SPL_START_S_PATH),) @@ -17,6 +21,7 @@ machine-$(CONFIG_ARCH_JZ47XX) += jz47xx machine-$(CONFIG_MACH_PIC32) += pic32 machine-$(CONFIG_ARCH_MTMIPS) += mtmips machine-$(CONFIG_ARCH_MSCC) += mscc +machine-${CONFIG_ARCH_OCTEON} += octeon machdirs := $(patsubst %,arch/mips/mach-%/,$(machine-y)) libs-y += $(machdirs) @@ -30,6 +35,7 @@ arch-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,-mips32r6 arch-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,-mips64 arch-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,-mips64r2 arch-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,-mips64r6 +arch-${CONFIG_CPU_MIPS64_OCTEON} += -march=octeon3 # Allow extra optimization for specific CPUs/SoCs tune-$(CONFIG_MIPS_TUNE_4KC) += -mtune=4kc @@ -37,6 +43,7 @@ tune-$(CONFIG_MIPS_TUNE_14KC) += -mtune=14kc tune-$(CONFIG_MIPS_TUNE_24KC) += -mtune=24kc tune-$(CONFIG_MIPS_TUNE_34KC) += -mtune=34kc tune-$(CONFIG_MIPS_TUNE_74KC) += -mtune=74kc +tune-${CONFIG_MIPS_TUNE_OCTEON3} += -mtune=octeon3 # Include default header files cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic diff --git a/arch/mips/cpu/Makefile b/arch/mips/cpu/Makefile index 6df7bb4e48..732015d6f3 100644 --- a/arch/mips/cpu/Makefile +++ b/arch/mips/cpu/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ -extra-y = start.o +ifneq ($(CONFIG_ARCH_OCTEON),y) +extra-y = start.o +endif obj-y += time.o obj-y += interrupts.o diff --git a/arch/mips/include/asm/arch-octeon/cavm-reg.h b/arch/mips/include/asm/arch-octeon/cavm-reg.h new file mode 100644 index 
0000000000..b961e54956 --- /dev/null +++ b/arch/mips/include/asm/arch-octeon/cavm-reg.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __CAVM_REG_H__ +#define __CAVM_REG_H__ + +/* Register offsets */ +#define CAVM_CIU_FUSE ((u64 *)0x80010100000001a0) +#define CAVM_MIO_BOOT_REG_CFG0 ((u64 *)0x8001180000000000) +#define CAVM_RST_BOOT ((u64 *)0x8001180006001600) + +/* Register structs */ + +/** + * Register (RSL) rst_boot + * + * RST Boot Register + */ +union cavm_rst_boot { + u64 u; + struct cavm_rst_boot_s { + u64 chipkill : 1; + u64 jtcsrdis : 1; + u64 ejtagdis : 1; + u64 romen : 1; + u64 ckill_ppdis : 1; + u64 jt_tstmode : 1; + u64 vrm_err : 1; + u64 reserved_37_56 : 20; + u64 c_mul : 7; + u64 pnr_mul : 6; + u64 reserved_21_23 : 3; + u64 lboot_oci : 3; + u64 lboot_ext : 6; + u64 lboot : 10; + u64 rboot : 1; + u64 rboot_pin : 1; + } s; +}; + +#endif /* __CAVM_REG_H__ */ diff --git a/arch/mips/include/asm/arch-octeon/clock.h b/arch/mips/include/asm/arch-octeon/clock.h new file mode 100644 index 0000000000..a844a222c9 --- /dev/null +++ b/arch/mips/include/asm/arch-octeon/clock.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018, 2019 Marvell International Ltd. 
+ * + * https://spdx.org/licenses + */ + +#ifndef __CLOCK_H__ +#define __CLOCK_H__ + +/** System PLL reference clock */ +#define PLL_REF_CLK 50000000 /* 50 MHz */ +#define NS_PER_REF_CLK_TICK (1000000000 / PLL_REF_CLK) + +/** + * Returns the I/O clock speed in Hz + */ +u64 octeon_get_io_clock(void); + +/** + * Returns the core clock speed in Hz + */ +u64 octeon_get_core_clock(void); + +#endif /* __CLOCK_H__ */ diff --git a/arch/mips/mach-octeon/Kconfig b/arch/mips/mach-octeon/Kconfig new file mode 100644 index 0000000000..67fcb6058c --- /dev/null +++ b/arch/mips/mach-octeon/Kconfig @@ -0,0 +1,92 @@ +menu "Octeon platforms" + depends on ARCH_OCTEON + +config SYS_SOC + string + default "octeon" + +config OCTEON_CN7XXX + bool "Octeon CN7XXX SoC" + +config OCTEON_CN70XX + bool "Octeon CN70XX SoC" + select OCTEON_CN7XXX + +config OCTEON_CN73XX + bool "Octeon CN73XX SoC" + select OCTEON_CN7XXX + +config OCTEON_CN78XX + bool "Octeon CN78XX SoC" + select OCTEON_CN7XXX + +choice + prompt "Octeon MIPS family select" + +config SOC_OCTEON2 + bool "Octeon II family" + help + This selects the Octeon II SoC family + +config SOC_OCTEON3 + bool "Octeon III family" + help + This selects the Octeon III SoC family CN70xx, CN73XX, CN78xx + and CNF75XX. + +endchoice + +config SYS_DCACHE_SIZE + default 32768 + +config SYS_DCACHE_LINE_SIZE + default 128 + +config SYS_ICACHE_SIZE + default 79872 + +config SYS_ICACHE_LINE_SIZE + default 128 + +config OCTEON_BIG_STACK_SIZE + hex + default 0x4000 + help + This enables a larger stack needed for Octeon 3 DRAM initialization. + If this is disabled then a part of the L1 cache will be reserved for + the stack, resulting in a smaller image. If this is true then + a portion of the TEXT address space will be reserved for the stack. + Note that this requires that U-Boot MUST be able to fit entirely + within the L2 cache and cannot be executed from a parallel NOR flash. + The default size is 16KiB. 
+ +config OCTEON_COPY_FROM_FLASH_TO_L2 + bool + default y + help + Set this for U-Boot to attempt to copy itself from flash memory into + the L2 cache. This significantly improves the boot performance. + +config OCTEON_L2_MEMCPY_IN_CACHE + bool + default y + help + If this is set then the memcpy code that is used to copy U-Boot from + the flash to the L2 cache is written to the L2 cache. This + significantly speeds up the memcpy operation. + +config OCTEON_L2_UBOOT_ADDR + hex + default 0xffffffff81000000 + help + This specifies the address where U-Boot will be copied into the L2 + cache. + +config OCTEON_L2_MEMCPY_ADDR + hex + default 0xffffffff81400000 + help + This specifies where U-Boot will place the memcpy routine used for + copying U-Boot from flash to L2 cache. + +endmenu diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile new file mode 100644 index 0000000000..a5fda682a7 --- /dev/null +++ b/arch/mips/mach-octeon/Makefile @@ -0,0 +1,10 @@ +# (C) Copyright 2019 Marvell, Inc. +# +# SPDX-License-Identifier: GPL-2.0+ +# + +extra-y = start.o + +obj-y += clock.o +obj-y += cpu.o +obj-y += dram.o diff --git a/arch/mips/mach-octeon/clock.c b/arch/mips/mach-octeon/clock.c new file mode 100644 index 0000000000..6e32008641 --- /dev/null +++ b/arch/mips/mach-octeon/clock.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, 2019 Marvell International Ltd. + */ + +#include <common.h> +#include <asm/arch/clock.h> + +DECLARE_GLOBAL_DATA_PTR; + +int octeon_get_timer_freq(void) +{ + return gd->cpu_clk; +} + +/** + * Returns the I/O clock speed in Hz + */ +u64 octeon_get_io_clock(void) +{ + return gd->bus_clk; +} diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c new file mode 100644 index 0000000000..a1373c6d56 --- /dev/null +++ b/arch/mips/mach-octeon/cpu.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Marvell International Ltd. 
+ */ + +#include <common.h> +#include <linux/io.h> +#include <asm/arch/clock.h> +#include <asm/arch-octeon/cavm-reg.h> + +DECLARE_GLOBAL_DATA_PTR; + +static int get_clocks(void) +{ + const u64 ref_clock = PLL_REF_CLK; + union cavm_rst_boot rst_boot; + + rst_boot.u = ioread64(CAVM_RST_BOOT); + gd->cpu_clk = ref_clock * rst_boot.s.c_mul; + gd->bus_clk = ref_clock * rst_boot.s.pnr_mul; + + debug("%s: cpu: %lu, bus: %lu\n", __func__, gd->cpu_clk, gd->bus_clk); + + return 0; +} + +/* Early mach init code run from flash */ +int mach_cpu_init(void) +{ + /* Remap boot-bus 0x1fc0.0000 -> 0x1f40.0000 */ + /* ToDo: Move this to an early running bus (bootbus) DM driver */ + clrsetbits_be64(CAVM_MIO_BOOT_REG_CFG0, 0xffff, 0x1f40); + + /* Get clocks and store them in GD */ + get_clocks(); + + return 0; +} + +/** + * Returns number of cores + * + * @return number of CPU cores for the specified node + */ +static int cavm_octeon_num_cores(void) +{ + return fls64(ioread64(CAVM_CIU_FUSE) & 0xffffffffffff); +} + +int print_cpuinfo(void) +{ + printf("SoC: Octeon CN73xx (%d cores)\n", cavm_octeon_num_cores()); + + return 0; +} diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c new file mode 100644 index 0000000000..c16a73e8e6 --- /dev/null +++ b/arch/mips/mach-octeon/dram.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Marvell International Ltd. 
+ */ + +#include <common.h> +#include <dm.h> +#include <ram.h> + +DECLARE_GLOBAL_DATA_PTR; + +int dram_init(void) +{ + /* + * No DDR init yet -> run in L2 cache + */ + gd->ram_size = (2 << 20); + gd->bd->bi_dram[0].size = gd->ram_size; + gd->bd->bi_dram[1].size = 0; + + return 0; +} + +ulong board_get_usable_ram_top(ulong total_size) +{ + return gd->ram_top; +} diff --git a/arch/mips/mach-octeon/include/ioremap.h b/arch/mips/mach-octeon/include/ioremap.h new file mode 100644 index 0000000000..59b75008a2 --- /dev/null +++ b/arch/mips/mach-octeon/include/ioremap.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_OCTEON_IOREMAP_H +#define __ASM_MACH_OCTEON_IOREMAP_H + +#include <linux/types.h> + +/* + * Allow physical addresses to be fixed up to help peripherals located + * outside the low 32-bit range -- generic pass-through version. + */ +static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, + phys_addr_t size) +{ + return phys_addr; +} + +static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)(XKPHYS | offset); +} + +static inline int plat_iounmap(const volatile void __iomem *addr) +{ + return 0; +} + +#define _page_cachable_default _CACHE_CACHABLE_NONCOHERENT + +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S new file mode 100644 index 0000000000..acb967201a --- /dev/null +++ b/arch/mips/mach-octeon/start.S @@ -0,0 +1,1241 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Startup Code for OCTEON 64-bit CPU-core + * + * Copyright (c) 2003 Wolfgang Denk <wd at denx.de> + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. 
+ */ + +#include <asm-offsets.h> +#include <config.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> +#include <asm/asm.h> + +#define BOOT_VECTOR_NUM_WORDS 8 + +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 + +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW + +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 + +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 +#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 +#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 +#define OCTEON_L2D_FUS3 0x80011800800007B8 +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 + +#define OCTEON_RST 0x8001180006000000 +#define OCTEON_RST_BOOT_OFFSET 0x1600 +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) +#define OCTEON_RST_BOOT 0x8001180006001600 +#define OCTEON_RST_SOFT_RST 0x8001180006001680 +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) + +#define OCTEON_OCX_COM_NODE 0x8001180011000000 +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 +#define OCTEON_L2C_CTL 0x8001180080800000 + +#define OCTEON_DBG_DATA 0x80011F00000001E8 +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + (X) * 8) +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) +#define OCTEON_CIU3_WDOG(X) 
(0x8001010000020000 + (X) * 8) +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000) + +/** PRID for CN56XX */ +#define OCTEON_PRID_CN56XX 0x04 +/** PRID for CN52XX */ +#define OCTEON_PRID_CN52XX 0x07 +/** PRID for CN63XX */ +#define OCTEON_PRID_CN63XX 0x90 +/** PRID for CN68XX */ +#define OCTEON_PRID_CN68XX 0x91 +/** PRID for CN66XX */ +#define OCTEON_PRID_CN66XX 0x92 +/** PRID for CN61XX */ +#define OCTEON_PRID_CN61XX 0x93 +/** PRID for CNF71XX */ +#define OCTEON_PRID_CNF71XX 0x94 +/** PRID for CN78XX */ +#define OCTEON_PRID_CN78XX 0x95 +/** PRID for CN70XX */ +#define OCTEON_PRID_CN70XX 0x96 +/** PRID for CN73XX */ +#define OCTEON_PRID_CN73XX 0x97 +/** PRID for CNF75XX */ +#define OCTEON_PRID_CNF75XX 0x98 + +/* func argument is used to create a mark, must be unique */ +#define GETOFFSET(reg, func) \ + .balign 8; \ + bal func ##_mark; \ + nop; \ + .dword .; \ +func ##_mark: \ + ld reg, 0(ra); \ + dsubu reg, ra, reg; + +#define JAL(func) \ + .balign 8; \ + bal func ##_mark; \ + nop; \ + .dword .; \ +func ##_mark: \ + ld t8, 0(ra); \ + dsubu t8, ra, t8; \ + dla t9, func; \ + daddu t9, t9, t8; \ + jalr t9; \ + nop; + + .set arch=octeon3 + .set noreorder + + .macro uhi_mips_exception + move k0, t9 # preserve t9 in k0 + move k1, a0 # preserve a0 in k1 + li t9, 15 # UHI exception operation + li a0, 0 # Use hard register context + sdbbp 1 # Invoke UHI operation + .endm + + .macro setup_stack_gd + li t0, -16 + PTR_LI t1, big_stack_start + and sp, t1, t0 # force 16 byte alignment + PTR_SUBU \ + sp, sp, GD_SIZE # reserve space for gd + and sp, sp, t0 # force 16 byte alignment + move k0, sp # save gd pointer +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) + PTR_SUBU \ + sp, sp, t2 # reserve space for 
early malloc + and sp, sp, t0 # force 16 byte alignment +#endif + move fp, sp + + /* Clear gd */ + move t0, k0 +1: + PTR_S zero, 0(t0) + PTR_ADDIU t0, PTRSIZE + blt t0, t1, 1b + nop + +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset +#endif + .endm + +/* Saved register usage: + * s0: not used + * s1: not used + * s2: Address U-Boot loaded into in L2 cache + * s3: Start address + * s4: flags + * 1: booting from RAM + * 2: executing out of cache + * 4: booting from flash + * s5: u-boot size (data end - _start) + * s6: offset in flash. + * s7: _start physical address + * s8: + */ + +ENTRY(_start) + /* U-Boot entry point */ + b reset + + /* The above jump instruction/nop are considered part of the + * bootloader_header_t structure but are not changed when the header is + * updated. + */ + + /* Leave room for bootloader_header_t header at start of binary. This + * header is used to identify the board the bootloader is for, what + * address it is linked at, failsafe/normal, etc. It also contains a + * CRC of the entire image. + */ + +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) + /* + * Exception vector entry points. When running from ROM, an exception + * cannot be handled. Halt execution and transfer control to debugger, + * if one is attached. + */ + .org 0x200 + /* TLB refill, 32 bit task */ + uhi_mips_exception + + .org 0x280 + /* XTLB refill, 64 bit task */ + uhi_mips_exception + + .org 0x300 + /* Cache error exception */ + uhi_mips_exception + + .org 0x380 + /* General exception */ + uhi_mips_exception + + .org 0x400 + /* Catch interrupt exceptions */ + uhi_mips_exception + + .org 0x480 + /* EJTAG debug exception */ +1: b 1b + nop + + .org 0x500 +#endif + +/* Reserve extra space so that when we use the boot bus local memory + * segment to remap the debug exception vector we don't overwrite + * anything useful + */ + +/* Basic exception handler (dump registers) in all ASM. 
When using the TLB for + * mapping u-boot C code, we can't branch to that C code for exception handling + * (TLB is disabled for some exceptions. + */ + +/* RESET/start here */ + .balign 8 +reset: + nop + synci 0(zero) + mfc0 k0, CP0_STATUS + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ + mtc0 k0, CP0_STATUS + + /* Save the address we're booting from, strip off low bits */ + bal 1f + nop +1: + move s3, ra + dins s3, zero, 0, 12 + + /* Disable boot bus moveable regions */ + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 + sd zero, 0(k0) + sd zero, 8(k0) + + /* Disable the watchdog timer + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see + * if we use CIU3 or CIU. + */ + mfc0 t0, CP0_PRID + ext t0, t0, 8, 8 + /* Assume CIU */ + PTR_LI t1, OCTEON_CIU_WDOG(0) + PTR_LI t2, OCTEON_CIU_PP_POKE(0) + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu + nop + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu + nop + /* Use CIU3 */ + PTR_LI t1, OCTEON_CIU3_WDOG(0) + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) +wd_use_ciu: + sd zero, 0(t2) /* Pet the dog */ + sd zero, 0(t1) /* Disable watchdog timer */ + + /* Errata: CN76XX has a node ID of 3. change it to zero here. + * This needs to be done before we relocate to L2 as addresses change + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. + */ + mfc0 a4, CP0_PRID + /* Check for 78xx pass 1.x processor ID */ + andi a4, 0xffff + blt a4, (OCTEON_PRID_CN78XX << 8), 1f + nop + + /* Zero out alternate package for now */ + dins a4, zero, 6, 1 + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f + nop + + /* 78xx or 76xx here, first check for bug #27141 */ + PTR_LI a5, OCTEON_SLI_CTL_STATUS + ld a6, 0(a5) + andi a7, a4, 0xff + andi a6, a6, 0xff + + beq a6, a7, not_bug27141 + nop + + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ + /* We just hit bug #27141. 
Need to reset the chip and try again */ + + PTR_LI a4, OCTEON_RST_SOFT_RST + ori a5, zero, 0x1 /* set the reset bit */ + +reset_78xx_27141: + sync + synci 0(zero) + cache 9, 0(zero) + sd a5, 0(a4) + wait + b reset_78xx_27141 + nop + +not_bug27141: + /* 76XX pass 1.x has the node number set to 3 */ + mfc0 a4, CP0_EBASE + ext a4, a4, 0, 10 + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ + nop + + /* Clear OCX_COM_NODE[ID] */ + PTR_LI a5, OCTEON_OCX_COM_NODE + ld a4, 0(a5) + dins a4, zero, 0, 2 + sd a4, 0(a5) + ld zero, 0(a5) + + /* Clear L2C_OCI_CTL[GKSEGNODE] */ + PTR_LI a5, OCTEON_L2C_OCI_CTL + ld a4, 0(a5) + dins a4, zero, 4, 2 + sd a4, 0(a5) + ld zero, 0(a5) + + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ + dmfc0 a4, CP0_CVMMEMCTL2 + dins a4, zero, 12, 2 + dmtc0 a4, CP0_CVMMEMCTL2 + + /* Put the flash address in the start of the EBASE register to + * enable our exception handler but only for core 0. + */ + mfc0 a4, CP0_EBASE + dext a4, a4, 0, 10 + bnez a4, no_flash + /* OK in delay slot */ + dext a6, a6, 0, 16 /* Get the base address in flash */ + sll a6, a6, 16 + mtc0 a6, CP0_EBASE /* Enable exceptions */ + +no_flash: + /* Zero out various registers */ + mtc0 zero, CP0_DEPC + mtc0 zero, CP0_EPC + mtc0 zero, CP0_CAUSE + mfc0 a4, CP0_PRID + ext a4, a4, 8, 8 + mtc0 zero, CP0_DESAVE + + /* The following are only available on Octeon 2 or later */ + mtc0 zero, CP0_KSCRATCH1 + mtc0 zero, CP0_KSCRATCH2 + mtc0 zero, CP0_KSCRATCH3 + mtc0 zero, CP0_USERLOCAL + + /* Turn off ROMEN bit to disable ROM */ + PTR_LI a1, OCTEON_MIO_RST_BOOT + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. + * The difference is bits 24-26 are 6 instead of 0 for the address. 
+ */ + /* For Octeon 2 and CN70XX we can ignore the watchdog */ + blt a4, OCTEON_PRID_CN78XX, watchdog_ok + nop + + PTR_LI a1, OCTEON_RST_BOOT + + beq a4, OCTEON_PRID_CN70XX, watchdog_ok + nop + + ld a2, 0(a1) + /* There is a bug where some registers don't get properly reset when + * the watchdog timer causes a reset. In this case we need to force + * a reset. + */ + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ + /* Clear bit indicating reset due to watchdog */ + ori a2, 1 << 11 + sd a2, 0(a1) + + /* Disable watchdog */ + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) + sd zero, 0(a1) + PTR_LI a1, OCTEON_CIU3_WDOG(0) + sd zero, 0(a1) + + /* Record this in the GSER0_SCRATCH register in bit 11 */ + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) + ld a2, 0(a1) + ori a2, 1 << 11 + sd a2, 0(a1) + + PTR_LI a1, OCTEON_RST_SOFT_RST + li a2, 1 + sd a2, 0(a1) + wait + + /* We should never get here */ + +watchdog_ok: + ld a2, 0(a1) + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ + dins a2, zero, 2, 18 + dins a2, zero, 60, 1 /* Clear ROMEN bit */ + sd a2, 0(a1) + + /* Start of Octeon setup */ + + /* Check what core we are - if core 0, branch to init tlb + * loop in flash. Otherwise, look up address of init tlb + * loop that was saved in the boot vector block. + */ + mfc0 a0, CP0_EBASE + andi a0, EBASE_CPUNUM /* get core */ + beqz a0, InitTLBStart_local + nop + + break + /* We should never get here - non-zero cores now go directly to + * tlb init from the boot stub in movable region. + */ + + .globl InitTLBStart +InitTLBStart: +InitTLBStart_local: + /* If we don't have working memory yet configure a bunch of + * scratch memory, and set the stack pointer to the top + * of it. This allows us to go to C code without having + * memory set up + * + * Warning: do not change SCRATCH_STACK_LINES as this can impact the + * transition from start.S to crti.asm. crti requires 590 bytes of + * stack space. 
+ */ + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ +#if CONFIG_OCTEON_BIG_STACK_SIZE + rdhwr v0, $0 + bnez v0, 1f + nop + PTR_LA sp, big_stack_start - 16 + b stack_clear_done + nop +1: +#endif +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ + dmfc0 v0, CP0_CVMMEMCTL + dins v0, zero, 0, 9 + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ + ori v0, 0x100 | SCRATCH_STACK_LINES + dmtc0 v0, CP0_CVMMEMCTL + /* set stack to top of scratch memory */ + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ + li t0, 0xffffffffffff8000 +clear_scratch: + sd zero, 0(t0) + addiu t0, 8 + bne t0, sp, clear_scratch + nop + + /* This code run on all cores - core 0 from flash, + * the rest from DRAM. When booting from PCI, non-zero cores + * come directly here from the boot vector - no earlier code in this + * file is executed. + */ + + /* Some generic initialization is done here as well, as we need this + * done on all cores even when booting from PCI + */ +stack_clear_done: + /* Clear watch registers. 
*/ + mtc0 zero, CP0_WATCHLO + mtc0 zero, CP0_WATCHHI + + /* STATUS register */ + mfc0 k0, CP0_STATUS + li k1, ~ST0_IE + and k0, k1 + mtc0 k0, CP0_STATUS + + /* CAUSE register */ + mtc0 zero, CP0_CAUSE + + /* Init Timer */ + dmtc0 zero, CP0_COUNT + dmtc0 zero, CP0_COMPARE + + + mfc0 a5, CP0_STATUS + li v0, 0xE0 /* enable 64 bit mode for CSR access */ + or v0, v0, a5 + mtc0 v0, CP0_STATUS + + + dli v0, 1 << 29 /* Enable large physical address support in TLB */ + mtc0 v0, CP0_PAGEGRAIN + +InitTLB: + dmtc0 zero, CP0_ENTRYLO0 + dmtc0 zero, CP0_ENTRYLO1 + mtc0 zero, CP0_PAGEMASK + dmtc0 zero, CP0_CONTEXT + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy + * TLB clearing + */ + PTR_LI v0, 0xFFFFFFFF90000000 + mfc0 a0, CP0_CONFIG1 + srl a0, a0, 25 + /* Check if config4 reg present */ + mfc0 a1, CP0_CONFIG3 + bbit0 a1, 31, 2f + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ + mfc0 a1, CP0_CONFIG4 + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ + nop + /* append config4[MMUSizeExt] to most significant bit of + * config1[MMUSize-1] + */ + ins a0, a1, 6, 8 + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ +2: + dmtc0 zero, CP0_XCONTEXT + mtc0 zero, CP0_WIRED + +InitTLBloop: + dmtc0 v0, CP0_ENTRYHI + tlbp + mfc0 v1, CP0_INDEX + daddiu v0, v0, 1<<13 + bgez v1, InitTLBloop + + mtc0 a0, CP0_INDEX + tlbwi + bnez a0, InitTLBloop + daddiu a0, -1 + + mthi zero + mtlo zero + + /* Set up status register */ + mfc0 v0, CP0_STATUS + /* Enable COP0 and COP2 access */ + li a4, (1 << 28) | (1 << 30) + or v0, a4 + + /* Must leave BEV set here, as DRAM is not configured for core 0. + * Also, BEV must be 1 later on when the exception base address is set. 
+ */ + + /* Mask all interrupts */ + ins v0, zero, 0, 16 + /* Clear NMI (used to start cores other than core 0) */ + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ + mtc0 v0, CP0_STATUS + + dli v0,0xE000000F /* enable all readhw locations */ + mtc0 v0, CP0_HWRENA + + dmfc0 v0, CP0_CVMCTL + ori v0, 1<<14 /* enable fixup of unaligned mem access */ + dmtc0 v0, CP0_CVMCTL + + /* Setup scratch memory. This is also done in + * cvmx_user_app_init, and this code will be removed + * from the bootloader in the near future. + */ + + /* Set L2C_TAD_CTL[MAXLFB] = 0 on CN73XX and any later PRID */ + mfc0 a4, CP0_PRID + ext a4, a4, 8, 8 + blt a4, OCTEON_PRID_CN73XX, 72f + nop + PTR_LI v0, OCTEON_L2C_TAD_CTL + ld t1, 0(v0) + dins t1, zero, 0, 4 + sd t1, 0(v0) + ld zero, 0(v0) + +72: + + /* clear these to avoid immediate interrupt in noperf mode */ + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ + dmtc0 zero, CP0_PERF_CNT2 + dmtc0 zero, CP0_PERF_CNT3 + + /* If we're running on a node other than 0 then we need to set KSEGNODE + * to 0. The nice thing with this code is that it also autodetects if + * we're running on a processor that supports CVMMEMCTL2 or not since + * only processors that have this will have a non-zero node ID. Because + * of this there's no need to check if we're running on a 78XX. + */ + mfc0 t1, CP0_EBASE + dext t1, t1, 7, 3 /* Extract node number */ + beqz t1, is_node0 /* If non-zero then we're not node 0 */ + nop + dmfc0 t1, CP0_CVMMEMCTL2 + dins t1, zero, 12, 4 + dmtc0 t1, CP0_CVMMEMCTL2 +is_node0: + + /* Set up TLB mappings for u-boot code in flash. */ + + /* Use a bal to get the current PC into ra. Since this bal is to + * the address immediately following the delay slot, the ra is + * the address of the label. We then use this to get the actual + * address that we are executing from. 
+ */ + bal __dummy + nop + +__dummy: + /* Get the actual address that we are running at */ + PTR_LA a6, _start /* Linked address of _start */ + PTR_LA a7, __dummy + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ + + /* Save actual _start address in s7. This is where we + * are executing from, as opposed to where the code is + * linked. + */ + move s7, a7 + move s4, zero + + /* s7 has actual address of _start. If this is + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. + * If it is on the boot bus, use 0xBFC00000 as the physical address + * for the TLB mapping, as we will be adjusting the boot bus + * to make this adjustment. + * If we are running from DRAM (remote-boot), then we want to use the + * real address in DRAM. + */ + + /* Check to see if we are running from flash - we expect that to + * be 0xffffffffb0000000-0xffffffffbfffffff + * (0x10000000-0x1fffffff, unmapped/uncached) + */ + dli t2, 0xffffffffb0000000 + dsubu t2, s7 + slt s4, s7, t2 + bltz t2, uboot_in_flash + nop + + /* If we're not core 0 then we don't care about cache */ + mfc0 t2, CP0_EBASE + andi t2, EBASE_CPUNUM + bnez t2, uboot_in_ram + nop + + /* Find out if we're OCTEON I or OCTEON + which don't support running + * out of cache. + */ + mfc0 t2, CP0_PRID + ext t2, t2, 8, 8 + li s4, 1 + blt t2, 0x90, uboot_in_ram + nop + + /* U-Boot can be executing either in RAM or L2 cache. Now we need to + * check if DRAM is initialized. The way we do that is to look at + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) + */ + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL + ld t2, 0(t2) + bbit1 t2, 7, uboot_in_ram + nop + + /* We must be executing out of cache */ + b uboot_in_ram + li s4, 2 + +uboot_in_flash: + /* Set s4 to 4 to indicate we're running in FLASH */ + li s4, 4 + +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) + /* By default, L2C index aliasing is enabled. 
In some cases it may + * need to be disabled. The L2C index aliasing can only be disabled + * if U-Boot is running out of L2 cache and the L2 cache has not been + * used to store anything. + */ + PTR_LI t1, OCTEON_L2C_CTL + ld t2, 0(t1) + ori t2, 1 + sd t2, 0(t1) +#endif + + /* Use BFC00000 as physical address for TLB mappings when booting + * from flash, as we will adjust the boot bus mappings to make this + * mapping correct. + */ + dli a7, 0xFFFFFFFFBFC00000 + dsubu s6, s7, a7 /* Save flash offset in s6 */ + +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) + /* For OCTEON II we check to see if the L2 cache is big enough to hold + * U-Boot. If it is big enough then we copy ourself from flash to the + * L2 cache in order to speed up execution. + */ + + /* Check for OCTEON 2 */ + mfc0 t1, CP0_PRID + ext t1, t1, 8, 8 + /* Get number of L2 cache sets */ + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ + li t2, 1 << 9 + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ + li t2, 1 << 13 + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ + li t2, 1 << 10 + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ + li t2, 1 << 11 + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ + li t2, 1 << 11 + b l2_cache_too_small /* Unknown OCTEON model */ + nop + +got_l2_sets: + /* Get number of associations */ + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 + ld t0, 0(t0) + dext t0, t0, 32, 3 + + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets + nop + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ + beqz t0, got_l2_ways + li t3, 16 + beq t0, 1, got_l2_ways + li t3, 12 + beq t0, 2, got_l2_ways + li t3, 8 + beq t0, 3, got_l2_ways + li t3, 4 + b l2_cache_too_small + nop + +process_70xx_l2sets: + /* For 70XX, the number of ways is defined as: + * 0 - full cache (4-way) 512K + * 1 - 3/4 ways (3-way) 384K + * 2 - 1/2 ways (2-way) 256K + * 3 - 1/4 ways (1-way) 128K + * 4-7 illegal (aliased to 0-3) + */ + andi t0, 3 + beqz t0, got_l2_ways + li t3, 4 + beq t0, 1, 
got_l2_ways + li t3, 3 + beq t0, 2, got_l2_ways + li t3, 2 + li t3, 1 + +got_l2_ways: + dmul a1, t2, t3 /* Calculate cache size */ + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ + daddiu a1, a1, -128 /* Adjust cache size for copy code */ + + /* Calculate size of U-Boot image */ + /* + * "uboot_end - _start" is not correct, as the image also + * includes the DTB appended to the end (OF_EMBED is deprecated). + * Lets use a defined max for now here. + */ + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT + + daddu t2, s5, s7 /* t2 = end address */ + daddiu t2, t2, 127 + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ + + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ + bnez t1, l2_cache_too_small + nop + /* Address we plan to load at in the L2 cache */ + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE + /* Enable all ways for PP0. Authentik ROM may have disabled these */ + PTR_LI a1, OCTEON_L2C_WPAR_PP0 + sd zero, 0(a1) + + /* Address to place our memcpy code */ + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR + /* The following code writes a simple memcpy routine into the cache + * to copy ourself from flash into the L2 cache. This makes the + * memcpy routine a lot faster since each instruction can potentially + * require four read cycles to flash over the boot bus. 
+ */ + /* Zero cache line in the L2 cache */ + zcb (a0) + synci 0(zero) + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ + sd a1, 0(a0) + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ + sd a1, 8(a0) + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ + sd a1, 16(a0) + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ + sd a1, 24(a0) + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ + sd a1, 32(a0) + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ + sd a1, 40(a0) + sd zero, 48(a0) /* nop; nop */ + + /* Synchronize the caches */ + sync + synci 0(zero) + + move t0, s7 + move t1, t9 + + /* Do the memcpy operation in L2 cache to copy ourself from flash + * to the L2 cache. + */ + jalr a0 + nop + +# else + /* Copy ourself to the L2 cache from flash, 32 bytes at a time, + * running the copy loop directly from flash. + */ + move t0, s7 /* t0 = source: actual _start address */ + move t1, t9 /* t1 = destination: load address in L2 */ +1: + ld a0, 0(t0) + ld a1, 8(t0) + ld a2, 16(t0) + ld a3, 24(t0) + sd a0, 0(t1) + sd a1, 8(t1) + sd a2, 16(t1) + sd a3, 24(t1) + addiu t0, 32 + bne t0, t2, 1b + addiu t1, 32 +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ + + /* Adjust the start address of U-Boot and the global pointer */ + subu t0, s7, t9 /* t0 = address difference */ + move s7, t9 /* Update physical address */ + move s2, t9 + sync + synci 0(zero) + + /* Now we branch to the L2 cache. We first get our PC then adjust it + */ + bal 3f + nop +3: + /* Don't add any instructions here! */ + subu t9, ra, t0 + /* Give ourself 16 bytes */ + addiu t9, 0x10 + + jal t9 /* Branch to address in L2 cache */ + + nop + nop + /* Add instructions after here */ + + move a7, s7 + + b uboot_in_ram + ori s4, 2 /* Running out of L2 cache */ + +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ + + /* This code is only executed if booting from flash. 
+ */ + /* For flash boot (_not_ RAM boot), we do a workaround for + * an LLM errata on CN38XX and CN58XX parts. + */ + +uboot_in_ram: + /* U-boot address is now in reg a7, and is 4 MByte aligned. + * (boot bus addressing has been adjusted to make this happen for flash, + * and for DRAM this alignment must be provided by the remote boot + * utility.) + */ + /* See if we're in KSEG0 range, if so set EBASE register to handle + * exceptions. + */ + dli a1, 0x20000000 + bge a7, a1, 1f + nop + /* Convert our physical address to KSEG0 */ + PTR_LI a1, 0xffffffff80000000 + or a1, a1, a7 + mtc0 a1, CP0_EBASE +1: + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping + * to map u-boot. + */ + move a0, a6 /* Virtual addr in a0 */ + dins a0, zero, 0, 16 /* Zero out offset bits */ + move a1, a7 /* Physical addr in a1 */ + + /* Now we need to remove the MIPS address space bits. For this we + * need to determine if it is a 32 bit compatibility address or not. + */ + + /* 'lowest' address in compatibility space */ + PTR_LI t0, 0xffffffff80000000 + dsubu t0, t0, a1 + bltz t0, compat_space + nop + + /* We have a xkphys address, so strip off top bit */ + b addr_fixup_done + dins a1, zero, 63, 1 + +compat_space: + PTR_LI a2, 0x1fffffff + and a1, a1, a2 /* Mask phy addr to remove address space bits */ + +addr_fixup_done: + /* Currently the u-boot image size is limited to 4 MBytes. In order to + * support larger images the flash mapping will need to be changed to + * be able to access more than that before C code is run. Until that + * is done, we just use a 4 MByte mapping for the secondary cores as + * well. + */ + /* page size (only support 4 Meg binary size for now for core 0) + * This limitation is due to the fact that the boot vector is + * 0xBFC00000 which only makes 4MB available. Later more flash + * address space will be available after U-Boot has been copied to + * RAM. For now assume that it is in flash. 
+ */ + li a2, 2*1024*1024 + + mfc0 a4, CP0_EBASE + andi a4, EBASE_CPUNUM /* get core */ + beqz a4, core_0_tlb + nop + + /* Now determine how big a mapping to use for secondary cores, + * which need to map all of u-boot + heap in DRAM + */ + /* Here we look at the alignment of the physical address, + * and use the largest page size possible. In some cases + * this can result in an oversize mapping, but for secondary cores + * this mapping is very short lived. + */ + + /* Physical address in a1 */ + li a2, 1 +1: + sll a2, 1 + and a5, a1, a2 + beqz a5, 1b + nop + + /* a2 now contains largest page size we can use */ +core_0_tlb: + JAL(single_tlb_setup) + + /* Check if we're running from cache */ + bbit1 s4, 1, uboot_in_cache + nop + + /* If we are already running from ram, we don't need to muck + * with boot bus mappings. + */ + PTR_LI t2, 0xffffffffb0000000 + dsubu t2, s7 + /* See if our starting address is lower than the boot bus */ + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ + nop + +uboot_in_cache: +#if CONFIG_OCTEON_BIG_STACK_SIZE + /* The large stack is only for core 0. For all other cores we need to + * use the L1 cache otherwise the other cores will stomp on top of each + * other unless even more space is reserved for the stack space for + * each core. With potentially 96 cores this gets excessive. + */ + /* Read EBASE into a0 (not v0): the mask and branch below test a0, + * so the core number must land in the register that is checked. + */ + mfc0 a0, CP0_EBASE + andi a0, EBASE_CPUNUM /* get core */ + bnez a0, no_big_stack + nop + PTR_LA sp, big_stack_start + daddiu sp, -16 + +no_big_stack: +#endif + /* We now have the TLB set up, so we need to remap the boot bus. + * This is tricky, as we are running from flash, and will be changing + * the addressing of the flash. 
+ */ + /* Enable movable boot bus region 0, at address 0x10000000 */ + PTR_LI a4, OCTEON_MIO_BOOT_BASE + dli a5, 0x81000000 /* EN + base address 0x10000000 */ + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) + + /* Copy the code that remaps the boot bus to the movable region */ + /* NOTE(review): with the two stores below this makes three doubleword + * writes to LOC_DAT for a routine described as 16 bytes - confirm + * whether this first store was meant to reset the write position. + */ + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) + + PTR_LA a6, change_boot_mappings + GETOFFSET(a5, change_boot_mappings); + daddu a5, a5, a6 + + /* The code is 16 bytes (2 DWORDS) */ + ld a7, 0(a5) + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) + ld a7, 8(a5) + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) + + /* Read from an RML register to ensure that the previous writes have + * completed before we branch to the movable region. + */ + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) + + /* Compute value for boot bus configuration register */ + /* Read region 0 config so we can _modify_ the base address field */ + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ + ld a0, 0(a4) + dli a4, 0xf0000000 /* Mask off bits we want to save */ + and a4, a4, a0 + dli a0, 0x0fff0000 /* Force size to max */ + or a4, a4, a0 + + move a5, s6 + /* Convert to 64k blocks, as used by boot bus config */ + srl a5, 16 + li a6, 0x1fc0 /* 'normal' boot bus base config value */ + subu a6, a6, a5 /* Subtract offset */ + /* combine into register value to pass to boot bus routine */ + or a0, a4, a6 + + /* Branch there: a0 = new config value, a1 = continuation address, + * a2 = config register address, as change_boot_mappings expects. + */ + PTR_LA a1, __mapped_continue_label + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 + /* If region 0 is not enabled we can skip it */ + ld a4, 0(a2) + bbit0 a4, 31, __mapped_continue_label + nop + li a4, 0x10000000 /* address of the movable region enabled above */ + j a4 + synci 0(zero) + + /* We never get here, as we go directly to __mapped_continue_label */ + break + + +uboot_in_ram2: + + /* Now jump to address in TLB mapped memory to continue execution */ + PTR_LA a4, __mapped_continue_label + synci 0(a4) + j a4 + nop + +__mapped_continue_label: + /* Check if we are core 0, if we are not then we need + * to vector to code in DRAM to do application setup, and + * skip the rest of the 
bootloader. Only core 0 runs the bootloader + * and sets up the tables that the other cores will use for + * configuration. + */ + mfc0 a0, CP0_EBASE + andi a0, EBASE_CPUNUM /* get core */ + /* if (__all_cores_are_equal==0 && core==0), + * then jump to execute BL on core 0; else 'go to next line' + * (core_0_cont1 is executed ONLY when t0=a0=0(core0_ID)) + */ + lw t0, __all_cores_are_equal + beq a0, t0, core_0_cont1 + nop + + /* other cores look up addr from dram */ + /* DRAM controller already set up by first core */ + li a1, (BOOT_VECTOR_NUM_WORDS * 4) + mul a0, a0, a1 /* a0 = core number * entry size, used as table offset below */ + + /* Now find out the boot vector base address from the moveable boot + * bus region. + */ + + /* Get the address of the boot bus moveable region */ + PTR_LI t8, OCTEON_MIO_BOOT_BASE + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) + /* Make sure it's enabled */ + bbit0 t9, 31, invalid_boot_vector + dext t9, t9, 3, 24 + dsll t9, t9, 7 + /* Make address XKPHYS */ + li t0, 1 + dins t9, t0, 63, 1 + + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 + bne t0, t1, invalid_boot_vector + nop + + /* Load base address of boot vector table */ + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) + /* Add offset for core */ + daddu a1, t0, a0 + + mfc0 v0, CP0_STATUS + move v1, v0 + ins v1, zero, 19, 1 /* Clear NMI bit */ + mtc0 v1, CP0_STATUS + + /* Get app start function address */ + lw t9, 8(a1) + beqz t9, invalid_boot_vector + nop + + j t9 + lw k0, 12(a1) /* Load global data (deprecated) */ + +invalid_boot_vector: + /* No usable boot vector: idle in a wait loop forever */ + wait + b invalid_boot_vector + nop + +__all_cores_are_equal: + /* The following .word tell if 'all_cores_are_equal' or core0 is special + * By default (for the first execution) the core0 should be special, + * in order to behave like the old(existing not-modified) bootloader + * and run the bootloader on core 0 to follow the existing design. + * However after that we make 'all_cores_equal' which allows to run SE + * applications on core0 like on any other core. 
NOTE that value written + * to '__all_cores_are_equal' should not match any core ID. + */ + .word 0 + +core_0_cont1: + /* Overwrite the flag word so that a later comparison against a core + * number (including 0) is not expected to match any more. + */ + li t0, 0xffffffff + sw t0, __all_cores_are_equal + /* From here on, only core 0 runs, other cores have branched + * away. + */ +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM + /* Set up initial stack and global data */ + setup_stack_gd +# ifdef CONFIG_DEBUG_UART + PTR_LA t9, debug_uart_init + jalr t9 + nop +# endif +#endif + move a0, zero # a0 <-- boot_flags = 0 + PTR_LA t9, board_init_f + + jr t9 + move ra, zero + END(_start) + + .balign 8 + .globl single_tlb_setup + .ent single_tlb_setup + /* Sets up a single TLB entry. Virtual/physical addresses + * must be properly aligned. + * a0 Virtual address + * a1 Physical address + * a2 page (_not_ mapping) size + * + * Writes the entry at the highest TLB index reported by the + * config registers. Modifies a0, a1, a3, a4, a5 and a6; a2 is + * only read. + */ +single_tlb_setup: + /* Determine the number of TLB entries available, and + * use the top one. + */ + mfc0 a3, CP0_CONFIG1 + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ + bbit0 a5, 31, single_tlb_setup_cont + nop + mfc0 a5, CP0_CONFIG4 + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ + nop + /* append config4[MMUSizeExt] to most significant bit of + * config1[MMUSize-1] + */ + dins a3, a5, 6, 8 + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ + +single_tlb_setup_cont: + + /* Format physical address for entry low */ + nop + dsrl a1, a1, 12 + dsll a1, a1, 6 + ori a1, a1, 0x7 /* set DVG bits */ + + move a4, a2 + daddu a5, a4, a4 /* mapping size */ + dsll a6, a4, 1 + daddiu a6, a6, -1 /* pagemask */ + dsrl a4, a4, 6 /* adjust for adding with entrylo */ + + /* Now set up mapping */ + mtc0 a6, CP0_PAGEMASK + mtc0 a3, CP0_INDEX + + /* Even page goes to ENTRYLO0, the following page to ENTRYLO1 */ + dmtc0 a1, CP0_ENTRYLO0 + daddu a1, a1, a4 + + dmtc0 a1, CP0_ENTRYLO1 + daddu a1, a1, a4 + + dmtc0 a0, CP0_ENTRYHI + daddu a0, a0, a5 /* a0 is left advanced by the mapping size */ + + ehb + tlbwi + jr ra + nop + .end single_tlb_setup + + +/** + * This code is moved to a movable boot bus region, + * and it is 
responsible for changing the flash mappings and + * jumping to run from the TLB mapped address. + * + * @param a0 New address for boot bus region 0 + * @param a1 Address to branch to afterwards + * @param a2 Address of MIO_BOOT_REG_CFG0 + */ + .balign 8 +change_boot_mappings: + /* Keep this routine at four instructions (16 bytes): the code that + * installs it loads exactly two doublewords starting at this label. + */ + sd a0, 0(a2) + sync + j a1 /* Jump to new TLB mapped location */ + synci 0(zero) + +/* If we need a large stack, allocate it here. */ +#if CONFIG_OCTEON_BIG_STACK_SIZE + /* Allocate the stack here so it's in L2 cache or DRAM */ + /* big_stack_end labels the lowest address of the reserved area and + * big_stack_start the doubleword placed directly after it. + */ + .balign 16 +big_stack_end: + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 +big_stack_start: + .dword 0 +#endif