Message ID: 1425652482-31411-2-git-send-email-ard.biesheuvel@linaro.org
State: New
On Fri, Mar 06, 2015 at 03:34:39PM +0100, Ard Biesheuvel wrote: > The page size and the number of translation levels, and hence the supported > virtual address range, are build-time configurables on arm64 whose optimal > values are use case dependent. However, in the current implementation, if > the system's RAM is located at a very high offset, the virtual address range > needs to reflect that merely because the identity mapping, which is only used > to enable or disable the MMU, requires the extended virtual range to map the > physical memory at an equal virtual offset. > > This patch relaxes that requirement, by increasing the number of translation > levels for the identity mapping only, and only when actually needed, i.e., > when system RAM's offset is found to be out of reach at runtime. > > Tested-by: Laura Abbott <lauraa@codeaurora.org> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > Tested-by: Marc Zyngier <marc.zyngier@arm.com> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> > --- > arch/arm64/include/asm/mmu_context.h | 43 ++++++++++++++++++++++++++++++++++ > arch/arm64/include/asm/page.h | 6 +++-- > arch/arm64/include/asm/pgtable-hwdef.h | 7 +++++- > arch/arm64/kernel/head.S | 38 ++++++++++++++++++++++++++++++ > arch/arm64/kernel/smp.c | 1 + > arch/arm64/mm/mmu.c | 7 +++++- > arch/arm64/mm/proc-macros.S | 11 +++++++++ > arch/arm64/mm/proc.S | 3 +++ > 8 files changed, 112 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h > index a9eee33dfa62..ecf2d060036b 100644 > --- a/arch/arm64/include/asm/mmu_context.h > +++ b/arch/arm64/include/asm/mmu_context.h > @@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void) > : "r" (ttbr)); > } > > +/* > + * TCR.T0SZ value to use when the ID map is active. Usually equals > + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in > + * physical memory, in which case it will be smaller. > + */ > +extern u64 idmap_t0sz; > + > +static inline bool __cpu_uses_extended_idmap(void) > +{ > + return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) && > + unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); > +} > + > +static inline void __cpu_set_tcr_t0sz(u64 t0sz) > +{ > + unsigned long tcr; > + > + if (__cpu_uses_extended_idmap()) > + asm volatile ( > + " mrs %0, tcr_el1 ;" > + " bfi %0, %1, %2, %3 ;" > + " msr tcr_el1, %0 ;" > + " isb" > + : "=&r" (tcr) > + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); > +} > + > +/* > + * Set TCR.T0SZ to the value appropriate for activating the identity map. > + */ > +static inline void cpu_set_idmap_tcr_t0sz(void) > +{ > + __cpu_set_tcr_t0sz(idmap_t0sz); > +} > + > +/* > + * Set TCR.T0SZ to its default value (based on VA_BITS) > + */ > +static inline void cpu_set_default_tcr_t0sz(void) > +{ > + __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); > +} > + > static inline void switch_new_context(struct mm_struct *mm) > { > unsigned long flags; > diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h > index 22b16232bd60..3d02b1869eb8 100644 > --- a/arch/arm64/include/asm/page.h > +++ b/arch/arm64/include/asm/page.h > @@ -33,7 +33,9 @@ > * image. Both require pgd, pud (4 levels only) and pmd tables to (section) > * map the kernel. With the 64K page configuration, swapper and idmap need to > * map to pte level. The swapper also maps the FDT (see __create_page_tables > - * for more information). > + * for more information). 
Note that the number of ID map translation levels > + * could be increased on the fly if system RAM is out of reach for the default > + * VA range, so 3 pages are reserved in all cases. > */ > #ifdef CONFIG_ARM64_64K_PAGES > #define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS) > @@ -42,7 +44,7 @@ > #endif > > #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) > -#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE) > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > > #ifndef __ASSEMBLY__ > > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h > index 5f930cc9ea83..847e864202cc 100644 > --- a/arch/arm64/include/asm/pgtable-hwdef.h > +++ b/arch/arm64/include/asm/pgtable-hwdef.h > @@ -143,7 +143,12 @@ > /* > * TCR flags. > */ > -#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0)) > +#define TCR_T0SZ_OFFSET 0 > +#define TCR_T1SZ_OFFSET 16 > +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) > +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) > +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) > +#define TCR_TxSZ_WIDTH 6 > #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) > #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) > #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > index 8ce88e08c030..a3612eadab3c 100644 > --- a/arch/arm64/kernel/head.S > +++ b/arch/arm64/kernel/head.S > @@ -387,6 +387,44 @@ __create_page_tables: > mov x0, x25 // idmap_pg_dir > ldr x3, =KERNEL_START > add x3, x3, x28 // __pa(KERNEL_START) > + > +#ifndef CONFIG_ARM64_VA_BITS_48 > +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) > +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) How does this math work exactly? I also had to look at the create_pgd_entry macros to understand that these mean the shift for the 'extra' pgtable, and not the extra amount of shifts compared to PGDIR_SHIFT. Not sure if that warrants a comment? > + > + /* > + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be > + * created that covers system RAM if that is located sufficiently high > + * in the physical address space. So for the ID map, use an extended > + * virtual range in that case, by configuring an additional translation > + * level. > + * First, we have to verify our assumption that the current value of > + * VA_BITS was chosen such that all translation levels are fully > + * utilised, and that lowering T0SZ will always result in an additional > + * translation level to be configured. > + */ > +#if VA_BITS != EXTRA_SHIFT > +#error "Mismatch between VA_BITS and page size/number of translation levels" > +#endif > + > + /* > + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the > + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), > + * this number conveniently equals the number of leading zeroes in > + * the physical address of KERNEL_END. > + */ > + adrp x5, KERNEL_END > + clz x5, x5 > + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? > + b.ge 1f // .. then skip additional level > + > + adrp x6, idmap_t0sz > + str x5, [x6, :lo12:idmap_t0sz] > + > + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 can you explain me how the subsequent call to create_pgd_entry with the same tbl (x0) value ends up passing the right pointer from the extra level to the pgd to the block mappings? 
> +1: > +#endif > + > create_pgd_entry x0, x3, x5, x6 > ldr x6, =KERNEL_END > mov x5, x3 // __pa(KERNEL_START) > diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c > index 328b8ce4b007..74554dfcce73 100644 > --- a/arch/arm64/kernel/smp.c > +++ b/arch/arm64/kernel/smp.c > @@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void) > */ > cpu_set_reserved_ttbr0(); > flush_tlb_all(); > + cpu_set_default_tcr_t0sz(); > > preempt_disable(); > trace_hardirqs_off(); > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index c6daaf6c6f97..c4f60393383e 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -40,6 +40,8 @@ > > #include "mm.h" > > +u64 idmap_t0sz = TCR_T0SZ(VA_BITS); > + > /* > * Empty_zero_page is a special page that is used for zero-initialized data > * and COW. > @@ -454,6 +456,7 @@ void __init paging_init(void) > */ > cpu_set_reserved_ttbr0(); > flush_tlb_all(); > + cpu_set_default_tcr_t0sz(); > } > > /* > @@ -461,8 +464,10 @@ void __init paging_init(void) > */ > void setup_mm_for_reboot(void) > { > - cpu_switch_mm(idmap_pg_dir, &init_mm); > + cpu_set_reserved_ttbr0(); > flush_tlb_all(); > + cpu_set_idmap_tcr_t0sz(); > + cpu_switch_mm(idmap_pg_dir, &init_mm); > } > > /* > diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S > index 005d29e2977d..c17fdd6a19bc 100644 > --- a/arch/arm64/mm/proc-macros.S > +++ b/arch/arm64/mm/proc-macros.S > @@ -52,3 +52,14 @@ > mov \reg, #4 // bytes per word > lsl \reg, \reg, \tmp // actual cache line size > .endm > + > +/* > + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map > + */ > + .macro tcr_set_idmap_t0sz, valreg, tmpreg > +#ifndef CONFIG_ARM64_VA_BITS_48 > + adrp \tmpreg, idmap_t0sz > + ldr \tmpreg, [\tmpreg, #:lo12:idmap_t0sz] > + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH > +#endif > + .endm > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S > index 28eebfb6af76..cdd754e19b9b 100644 > --- a/arch/arm64/mm/proc.S > +++ b/arch/arm64/mm/proc.S > @@ -156,6 +156,7 @@ ENTRY(cpu_do_resume) > msr cpacr_el1, x6 > msr ttbr0_el1, x1 > msr ttbr1_el1, x7 > + tcr_set_idmap_t0sz x8, x7 > msr tcr_el1, x8 > msr vbar_el1, x9 > msr mdscr_el1, x10 > @@ -233,6 +234,8 @@ ENTRY(__cpu_setup) > */ > ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ > TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 > + tcr_set_idmap_t0sz x10, x9 > + > /* > * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in > * TCR_EL1. > -- > 1.8.3.2 >
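To make the numbers behind the EXTRA_SHIFT / EXTRA_PTRS question above concrete, here is a small stand-alone C sketch of the arithmetic. It assumes the 4 KB page, 39-bit VA, three-level configuration (so PGDIR_SHIFT is 30); those values and the program itself are illustrative and are not part of the patch.

```c
#include <stdio.h>
#include <stdint.h>

/* Illustrative configuration: 4 KB pages, 39-bit VA, three levels. */
#define PAGE_SHIFT      12
#define VA_BITS         39
#define PGDIR_SHIFT     30      /* lowest VA bit translated by the pgd */

/* Mirrors the EXTRA_SHIFT / EXTRA_PTRS expressions from the patch. */
#define EXTRA_SHIFT     (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS      (1UL << (48 - EXTRA_SHIFT))

int main(void)
{
        /*
         * Each level translates PAGE_SHIFT - 3 bits, because one page
         * holds PAGE_SIZE / 8 = 512 eight-byte descriptors (4 KB pages).
         */
        printf("bits per level : %d\n", PAGE_SHIFT - 3);       /* 9 */

        /*
         * EXTRA_SHIFT is the lowest VA bit translated by the new top
         * level, i.e. the level sitting one step above the pgd.  With
         * fully used levels it equals VA_BITS, which is what the
         * "#if VA_BITS != EXTRA_SHIFT" check in the patch verifies.
         */
        printf("EXTRA_SHIFT    : %d\n", EXTRA_SHIFT);           /* 39 */

        /* The extra root table covers VA bits [47:EXTRA_SHIFT]. */
        printf("EXTRA_PTRS     : %lu entries, %lu bytes\n",
               EXTRA_PTRS,
               (unsigned long)(EXTRA_PTRS * sizeof(uint64_t))); /* 512, 4096 */
        return 0;
}
```

With these numbers the extra table fits in a single page, which lines up with the patch growing IDMAP_DIR_SIZE to three pages so that the optional extra level always has room.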
On 16 March 2015 at 15:28, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> On Fri, Mar 06, 2015 at 03:34:39PM +0100, Ard Biesheuvel wrote:
>> The page size and the number of translation levels, and hence the supported
>> virtual address range, are build-time configurables on arm64 whose optimal
>> values are use case dependent. However, in the current implementation, if
>> the system's RAM is located at a very high offset, the virtual address range
>> needs to reflect that merely because the identity mapping, which is only used
>> to enable or disable the MMU, requires the extended virtual range to map the
>> physical memory at an equal virtual offset.
>>
>> This patch relaxes that requirement, by increasing the number of translation
>> levels for the identity mapping only, and only when actually needed, i.e.,
>> when system RAM's offset is found to be out of reach at runtime.
>>
>> Tested-by: Laura Abbott <lauraa@codeaurora.org>
>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>> Tested-by: Marc Zyngier <marc.zyngier@arm.com>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>> arch/arm64/include/asm/mmu_context.h | 43 ++++++++++++++++++++++++++++++++++
>> arch/arm64/include/asm/page.h | 6 +++--
>> arch/arm64/include/asm/pgtable-hwdef.h | 7 +++++-
>> arch/arm64/kernel/head.S | 38 ++++++++++++++++++++++++++++++
>> arch/arm64/kernel/smp.c | 1 +
>> arch/arm64/mm/mmu.c | 7 +++++-
>> arch/arm64/mm/proc-macros.S | 11 +++++++++
>> arch/arm64/mm/proc.S | 3 +++
>> 8 files changed, 112 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
>> index a9eee33dfa62..ecf2d060036b 100644
>> --- a/arch/arm64/include/asm/mmu_context.h
>> +++ b/arch/arm64/include/asm/mmu_context.h
>> @@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
>> : "r" (ttbr));
>> }
>>
>> +/*
>> + * TCR.T0SZ value to use when the ID map is active. Usually equals
>> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
>> + * physical memory, in which case it will be smaller.
>> + */
>> +extern u64 idmap_t0sz;
>> +
>> +static inline bool __cpu_uses_extended_idmap(void)
>> +{
>> + return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
>> + unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
>> +}
>> +
>> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
>> +{
>> + unsigned long tcr;
>> +
>> + if (__cpu_uses_extended_idmap())
>> + asm volatile (
>> + " mrs %0, tcr_el1 ;"
>> + " bfi %0, %1, %2, %3 ;"
>> + " msr tcr_el1, %0 ;"
>> + " isb"
>> + : "=&r" (tcr)
>> + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
>> +}
>> +
>> +/*
>> + * Set TCR.T0SZ to the value appropriate for activating the identity map.
>> + */
>> +static inline void cpu_set_idmap_tcr_t0sz(void)
>> +{
>> + __cpu_set_tcr_t0sz(idmap_t0sz);
>> +}
>> +
>> +/*
>> + * Set TCR.T0SZ to its default value (based on VA_BITS)
>> + */
>> +static inline void cpu_set_default_tcr_t0sz(void)
>> +{
>> + __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
>> +}
>> +
>> static inline void switch_new_context(struct mm_struct *mm)
>> {
>> unsigned long flags;
>> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
>> index 22b16232bd60..3d02b1869eb8 100644
>> --- a/arch/arm64/include/asm/page.h
>> +++ b/arch/arm64/include/asm/page.h
>> @@ -33,7 +33,9 @@
>> * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
>> * map the kernel. With the 64K page configuration, swapper and idmap need to
>> * map to pte level. The swapper also maps the FDT (see __create_page_tables
>> - * for more information).
>> + * for more information). Note that the number of ID map translation levels
>> + * could be increased on the fly if system RAM is out of reach for the default
>> + * VA range, so 3 pages are reserved in all cases.
>> */
>> #ifdef CONFIG_ARM64_64K_PAGES
>> #define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS)
>> @@ -42,7 +44,7 @@
>> #endif
>>
>> #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
>> -#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE)
>> +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
>>
>> #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
>> index 5f930cc9ea83..847e864202cc 100644
>> --- a/arch/arm64/include/asm/pgtable-hwdef.h
>> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
>> @@ -143,7 +143,12 @@
>> /*
>> * TCR flags.
>> */
>> -#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
>> +#define TCR_T0SZ_OFFSET 0
>> +#define TCR_T1SZ_OFFSET 16
>> +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
>> +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
>> +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
>> +#define TCR_TxSZ_WIDTH 6
>> #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
>> #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
>> #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 8ce88e08c030..a3612eadab3c 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -387,6 +387,44 @@ __create_page_tables:
>> mov x0, x25 // idmap_pg_dir
>> ldr x3, =KERNEL_START
>> add x3, x3, x28 // __pa(KERNEL_START)
>> +
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
>> +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT))
>
> How does this math work exactly?
>

PAGE_SHIFT - 3 is the number of bits translated at each level.
EXTRA_SHIFT is the number of VA low bits that is translated by the higher tables.
EXTRA_PTRS is the size of the root table (in 64-bit words)

> I also had to look at the create_pgd_entry macros to understand that these
> mean the shift for the 'extra' pgtable, and not the extra amount of
> shifts compared to PGDIR_SHIFT. Not sure if that warrants a comment?
>

I am not sure if I understand what 'the extra amount of shifts' means, so I
should at least add a comment that that's not it :-)
But yes, I can clarify that.

>
>> +
>> + /*
>> + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
>> + * created that covers system RAM if that is located sufficiently high
>> + * in the physical address space. So for the ID map, use an extended
>> + * virtual range in that case, by configuring an additional translation
>> + * level.
>> + * First, we have to verify our assumption that the current value of
>> + * VA_BITS was chosen such that all translation levels are fully
>> + * utilised, and that lowering T0SZ will always result in an additional
>> + * translation level to be configured.
>> + */
>> +#if VA_BITS != EXTRA_SHIFT
>> +#error "Mismatch between VA_BITS and page size/number of translation levels"
>> +#endif
>> +
>> + /*
>> + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
>> + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
>> + * this number conveniently equals the number of leading zeroes in
>> + * the physical address of KERNEL_END.
>> + */
>> + adrp x5, KERNEL_END
>> + clz x5, x5
>> + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
>> + b.ge 1f // .. then skip additional level
>> +
>> + adrp x6, idmap_t0sz
>> + str x5, [x6, :lo12:idmap_t0sz]
>> +
>> + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
>
> can you explain me how the subsequent call to create_pgd_entry with the
> same tbl (x0) value ends up passing the right pointer from the extra
> level to the pgd to the block mappings?
>

x0 is not preserved by the macro but incremented by 1 page.
Look at create_pgd_entry: it calls create_table_entry twice with the same
\tbl register, but each call sets another level.

>> +1:
>> +#endif
>> +
>> create_pgd_entry x0, x3, x5, x6
>> ldr x6, =KERNEL_END
>> mov x5, x3 // __pa(KERNEL_START)
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 328b8ce4b007..74554dfcce73 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
>> */
>> cpu_set_reserved_ttbr0();
>> flush_tlb_all();
>> + cpu_set_default_tcr_t0sz();
>>
>> preempt_disable();
>> trace_hardirqs_off();
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index c6daaf6c6f97..c4f60393383e 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -40,6 +40,8 @@
>>
>> #include "mm.h"
>>
>> +u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
>> +
>> /*
>> * Empty_zero_page is a special page that is used for zero-initialized data
>> * and COW.
>> @@ -454,6 +456,7 @@ void __init paging_init(void)
>> */
>> cpu_set_reserved_ttbr0();
>> flush_tlb_all();
>> + cpu_set_default_tcr_t0sz();
>> }
>>
>> /*
>> @@ -461,8 +464,10 @@ void __init paging_init(void)
>> */
>> void setup_mm_for_reboot(void)
>> {
>> - cpu_switch_mm(idmap_pg_dir, &init_mm);
>> + cpu_set_reserved_ttbr0();
>> flush_tlb_all();
>> + cpu_set_idmap_tcr_t0sz();
>> + cpu_switch_mm(idmap_pg_dir, &init_mm);
>> }
>>
>> /*
>> diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
>> index 005d29e2977d..c17fdd6a19bc 100644
>> --- a/arch/arm64/mm/proc-macros.S
>> +++ b/arch/arm64/mm/proc-macros.S
>> @@ -52,3 +52,14 @@
>> mov \reg, #4 // bytes per word
>> lsl \reg, \reg, \tmp // actual cache line size
>> .endm
>> +
>> +/*
>> + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
>> + */
>> + .macro tcr_set_idmap_t0sz, valreg, tmpreg
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> + adrp \tmpreg, idmap_t0sz
>> + ldr \tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
>> + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
>> +#endif
>> + .endm
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 28eebfb6af76..cdd754e19b9b 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
>> msr cpacr_el1, x6
>> msr ttbr0_el1, x1
>> msr ttbr1_el1, x7
>> + tcr_set_idmap_t0sz x8, x7
>> msr tcr_el1, x8
>> msr vbar_el1, x9
>> msr mdscr_el1, x10
>> @@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
>> */
>> ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
>> TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
>> + tcr_set_idmap_t0sz x10, x9
>> +
>> /*
>> * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
>> * TCR_EL1.
>> --
>> 1.8.3.2
>>