diff mbox

[roundup,1/4] arm64: mm: increase VA range of identity map

Message ID 1425652482-31411-2-git-send-email-ard.biesheuvel@linaro.org
State New
Headers show

Commit Message

Ard Biesheuvel March 6, 2015, 2:34 p.m. UTC
The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.

This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.

Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h   | 43 ++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/page.h          |  6 +++--
 arch/arm64/include/asm/pgtable-hwdef.h |  7 +++++-
 arch/arm64/kernel/head.S               | 38 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/smp.c                |  1 +
 arch/arm64/mm/mmu.c                    |  7 +++++-
 arch/arm64/mm/proc-macros.S            | 11 +++++++++
 arch/arm64/mm/proc.S                   |  3 +++
 8 files changed, 112 insertions(+), 4 deletions(-)

Comments

Christoffer Dall March 16, 2015, 2:28 p.m. UTC | #1
On Fri, Mar 06, 2015 at 03:34:39PM +0100, Ard Biesheuvel wrote:
> The page size and the number of translation levels, and hence the supported
> virtual address range, are build-time configurables on arm64 whose optimal
> values are use case dependent. However, in the current implementation, if
> the system's RAM is located at a very high offset, the virtual address range
> needs to reflect that merely because the identity mapping, which is only used
> to enable or disable the MMU, requires the extended virtual range to map the
> physical memory at an equal virtual offset.
> 
> This patch relaxes that requirement, by increasing the number of translation
> levels for the identity mapping only, and only when actually needed, i.e.,
> when system RAM's offset is found to be out of reach at runtime.
> 
> Tested-by: Laura Abbott <lauraa@codeaurora.org>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Tested-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm64/include/asm/mmu_context.h   | 43 ++++++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/page.h          |  6 +++--
>  arch/arm64/include/asm/pgtable-hwdef.h |  7 +++++-
>  arch/arm64/kernel/head.S               | 38 ++++++++++++++++++++++++++++++
>  arch/arm64/kernel/smp.c                |  1 +
>  arch/arm64/mm/mmu.c                    |  7 +++++-
>  arch/arm64/mm/proc-macros.S            | 11 +++++++++
>  arch/arm64/mm/proc.S                   |  3 +++
>  8 files changed, 112 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
> index a9eee33dfa62..ecf2d060036b 100644
> --- a/arch/arm64/include/asm/mmu_context.h
> +++ b/arch/arm64/include/asm/mmu_context.h
> @@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
>  	: "r" (ttbr));
>  }
>  
> +/*
> + * TCR.T0SZ value to use when the ID map is active. Usually equals
> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
> + * physical memory, in which case it will be smaller.
> + */
> +extern u64 idmap_t0sz;
> +
> +static inline bool __cpu_uses_extended_idmap(void)
> +{
> +	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
> +		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
> +}
> +
> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
> +{
> +	unsigned long tcr;
> +
> +	if (__cpu_uses_extended_idmap())
> +		asm volatile (
> +		"	mrs	%0, tcr_el1	;"
> +		"	bfi	%0, %1, %2, %3	;"
> +		"	msr	tcr_el1, %0	;"
> +		"	isb"
> +		: "=&r" (tcr)
> +		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
> +}
> +
> +/*
> + * Set TCR.T0SZ to the value appropriate for activating the identity map.
> + */
> +static inline void cpu_set_idmap_tcr_t0sz(void)
> +{
> +	__cpu_set_tcr_t0sz(idmap_t0sz);
> +}
> +
> +/*
> + * Set TCR.T0SZ to its default value (based on VA_BITS)
> + */
> +static inline void cpu_set_default_tcr_t0sz(void)
> +{
> +	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
> +}
> +
>  static inline void switch_new_context(struct mm_struct *mm)
>  {
>  	unsigned long flags;
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 22b16232bd60..3d02b1869eb8 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -33,7 +33,9 @@
>   * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
>   * map the kernel. With the 64K page configuration, swapper and idmap need to
>   * map to pte level. The swapper also maps the FDT (see __create_page_tables
> - * for more information).
> + * for more information). Note that the number of ID map translation levels
> + * could be increased on the fly if system RAM is out of reach for the default
> + * VA range, so 3 pages are reserved in all cases.
>   */
>  #ifdef CONFIG_ARM64_64K_PAGES
>  #define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
> @@ -42,7 +44,7 @@
>  #endif
>  
>  #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
> -#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
> +#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
> index 5f930cc9ea83..847e864202cc 100644
> --- a/arch/arm64/include/asm/pgtable-hwdef.h
> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
> @@ -143,7 +143,12 @@
>  /*
>   * TCR flags.
>   */
> -#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
> +#define TCR_T0SZ_OFFSET		0
> +#define TCR_T1SZ_OFFSET		16
> +#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
> +#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
> +#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
> +#define TCR_TxSZ_WIDTH		6
>  #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
>  #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
>  #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 8ce88e08c030..a3612eadab3c 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -387,6 +387,44 @@ __create_page_tables:
>  	mov	x0, x25				// idmap_pg_dir
>  	ldr	x3, =KERNEL_START
>  	add	x3, x3, x28			// __pa(KERNEL_START)
> +
> +#ifndef CONFIG_ARM64_VA_BITS_48
> +#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
> +#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))

How does this math work exactly?

I also had to look at the create_pgd_entry macros to understand that these
mean the shift for the 'extra' pgtable, and not the extra amount of
shifts compared to PGDIR_SHIFT.  Not sure if that warrants a comment?


> +
> +	/*
> +	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
> +	 * created that covers system RAM if that is located sufficiently high
> +	 * in the physical address space. So for the ID map, use an extended
> +	 * virtual range in that case, by configuring an additional translation
> +	 * level.
> +	 * First, we have to verify our assumption that the current value of
> +	 * VA_BITS was chosen such that all translation levels are fully
> +	 * utilised, and that lowering T0SZ will always result in an additional
> +	 * translation level to be configured.
> +	 */
> +#if VA_BITS != EXTRA_SHIFT
> +#error "Mismatch between VA_BITS and page size/number of translation levels"
> +#endif
> +
> +	/*
> +	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
> +	 * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
> +	 * this number conveniently equals the number of leading zeroes in
> +	 * the physical address of KERNEL_END.
> +	 */
> +	adrp	x5, KERNEL_END
> +	clz	x5, x5
> +	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
> +	b.ge	1f			// .. then skip additional level
> +
> +	adrp	x6, idmap_t0sz
> +	str	x5, [x6, :lo12:idmap_t0sz]
> +
> +	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6

Can you explain to me how the subsequent call to create_pgd_entry with the
same tbl (x0) value ends up passing the right pointer from the extra
level to the pgd to the block mappings?

> +1:
> +#endif
> +
>  	create_pgd_entry x0, x3, x5, x6
>  	ldr	x6, =KERNEL_END
>  	mov	x5, x3				// __pa(KERNEL_START)
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 328b8ce4b007..74554dfcce73 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
>  	 */
>  	cpu_set_reserved_ttbr0();
>  	flush_tlb_all();
> +	cpu_set_default_tcr_t0sz();
>  
>  	preempt_disable();
>  	trace_hardirqs_off();
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index c6daaf6c6f97..c4f60393383e 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -40,6 +40,8 @@
>  
>  #include "mm.h"
>  
> +u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
> +
>  /*
>   * Empty_zero_page is a special page that is used for zero-initialized data
>   * and COW.
> @@ -454,6 +456,7 @@ void __init paging_init(void)
>  	 */
>  	cpu_set_reserved_ttbr0();
>  	flush_tlb_all();
> +	cpu_set_default_tcr_t0sz();
>  }
>  
>  /*
> @@ -461,8 +464,10 @@ void __init paging_init(void)
>   */
>  void setup_mm_for_reboot(void)
>  {
> -	cpu_switch_mm(idmap_pg_dir, &init_mm);
> +	cpu_set_reserved_ttbr0();
>  	flush_tlb_all();
> +	cpu_set_idmap_tcr_t0sz();
> +	cpu_switch_mm(idmap_pg_dir, &init_mm);
>  }
>  
>  /*
> diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
> index 005d29e2977d..c17fdd6a19bc 100644
> --- a/arch/arm64/mm/proc-macros.S
> +++ b/arch/arm64/mm/proc-macros.S
> @@ -52,3 +52,14 @@
>  	mov	\reg, #4			// bytes per word
>  	lsl	\reg, \reg, \tmp		// actual cache line size
>  	.endm
> +
> +/*
> + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
> + */
> +	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
> +#ifndef CONFIG_ARM64_VA_BITS_48
> +	adrp	\tmpreg, idmap_t0sz
> +	ldr	\tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
> +	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
> +#endif
> +	.endm
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 28eebfb6af76..cdd754e19b9b 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
>  	msr	cpacr_el1, x6
>  	msr	ttbr0_el1, x1
>  	msr	ttbr1_el1, x7
> +	tcr_set_idmap_t0sz x8, x7
>  	msr	tcr_el1, x8
>  	msr	vbar_el1, x9
>  	msr	mdscr_el1, x10
> @@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
>  	 */
>  	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
>  			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
> +	tcr_set_idmap_t0sz	x10, x9
> +
>  	/*
>  	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
>  	 * TCR_EL1.
> -- 
> 1.8.3.2
>
Ard Biesheuvel March 16, 2015, 2:39 p.m. UTC | #2
On 16 March 2015 at 15:28, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> On Fri, Mar 06, 2015 at 03:34:39PM +0100, Ard Biesheuvel wrote:
>> The page size and the number of translation levels, and hence the supported
>> virtual address range, are build-time configurables on arm64 whose optimal
>> values are use case dependent. However, in the current implementation, if
>> the system's RAM is located at a very high offset, the virtual address range
>> needs to reflect that merely because the identity mapping, which is only used
>> to enable or disable the MMU, requires the extended virtual range to map the
>> physical memory at an equal virtual offset.
>>
>> This patch relaxes that requirement, by increasing the number of translation
>> levels for the identity mapping only, and only when actually needed, i.e.,
>> when system RAM's offset is found to be out of reach at runtime.
>>
>> Tested-by: Laura Abbott <lauraa@codeaurora.org>
>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>> Tested-by: Marc Zyngier <marc.zyngier@arm.com>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm64/include/asm/mmu_context.h   | 43 ++++++++++++++++++++++++++++++++++
>>  arch/arm64/include/asm/page.h          |  6 +++--
>>  arch/arm64/include/asm/pgtable-hwdef.h |  7 +++++-
>>  arch/arm64/kernel/head.S               | 38 ++++++++++++++++++++++++++++++
>>  arch/arm64/kernel/smp.c                |  1 +
>>  arch/arm64/mm/mmu.c                    |  7 +++++-
>>  arch/arm64/mm/proc-macros.S            | 11 +++++++++
>>  arch/arm64/mm/proc.S                   |  3 +++
>>  8 files changed, 112 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
>> index a9eee33dfa62..ecf2d060036b 100644
>> --- a/arch/arm64/include/asm/mmu_context.h
>> +++ b/arch/arm64/include/asm/mmu_context.h
>> @@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
>>       : "r" (ttbr));
>>  }
>>
>> +/*
>> + * TCR.T0SZ value to use when the ID map is active. Usually equals
>> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
>> + * physical memory, in which case it will be smaller.
>> + */
>> +extern u64 idmap_t0sz;
>> +
>> +static inline bool __cpu_uses_extended_idmap(void)
>> +{
>> +     return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
>> +             unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
>> +}
>> +
>> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
>> +{
>> +     unsigned long tcr;
>> +
>> +     if (__cpu_uses_extended_idmap())
>> +             asm volatile (
>> +             "       mrs     %0, tcr_el1     ;"
>> +             "       bfi     %0, %1, %2, %3  ;"
>> +             "       msr     tcr_el1, %0     ;"
>> +             "       isb"
>> +             : "=&r" (tcr)
>> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
>> +}
>> +
>> +/*
>> + * Set TCR.T0SZ to the value appropriate for activating the identity map.
>> + */
>> +static inline void cpu_set_idmap_tcr_t0sz(void)
>> +{
>> +     __cpu_set_tcr_t0sz(idmap_t0sz);
>> +}
>> +
>> +/*
>> + * Set TCR.T0SZ to its default value (based on VA_BITS)
>> + */
>> +static inline void cpu_set_default_tcr_t0sz(void)
>> +{
>> +     __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
>> +}
>> +
>>  static inline void switch_new_context(struct mm_struct *mm)
>>  {
>>       unsigned long flags;
>> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
>> index 22b16232bd60..3d02b1869eb8 100644
>> --- a/arch/arm64/include/asm/page.h
>> +++ b/arch/arm64/include/asm/page.h
>> @@ -33,7 +33,9 @@
>>   * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
>>   * map the kernel. With the 64K page configuration, swapper and idmap need to
>>   * map to pte level. The swapper also maps the FDT (see __create_page_tables
>> - * for more information).
>> + * for more information). Note that the number of ID map translation levels
>> + * could be increased on the fly if system RAM is out of reach for the default
>> + * VA range, so 3 pages are reserved in all cases.
>>   */
>>  #ifdef CONFIG_ARM64_64K_PAGES
>>  #define SWAPPER_PGTABLE_LEVELS       (CONFIG_ARM64_PGTABLE_LEVELS)
>> @@ -42,7 +44,7 @@
>>  #endif
>>
>>  #define SWAPPER_DIR_SIZE     (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
>> -#define IDMAP_DIR_SIZE               (SWAPPER_DIR_SIZE)
>> +#define IDMAP_DIR_SIZE               (3 * PAGE_SIZE)
>>
>>  #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
>> index 5f930cc9ea83..847e864202cc 100644
>> --- a/arch/arm64/include/asm/pgtable-hwdef.h
>> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
>> @@ -143,7 +143,12 @@
>>  /*
>>   * TCR flags.
>>   */
>> -#define TCR_TxSZ(x)          (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
>> +#define TCR_T0SZ_OFFSET              0
>> +#define TCR_T1SZ_OFFSET              16
>> +#define TCR_T0SZ(x)          ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
>> +#define TCR_T1SZ(x)          ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
>> +#define TCR_TxSZ(x)          (TCR_T0SZ(x) | TCR_T1SZ(x))
>> +#define TCR_TxSZ_WIDTH               6
>>  #define TCR_IRGN_NC          ((UL(0) << 8) | (UL(0) << 24))
>>  #define TCR_IRGN_WBWA                ((UL(1) << 8) | (UL(1) << 24))
>>  #define TCR_IRGN_WT          ((UL(2) << 8) | (UL(2) << 24))
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 8ce88e08c030..a3612eadab3c 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -387,6 +387,44 @@ __create_page_tables:
>>       mov     x0, x25                         // idmap_pg_dir
>>       ldr     x3, =KERNEL_START
>>       add     x3, x3, x28                     // __pa(KERNEL_START)
>> +
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +#define EXTRA_SHIFT  (PGDIR_SHIFT + PAGE_SHIFT - 3)
>> +#define EXTRA_PTRS   (1 << (48 - EXTRA_SHIFT))
>
> How does this math work exactly?
>

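PAGE_SHIFT - 3 is the number of bits translated at each level (a table
occupies one page and each descriptor is 8 == 2^3 bytes).
EXTRA_SHIFT is the number of low VA bits that are translated by the
regular tables (the pgd and the levels below it), i.e., the shift used to
index the extra table.
EXTRA_PTRS is the size of the new root table (in 64-bit words), i.e., the
number of entries needed to cover a 48-bit range.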

> I also had to look at the create_pgd_entry macros to understand that these
> mean the shift for the 'extra' pgtable, and not the extra amount of
> shifts compared to PGDIR_SHIFT.  Not sure if that warrants a comment?
>

I am not sure if I understand what 'the extra amount of shifts' means,
so I should at least add a comment that that's not it :-)
But yes, I can clarify that.

>
>> +
>> +     /*
>> +      * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
>> +      * created that covers system RAM if that is located sufficiently high
>> +      * in the physical address space. So for the ID map, use an extended
>> +      * virtual range in that case, by configuring an additional translation
>> +      * level.
>> +      * First, we have to verify our assumption that the current value of
>> +      * VA_BITS was chosen such that all translation levels are fully
>> +      * utilised, and that lowering T0SZ will always result in an additional
>> +      * translation level to be configured.
>> +      */
>> +#if VA_BITS != EXTRA_SHIFT
>> +#error "Mismatch between VA_BITS and page size/number of translation levels"
>> +#endif
>> +
>> +     /*
>> +      * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
>> +      * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
>> +      * this number conveniently equals the number of leading zeroes in
>> +      * the physical address of KERNEL_END.
>> +      */
>> +     adrp    x5, KERNEL_END
>> +     clz     x5, x5
>> +     cmp     x5, TCR_T0SZ(VA_BITS)   // default T0SZ small enough?
>> +     b.ge    1f                      // .. then skip additional level
>> +
>> +     adrp    x6, idmap_t0sz
>> +     str     x5, [x6, :lo12:idmap_t0sz]
>> +
>> +     create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
>
> Can you explain to me how the subsequent call to create_pgd_entry with the
> same tbl (x0) value ends up passing the right pointer from the extra
> level to the pgd to the block mappings?
>

x0 is not preserved by the create_table_entry macro: it is incremented by
one page on each invocation.

Look at create_pgd_entry: it calls create_table_entry twice with the
same \tbl register, but each call sets another level.

>> +1:
>> +#endif
>> +
>>       create_pgd_entry x0, x3, x5, x6
>>       ldr     x6, =KERNEL_END
>>       mov     x5, x3                          // __pa(KERNEL_START)
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 328b8ce4b007..74554dfcce73 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
>>        */
>>       cpu_set_reserved_ttbr0();
>>       flush_tlb_all();
>> +     cpu_set_default_tcr_t0sz();
>>
>>       preempt_disable();
>>       trace_hardirqs_off();
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index c6daaf6c6f97..c4f60393383e 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -40,6 +40,8 @@
>>
>>  #include "mm.h"
>>
>> +u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
>> +
>>  /*
>>   * Empty_zero_page is a special page that is used for zero-initialized data
>>   * and COW.
>> @@ -454,6 +456,7 @@ void __init paging_init(void)
>>        */
>>       cpu_set_reserved_ttbr0();
>>       flush_tlb_all();
>> +     cpu_set_default_tcr_t0sz();
>>  }
>>
>>  /*
>> @@ -461,8 +464,10 @@ void __init paging_init(void)
>>   */
>>  void setup_mm_for_reboot(void)
>>  {
>> -     cpu_switch_mm(idmap_pg_dir, &init_mm);
>> +     cpu_set_reserved_ttbr0();
>>       flush_tlb_all();
>> +     cpu_set_idmap_tcr_t0sz();
>> +     cpu_switch_mm(idmap_pg_dir, &init_mm);
>>  }
>>
>>  /*
>> diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
>> index 005d29e2977d..c17fdd6a19bc 100644
>> --- a/arch/arm64/mm/proc-macros.S
>> +++ b/arch/arm64/mm/proc-macros.S
>> @@ -52,3 +52,14 @@
>>       mov     \reg, #4                        // bytes per word
>>       lsl     \reg, \reg, \tmp                // actual cache line size
>>       .endm
>> +
>> +/*
>> + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
>> + */
>> +     .macro  tcr_set_idmap_t0sz, valreg, tmpreg
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +     adrp    \tmpreg, idmap_t0sz
>> +     ldr     \tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
>> +     bfi     \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
>> +#endif
>> +     .endm
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 28eebfb6af76..cdd754e19b9b 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
>>       msr     cpacr_el1, x6
>>       msr     ttbr0_el1, x1
>>       msr     ttbr1_el1, x7
>> +     tcr_set_idmap_t0sz x8, x7
>>       msr     tcr_el1, x8
>>       msr     vbar_el1, x9
>>       msr     mdscr_el1, x10
>> @@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
>>        */
>>       ldr     x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
>>                       TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
>> +     tcr_set_idmap_t0sz      x10, x9
>> +
>>       /*
>>        * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
>>        * TCR_EL1.
>> --
>> 1.8.3.2
>>
diff mbox

Patch

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..ecf2d060036b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,49 @@  static inline void cpu_set_reserved_ttbr0(void)
 	: "r" (ttbr));
 }
 
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline bool __cpu_uses_extended_idmap(void)
+{
+	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
+		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+}
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
 static inline void switch_new_context(struct mm_struct *mm)
 {
 	unsigned long flags;
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..3d02b1869eb8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,9 @@ 
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
  * map the kernel. With the 64K page configuration, swapper and idmap need to
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
@@ -42,7 +44,7 @@ 
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..847e864202cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -143,7 +143,12 @@ 
 /*
  * TCR flags.
  */
-#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
 #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
 #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
 #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8ce88e08c030..a3612eadab3c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -387,6 +387,44 @@  __create_page_tables:
 	mov	x0, x25				// idmap_pg_dir
 	ldr	x3, =KERNEL_START
 	add	x3, x3, x28			// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
+
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use an extended
+	 * virtual range in that case, by configuring an additional translation
+	 * level.
+	 * First, we have to verify our assumption that the current value of
+	 * VA_BITS was chosen such that all translation levels are fully
+	 * utilised, and that lowering T0SZ will always result in an additional
+	 * translation level to be configured.
+	 */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
+#endif
+
+	/*
+	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+	 * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
+	 * this number conveniently equals the number of leading zeroes in
+	 * the physical address of KERNEL_END.
+	 */
+	adrp	x5, KERNEL_END
+	clz	x5, x5
+	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
+	b.ge	1f			// .. then skip additional level
+
+	adrp	x6, idmap_t0sz
+	str	x5, [x6, :lo12:idmap_t0sz]
+
+	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
+1:
+#endif
+
 	create_pgd_entry x0, x3, x5, x6
 	ldr	x6, =KERNEL_END
 	mov	x5, x3				// __pa(KERNEL_START)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..74554dfcce73 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -151,6 +151,7 @@  asmlinkage void secondary_start_kernel(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
 	trace_hardirqs_off();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..c4f60393383e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@ 
 
 #include "mm.h"
 
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -454,6 +456,7 @@  void __init paging_init(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 }
 
 /*
@@ -461,8 +464,10 @@  void __init paging_init(void)
  */
 void setup_mm_for_reboot(void)
 {
-	cpu_switch_mm(idmap_pg_dir, &init_mm);
+	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
 /*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..c17fdd6a19bc 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,14 @@ 
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	adrp	\tmpreg, idmap_t0sz
+	ldr	\tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@  ENTRY(cpu_do_resume)
 	msr	cpacr_el1, x6
 	msr	ttbr0_el1, x1
 	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
@@ -233,6 +234,8 @@  ENTRY(__cpu_setup)
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
 	 * TCR_EL1.