| Message ID | 1405537792-23666-8-git-send-email-catalin.marinas@arm.com |
|---|---|
| State | Accepted |
| Commit | c79b954bf6c006f2d3dd9d01f231abeead13a410 |
On 07/16/2014 02:09 PM, Catalin Marinas wrote: > From: Jungseok Lee <jays.lee@samsung.com> > > This patch implements 4 levels of translation tables since 3 levels > of page tables with 4KB pages cannot support 40-bit physical address > space described in [1] due to the following issue. > > It is a restriction that kernel logical memory map with 4KB + 3 levels > (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from > 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create > mapping for this region in map_mem function since __phys_to_virt for > this region reaches to address overflow. > > If SoC design follows the document, [1], over 32GB RAM would be placed > from 544GB. Even 64GB system is supposed to use the region from 544GB > to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels > of page tables to avoid hacking __virt_to_phys and __phys_to_virt. > > However, it is recommended 4 levels of page table should be only enabled > if memory map is too sparse or there is about 512GB RAM. > > References > ---------- > [1]: Principles of ARM Memory Maps, White Paper, Issue C > > Signed-off-by: Jungseok Lee <jays.lee@samsung.com> > Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> > Acked-by: Kukjin Kim <kgene.kim@samsung.com> > Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> > Reviewed-by: Steve Capper <steve.capper@linaro.org> > [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] > [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] > [catalin.marinas@arm.com: 4 page tables levels only if !KVM] > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > --- > arch/arm64/Kconfig | 9 ++++++++ > arch/arm64/include/asm/page.h | 13 ++++++++--- > arch/arm64/include/asm/pgalloc.h | 20 ++++++++++++++++ > arch/arm64/include/asm/pgtable-hwdef.h | 6 +++-- > arch/arm64/include/asm/pgtable.h | 40 ++++++++++++++++++++++++++++++++ > arch/arm64/include/asm/tlb.h | 9 ++++++++ > arch/arm64/kernel/head.S | 42 +++++++++++++++++++++++++++------- > arch/arm64/kernel/traps.c | 5 ++++ > arch/arm64/mm/fault.c | 1 + > arch/arm64/mm/ioremap.c | 6 ++++- > arch/arm64/mm/mmu.c | 14 +++++++++--- > 11 files changed, 148 insertions(+), 17 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 4daf11f5b403..24cbe72c0da9 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -196,12 +196,18 @@ config ARM64_VA_BITS_42 > bool "42-bit" > depends on ARM64_64K_PAGES > > +config ARM64_VA_BITS_48 > + bool "48-bit" > + depends on !KVM > + depends on ARM64_4K_PAGES > + > endchoice Shouldn't we be able to support 48 bit VA with 3 level 64K pages? If so why the dependency on ARM64_4K_PAGES? More generally it seems like a problem to tie the equate the VA_BITS the page table could address with the VA_BITS the hardware could address. Even with 4 level 4K page tables that can address 48 bits the hardware may only support say 42 bit address space. > > config ARM64_VA_BITS > int > default 39 if ARM64_VA_BITS_39 > default 42 if ARM64_VA_BITS_42 > + default 48 if ARM64_VA_BITS_48 > > config ARM64_2_LEVELS > def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 > @@ -209,6 +215,9 @@ config ARM64_2_LEVELS > config ARM64_3_LEVELS > def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 > > +config ARM64_4_LEVELS > + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 > + It seems like we should also do ARM64_4K_PAGES and ARM64_VA_BITS_42 as a valid combination for ARM64_4_LEVELS. 
At least if we are assuming the VA_BITS correspond to hardware. > config CPU_BIG_ENDIAN > bool "Build big-endian kernel" > help > diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h > index 6bf139188792..cf9afa0366b6 100644 > --- a/arch/arm64/include/asm/page.h > +++ b/arch/arm64/include/asm/page.h > @@ -33,19 +33,26 @@ > > /* > * The idmap and swapper page tables need some space reserved in the kernel > - * image. Both require a pgd and a next level table to (section) map the > - * kernel. The the swapper also maaps the FDT (see __create_page_tables for > + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) > + * map the kernel. The swapper also maps the FDT (see __create_page_tables for > * more information). > */ > +#ifdef CONFIG_ARM64_4_LEVELS > +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > +#else > #define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) > #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > +#endif > > #ifndef __ASSEMBLY__ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-types.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-types.h> > +#else > +#include <asm/pgtable-4level-types.h> > #endif > > extern void __cpu_clear_user_page(void *p, unsigned long user); > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > index 48298376e46a..8d745fae4c2d 100644 > --- a/arch/arm64/include/asm/pgalloc.h > +++ b/arch/arm64/include/asm/pgalloc.h > @@ -26,6 +26,26 @@ > > #define check_pgt_cache() do { } while (0) > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > +{ > + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); > + free_page((unsigned long)pud); > +} > + > +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > +{ > + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > #ifndef CONFIG_ARM64_2_LEVELS > > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h > index c7c603b489b8..fddcc3efa569 100644 > --- a/arch/arm64/include/asm/pgtable-hwdef.h > +++ b/arch/arm64/include/asm/pgtable-hwdef.h > @@ -18,8 +18,10 @@ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-hwdef.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-hwdef.h> > +#else > +#include <asm/pgtable-4level-hwdef.h> > #endif > > /* > @@ -27,7 +29,7 @@ > * > * Level 1 descriptor (PUD). > */ > - > +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) > #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) > #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) > #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 6d5854972a77..d9b23efdaded 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -35,7 +35,11 @@ > * VMALLOC and SPARSEMEM_VMEMMAP ranges. > */ > #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) Here's a good example of where we run into trouble equating page table addressable bits with hardware addressable bits. If VA_BITS is 48 due to 4K 4 level page tables but is running on a 42 bit system this will end up being out of range. 
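(Illustration only, not part of the patch: the layout constants are fixed at build time purely by the Kconfig VA_BITS, whatever the CPU implements. A minimal user-space sketch of the arithmetic, assuming the PAGE_OFFSET formula from asm/memory.h of this era and the VMALLOC_START definition quoted above:)

#include <stdio.h>
#include <stdint.h>

/* Print the compile-time kernel VA layout implied by a given VA_BITS. */
static void layout(unsigned int va_bits)
{
	uint64_t vmalloc_start = UINT64_MAX << va_bits;        /* VMALLOC_START */
	uint64_t page_offset   = UINT64_MAX << (va_bits - 1);  /* PAGE_OFFSET   */

	printf("VA_BITS=%u: VMALLOC_START=%#llx PAGE_OFFSET=%#llx\n",
	       va_bits, (unsigned long long)vmalloc_start,
	       (unsigned long long)page_offset);
}

int main(void)
{
	layout(39);	/* 4KB pages, 3 levels */
	layout(42);	/* 64KB pages, 2 levels */
	layout(48);	/* 4KB pages, 4 levels (this patch) */
	return 0;
}

A kernel built with VA_BITS=48 fixes VMALLOC_START at 0xffff000000000000 regardless of what the hardware reports, which is the mismatch being raised above.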
> +#ifndef CONFIG_ARM64_4_LEVELS > #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) > +#else > +#define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) > +#endif > > #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) > > @@ -44,12 +48,16 @@ > #ifndef __ASSEMBLY__ > extern void __pte_error(const char *file, int line, unsigned long val); > extern void __pmd_error(const char *file, int line, unsigned long val); > +extern void __pud_error(const char *file, int line, unsigned long val); > extern void __pgd_error(const char *file, int line, unsigned long val); > > #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) > #endif > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) > +#endif > #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) > > #ifdef CONFIG_SMP > @@ -347,6 +355,30 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > > #endif /* CONFIG_ARM64_2_LEVELS */ > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) > +#define pgd_present(pgd) (pgd_val(pgd)) > + > +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) > +{ > + *pgdp = pgd; > + dsb(ishst); > +} > + > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + set_pgd(pgdp, __pgd(0)); > +} > + > +static inline pud_t *pgd_page_vaddr(pgd_t pgd) > +{ > + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > /* to find an entry in a page-table-directory */ > #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) > > @@ -355,6 +387,14 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > /* to find an entry in a kernel page-table-directory */ > #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) > > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) > +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) > +{ > + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); > +} > +#endif > + > /* Find an entry in the second-level page table.. */ > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index bc19101edaeb..49dc8f03362f 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -100,6 +100,15 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > } > #endif > > +#ifdef CONFIG_ARM64_4_LEVELS > +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > + unsigned long addr) > +{ > + tlb_add_flush(tlb, addr); > + tlb_remove_page(tlb, virt_to_page(pudp)); > +} > +#endif > + > static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, > unsigned long address) > { > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > index fa3b7fb8a77a..847b99daad79 100644 > --- a/arch/arm64/kernel/head.S > +++ b/arch/arm64/kernel/head.S > @@ -476,16 +476,42 @@ ENDPROC(__calc_phys_offset) > .quad PAGE_OFFSET > > /* > - * Macro to populate the PGD for the corresponding block entry in the next > - * level (tbl) for the given virtual address. > + * Macro to populate the PUD for the corresponding block entry in the next > + * level (tbl) for the given virtual address in case of 4 levels. 
> * > - * Preserves: pgd, tbl, virt > - * Corrupts: tmp1, tmp2 > + * Preserves: pgd, virt > + * Corrupts: tbl, tmp1, tmp2 > + * Returns: pud > */ > - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 > + .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 > +#ifdef CONFIG_ARM64_4_LEVELS > + add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. > + // to make room for pud > + add \pud, \pgd, #PAGE_SIZE // pgd points to pud which > + // follows pgd > + lsr \tmp1, \virt, #PUD_SHIFT > + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > + orr \tmp2, \tbl, #3 // PUD entry table type > + str \tmp2, [\pud, \tmp1, lsl #3] > +#else > + mov \pud, \tbl > +#endif > + .endm > + > +/* > + * Macro to populate the PGD (and possibily PUD) for the corresponding > + * block entry in the next level (tbl) for the given virtual address. > + * > + * Preserves: pgd, virt > + * Corrupts: tmp1, tmp2, tmp3 > + * Returns: tbl -> page where block mappings can be placed > + * (changed to make room for pud with 4 levels, preserved otherwise) > + */ > + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 > + create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 > lsr \tmp1, \virt, #PGDIR_SHIFT > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > - orr \tmp2, \tbl, #3 // PGD entry table type > + orr \tmp2, \tmp3, #3 // PGD entry table type > str \tmp2, [\pgd, \tmp1, lsl #3] > .endm > > @@ -550,7 +576,7 @@ __create_page_tables: > add x0, x25, #PAGE_SIZE // section table address > ldr x3, =KERNEL_START > add x3, x3, x28 // __pa(KERNEL_START) > - create_pgd_entry x25, x0, x3, x5, x6 > + create_pgd_entry x25, x0, x3, x1, x5, x6 > ldr x6, =KERNEL_END > mov x5, x3 // __pa(KERNEL_START) > add x6, x6, x28 // __pa(KERNEL_END) > @@ -561,7 +587,7 @@ __create_page_tables: > */ > add x0, x26, #PAGE_SIZE // section table address > mov x5, #PAGE_OFFSET > - create_pgd_entry x26, x0, x5, x3, x6 > + create_pgd_entry x26, x0, x5, x1, x3, x6 > ldr x6, =KERNEL_END > mov x3, x24 // phys offset > create_block_map x0, x7, x3, x5, x6 > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index 506f7814e305..02cd3f023e9a 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -339,6 +339,11 @@ void __pmd_error(const char *file, int line, unsigned long val) > pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); > } > > +void __pud_error(const char *file, int line, unsigned long val) > +{ > + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); > +} > + > void __pgd_error(const char *file, int line, unsigned long val) > { > pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index bcc965e2cce1..41cb6d3d6075 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -62,6 +62,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) > break; > > pud = pud_offset(pgd, addr); > + printk(", *pud=%016llx", pud_val(*pud)); > if (pud_none(*pud) || pud_bad(*pud)) > break; > > diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c > index 69000efa015e..fa324bd5a5c4 100644 > --- a/arch/arm64/mm/ioremap.c > +++ b/arch/arm64/mm/ioremap.c > @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) > EXPORT_SYMBOL(ioremap_cache); > > static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; > -#ifndef CONFIG_ARM64_64K_PAGES > +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 > static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; > #endif > +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 > +static pte_t bm_pud[PTRS_PER_PUD] 
__page_aligned_bss; > +#endif > > static inline pud_t * __init early_ioremap_pud(unsigned long addr) > { > @@ -144,6 +147,7 @@ void __init early_ioremap_init(void) > unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); > > pgd = pgd_offset_k(addr); > + pgd_populate(&init_mm, pgd, bm_pud); > pud = pud_offset(pgd, addr); > pud_populate(&init_mm, pud, bm_pmd); > pmd = pmd_offset(pud, addr); > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index c43f1dd19489..c55567283cde 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -32,6 +32,7 @@ > #include <asm/setup.h> > #include <asm/sizes.h> > #include <asm/tlb.h> > +#include <asm/memblock.h> > #include <asm/mmu_context.h> > > #include "mm.h" > @@ -204,9 +205,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, > unsigned long end, unsigned long phys, > int map_io) > { > - pud_t *pud = pud_offset(pgd, addr); > + pud_t *pud; > unsigned long next; > > + if (pgd_none(*pgd)) { > + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); > + pgd_populate(&init_mm, pgd, pud); > + } > + BUG_ON(pgd_bad(*pgd)); > + > + pud = pud_offset(pgd, addr); > do { > next = pud_addr_end(addr, end); > > @@ -290,10 +298,10 @@ static void __init map_mem(void) > * memory addressable from the initial direct kernel mapping. > * > * The initial direct kernel mapping, located at swapper_pg_dir, > - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be > + * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be > * aligned to 2MB as per Documentation/arm64/booting.txt). > */ > - limit = PHYS_OFFSET + PGDIR_SIZE; > + limit = PHYS_OFFSET + PUD_SIZE; > memblock_set_current_limit(limit); > > /* map all the memory banks */ > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
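(For reference, the __phys_to_virt() overflow described in the commit message can be reproduced with plain 64-bit arithmetic. Illustration only, assuming the 544GB RAM base from the cited white paper and the __phys_to_virt() form (x) - PHYS_OFFSET + PAGE_OFFSET:)

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t page_offset = UINT64_MAX << 38;	/* VA_BITS=39: 0xffffffc000000000 */
	uint64_t linear_size = ~page_offset + 1;	/* upper half of kernel VA: 256GB */
	uint64_t phys_offset = 544ULL << 30;		/* RAM starts at 544GB, per [1] */
	uint64_t ram_end     = 1024ULL << 30;		/* ...and may extend to 1TB */

	/* __phys_to_virt(x) = (x) - PHYS_OFFSET + PAGE_OFFSET */
	uint64_t virt_end = (ram_end - phys_offset) + page_offset;

	printf("linear map size: %llu GB\n", (unsigned long long)(linear_size >> 30));
	printf("virt of RAM end: %#llx%s\n", (unsigned long long)virt_end,
	       virt_end < page_offset ? " (wrapped past 2^64)" : "");
	return 0;
}

With 4KB pages and three levels, the 256GB linear map cannot reach the top of a 544GB-1TB RAM window, and the computed virtual address wraps past 2^64; that is the failure map_mem() hits without this patch.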