Message ID | 502bf9da77574d6b2edeee0abec3df6b1510aaa0.1662459668.git.baskov@ispras.ru |
---|---|
State | New |
Headers | show |
Series | x86_64: Improvements at compressed kernel stage | expand |
On Tue, 6 Sept 2022 at 12:42, Evgeniy Baskov <baskov@ispras.ru> wrote: > > Doing it that way allows setting up stricter memory attributes, > simplifies boot code path and removes potential relocation > of kernel image. > > Wire up required interfaces and minimally initialize zero page > fields needed for it to function correctly. > > Signed-off-by: Evgeniy Baskov <baskov@ispras.ru> > > create mode 100644 arch/x86/include/asm/shared/extract.h > create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c > --- > arch/x86/boot/compressed/head_32.S | 6 +- > arch/x86/boot/compressed/head_64.S | 45 ++++ > arch/x86/include/asm/shared/extract.h | 25 ++ > drivers/firmware/efi/Kconfig | 14 ++ > drivers/firmware/efi/libstub/Makefile | 1 + > drivers/firmware/efi/libstub/efistub.h | 5 + > .../firmware/efi/libstub/x86-extract-direct.c | 220 ++++++++++++++++++ > drivers/firmware/efi/libstub/x86-stub.c | 45 ++-- > 8 files changed, 343 insertions(+), 18 deletions(-) > create mode 100644 arch/x86/include/asm/shared/extract.h > create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c > > diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S > index b46a1c4109cf..d2866f06bc9f 100644 > --- a/arch/x86/boot/compressed/head_32.S > +++ b/arch/x86/boot/compressed/head_32.S > @@ -155,7 +155,11 @@ SYM_FUNC_START(efi32_stub_entry) > add $0x4, %esp > movl 8(%esp), %esi /* save boot_params pointer */ > call efi_main > - /* efi_main returns the possibly relocated address of startup_32 */ > + > + /* > + * efi_main returns the possibly > + * relocated address of exteracted kernel entry point. extracted > + */ > jmp *%eax > SYM_FUNC_END(efi32_stub_entry) > SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) > diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S > index 37ce094571b5..b6bae8e7ee71 100644 > --- a/arch/x86/boot/compressed/head_64.S > +++ b/arch/x86/boot/compressed/head_64.S > @@ -555,9 +555,54 @@ SYM_FUNC_START(efi64_stub_entry) > and $~0xf, %rsp /* realign the stack */ > movq %rdx, %rbx /* save boot_params pointer */ > call efi_main > + > +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT > + cld > + cli > + > + movq %rbx, %rdi /* boot_params */ > + movq %rax, %rsi /* decompressed kernel address */ > + > + /* Make sure we have GDT with 32-bit code segment */ > + leaq gdt64(%rip), %rax > + addq %rax, 2(%rax) > + lgdt (%rax) > + > + /* Setup data segments. */ > + xorl %eax, %eax > + movl %eax, %ds > + movl %eax, %es > + movl %eax, %ss > + movl %eax, %fs > + movl %eax, %gs > + > + pushq %rsi > + pushq %rdi > + > + call startup32_enable_nx_if_supported > + > + call trampoline_pgtable_init > + movq %rax, %rdx > + > + > + /* Swap %rsi and %rsi */ > + popq %rsi > + popq %rdi > + > + /* Save the trampoline address in RCX */ > + movq trampoline_32bit(%rip), %rcx > + > + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ > + pushq $__KERNEL32_CS > + leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rcx), %rax > + pushq %rax > + lretq > +#else > movq %rbx,%rsi > leaq rva(startup_64)(%rax), %rax > jmp *%rax > +#endif > + > SYM_FUNC_END(efi64_stub_entry) > SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) > #endif > diff --git a/arch/x86/include/asm/shared/extract.h b/arch/x86/include/asm/shared/extract.h > new file mode 100644 > index 000000000000..163678145884 > --- /dev/null > +++ b/arch/x86/include/asm/shared/extract.h > @@ -0,0 +1,25 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef ASM_SHARED_EXTRACT_H > +#define ASM_SHARED_EXTRACT_H > + > +#define MAP_WRITE 0x02 /* Writable memory */ > +#define MAP_EXEC 0x04 /* Executable memory */ > +#define MAP_ALLOC 0x10 /* Range needs to be allocated */ > +#define MAP_PROTECT 0x20 /* Set exact memory attributes for memory range */ > + > +struct efi_iofunc { > + void (*putstr)(const char *msg); > + void (*puthex)(unsigned long x); > + unsigned long (*map_range)(unsigned long start, > + unsigned long end, > + unsigned int flags); This looks a bit random - having a map_range() routine as a member of the console I/O struct. Can we make this abstraction a bit more natural? > +}; > + > +void *efi_extract_kernel(struct boot_params *rmode, > + struct efi_iofunc *iofunc, > + unsigned char *input_data, > + unsigned long input_len, > + unsigned char *output, > + unsigned long output_len); > + > +#endif /* ASM_SHARED_EXTRACT_H */ > diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig > index 6cb7384ad2ac..2418402a0bda 100644 > --- a/drivers/firmware/efi/Kconfig > +++ b/drivers/firmware/efi/Kconfig > @@ -91,6 +91,20 @@ config EFI_DXE_MEM_ATTRIBUTES > Use DXE services to check and alter memory protection > attributes during boot via EFISTUB to ensure that memory > ranges used by the kernel are writable and executable. > + This option also enables stricter memory attributes > + on compressed kernel PE image. > + > +config EFI_STUB_EXTRACT_DIRECT > + bool "Extract kernel directly from UEFI environment" > + depends on EFI && EFI_STUB && X86_64 > + default y What is the reason for making this configurable? Couldn't we just enable it unconditionally? > + help > + Extract kernel before exiting EFI boot services > + This allows maintaining W^X for kernel image for > + the whole execution of compressed kernel code. > + This also slightly improves efficiency of extraction > + code by removing the need to copy the kernel around > + and rebuild page tables. > > config EFI_PARAMS_FROM_FDT > bool > diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile > index d0537573501e..1cea7d913356 100644 > --- a/drivers/firmware/efi/libstub/Makefile > +++ b/drivers/firmware/efi/libstub/Makefile > @@ -69,6 +69,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \ > lib-$(CONFIG_ARM) += arm32-stub.o > lib-$(CONFIG_ARM64) += arm64-stub.o > lib-$(CONFIG_X86) += x86-stub.o > +lib-$(CONFIG_EFI_STUB_EXTRACT_DIRECT) += x86-extract-direct.o > lib-$(CONFIG_RISCV) += riscv-stub.o > CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) > > diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h > index 22fe28385db7..cdd1bb50c786 100644 > --- a/drivers/firmware/efi/libstub/efistub.h > +++ b/drivers/firmware/efi/libstub/efistub.h > @@ -968,6 +968,11 @@ static inline void > efi_enable_reset_attack_mitigation(void) { } > #endif > > +#ifdef CONFIG_X86 > +unsigned long extract_kernel_direct(struct boot_params *boot_params); > +void startup_32(struct boot_params *boot_params); > +#endif > + Please put this somewhere else > void efi_retrieve_tpm2_eventlog(void); > > #endif > diff --git a/drivers/firmware/efi/libstub/x86-extract-direct.c b/drivers/firmware/efi/libstub/x86-extract-direct.c > new file mode 100644 > index 000000000000..6076bd75cfd6 > --- /dev/null > +++ b/drivers/firmware/efi/libstub/x86-extract-direct.c > @@ -0,0 +1,220 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/acpi.h> > +#include <linux/efi.h> > +#include <linux/elf.h> > +#include <linux/stddef.h> > + > +#include <asm/efi.h> > +#include <asm/e820/types.h> > +#include <asm/desc.h> > +#include <asm/boot.h> > +#include <asm/bootparam_utils.h> > +#include <asm/shared/extract.h> > +#include <asm/shared/pgtable.h> > + > +#include "efistub.h" > + > +static void do_puthex(unsigned long value); > +static void do_putstr(const char *msg); > + Can we get rid of these forward declarations? > +static unsigned long do_map_range(unsigned long start, > + unsigned long end, > + unsigned int flags) > +{ > + efi_status_t status; > + > + unsigned long size = end - start; > + > + if (flags & MAP_ALLOC) { > + if (start == (unsigned long)startup_32) > + start = LOAD_PHYSICAL_ADDR; > + > + unsigned long addr; > + > + status = efi_low_alloc_above(size, CONFIG_PHYSICAL_ALIGN, > + &addr, start); > + if (status != EFI_SUCCESS) > + efi_err("Unable to allocate memory for uncompressed kernel"); > + > + if (start != addr) { > + efi_debug("Unable to allocate at given address" > + " (desired=0x%lx, actual=0x%lx)", > + (unsigned long)start, addr); > + start = addr; > + } > + } > + > + if (flags & (MAP_PROTECT | MAP_ALLOC)) { > + unsigned long attr = 0; > + > + if (!(flags & MAP_EXEC)) > + attr |= EFI_MEMORY_XP; > + > + if (!(flags & MAP_WRITE)) > + attr |= EFI_MEMORY_RO; > + > + status = efi_adjust_memory_range_protection(start, > + end - start, > + attr); > + if (status != EFI_SUCCESS) > + efi_err("Unable to protect memory range"); > + } > + > + return start; > +} > + > +/* > + * Trampoline takes 3 pages and can be loaded in first megabyte of memory > + * with its end placed between 0 and 640k where BIOS might start. > + * (see arch/x86/boot/compressed/pgtable_64.c) > + */ > + > +#ifdef CONFIG_64BIT > +static efi_status_t prepare_trampoline(void) > +{ > + efi_status_t status; > + > + status = efi_allocate_pages(TRAMPOLINE_32BIT_SIZE, > + (unsigned long *)&trampoline_32bit, > + TRAMPOLINE_32BIT_PLACEMENT_MAX); > + > + if (status != EFI_SUCCESS) > + return status; > + > + unsigned long trampoline_start = (unsigned long)trampoline_32bit; > + > + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); > + > + /* First page of trampoline is a top level page table */ > + efi_adjust_memory_range_protection(trampoline_start, > + PAGE_SIZE, > + EFI_MEMORY_XP); > + > + /* Second page of trampoline is the code (with a padding) */ > + > + void *caddr = (void *)trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET; > + > + memcpy(caddr, trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); > + > + efi_adjust_memory_range_protection((unsigned long)caddr, > + PAGE_SIZE, > + EFI_MEMORY_RO); > + > + /* And the last page of trampoline is the stack */ > + > + efi_adjust_memory_range_protection(trampoline_start + 2 * PAGE_SIZE, > + PAGE_SIZE, > + EFI_MEMORY_XP); > + > + return EFI_SUCCESS; > +} > +#else > +static inline efi_status_t prepare_trampoline(void) > +{ > + return EFI_SUCCESS; > +} > +#endif > + > +static efi_status_t init_loader_data(struct boot_params *params) > +{ > + struct efi_info *efi = (void *)¶ms->efi_info; > + efi_status_t status; > + > + unsigned long map_size, desc_size, buff_size; > + u32 desc_ver; > + efi_memory_desc_t *mmap; > + > + struct efi_boot_memmap map = { > + .map = &mmap, > + .map_size = &map_size, > + .desc_size = &desc_size, > + .desc_ver = &desc_ver, > + .key_ptr = NULL, > + .buff_size = &buff_size, > + }; > + > + status = efi_get_memory_map(&map); efi_get_memory_map() has been updated in the mean time, so this needs a rewrite. > + > + if (status != EFI_SUCCESS) { > + efi_err("Unable to get EFI memory map...\n"); > + return status; > + } > + > + const char *signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE > + : EFI32_LOADER_SIGNATURE; > + > + memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); > + > + efi->efi_memdesc_size = desc_size; > + efi->efi_memdesc_version = desc_ver; > + efi->efi_memmap_size = map_size; > + > + efi_set_u64_split((unsigned long)mmap, > + &efi->efi_memmap, &efi->efi_memmap_hi); > + > + efi_set_u64_split((unsigned long)efi_system_table, > + &efi->efi_systab, &efi->efi_systab_hi); > + > + return EFI_SUCCESS; > +} > + > +static void free_loader_data(struct boot_params *params) > +{ > + struct efi_info *efi = (void *)¶ms->efi_info; > + unsigned long mmap = efi->efi_memmap; > + > +#ifdef CONFIG_64BIT > + mmap |= ((unsigned long)efi->efi_memmap_hi << 32); > +#endif > + > + efi_bs_call(free_pool, (void *)mmap); > + > + efi->efi_memdesc_size = 0; > + efi->efi_memdesc_version = 0; > + efi->efi_memmap_size = 0; > + efi_set_u64_split(0, &efi->efi_memmap, &efi->efi_memmap_hi); > +} > + > +unsigned long extract_kernel_direct(struct boot_params *params) > +{ > + > + extern unsigned char input_data[]; > + extern unsigned int output_len, input_len; > + > + void *res; > + efi_status_t status; > + struct efi_iofunc iof = { 0 }; > + > + status = prepare_trampoline(); > + > + if (status != EFI_SUCCESS) > + return 0; > + > + /* Prepare environment for do_extract_kernel() call */ > + status = init_loader_data(params); > + > + if (status != EFI_SUCCESS) > + return 0; > + > + iof.puthex = do_puthex; > + iof.putstr = do_putstr; > + iof.map_range = do_map_range; > + > + res = efi_extract_kernel(params, &iof, input_data, input_len, > + (unsigned char *)startup_32, output_len); > + > + free_loader_data(params); > + > + return (unsigned long)res; > +} > + > +static void do_puthex(unsigned long value) > +{ > + efi_printk("%08lx", value); > +} > + > +static void do_putstr(const char *msg) > +{ > + efi_printk("%s", msg); > +} > diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c > index 678f9c2ccafc..680184034cb7 100644 > --- a/drivers/firmware/efi/libstub/x86-stub.c > +++ b/drivers/firmware/efi/libstub/x86-stub.c > @@ -230,26 +230,25 @@ static void > setup_memory_protection(unsigned long image_base, unsigned long image_size) > { > /* > - * Allow execution of possible trampoline used > - * for switching between 4- and 5-level page tables > - * and relocated kernel image. > - */ > + * Allow execution of possible trampoline used > + * for switching between 4- and 5-level page tables > + * and relocated kernel image. > + */ > Drop this hunk please > efi_adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, > TRAMPOLINE_PLACEMENT_SIZE, 0); > > #ifdef CONFIG_64BIT > - if (image_base != (unsigned long)startup_32) > - efi_adjust_memory_range_protection(image_base, image_size, 0); > + efi_adjust_memory_range_protection(image_base, image_size, 0); > #else > /* > - * Clear protection flags on a whole range of possible > - * addresses used for KASLR. We don't need to do that > - * on x86_64, since KASLR/extraction is performed after > - * dedicated identity page tables are built and we only > - * need to remove possible protection on relocated image > - * itself disregarding further relocations. > - */ > + * Clear protection flags on a whole range of possible > + * addresses used for KASLR. We don't need to do that > + * on x86_64, since KASLR/extraction is performed after > + * dedicated identity page tables are built and we only > + * need to remove possible protection on relocated image > + * itself disregarding further relocations. > + */ Drop this hunk please > efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, > KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR, > 0); > @@ -270,8 +269,10 @@ static void setup_quirks(struct boot_params *boot_params, > retrieve_apple_device_properties(boot_params); > } > > - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) > + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES) && > + !IS_ENABLED(CONFIG_EFI_STUB_EXTRACT_DIRECT)) { > setup_memory_protection(image_base, image_size); > + } > } > > /* > @@ -710,8 +711,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) > } > > /* > - * On success, we return the address of startup_32, which has potentially been > - * relocated by efi_relocate_kernel. > + * On success, we return: > + * - the address of startup_32, which has potentially been > + * relocated by efi_relocate_kernel, if libstub direct extraction is disabled. > + * - extracted kernel entry point if libstub direct extraction is enabled. > * On failure, we exit to the firmware via efi_exit instead of returning. > */ > unsigned long efi_main(efi_handle_t handle, > @@ -736,6 +739,7 @@ unsigned long efi_main(efi_handle_t handle, > efi_dxe_table = NULL; > } > > +#ifndef CONFIG_EFI_STUB_EXTRACT_DIRECT > /* > * If the kernel isn't already loaded at a suitable address, > * relocate it. > @@ -789,6 +793,7 @@ unsigned long efi_main(efi_handle_t handle, > */ > image_offset = 0; > } > +#endif > > #ifdef CONFIG_CMDLINE_BOOL > status = efi_parse_options(CONFIG_CMDLINE); > @@ -845,7 +850,13 @@ unsigned long efi_main(efi_handle_t handle, > > setup_efi_pci(boot_params); > > - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); > + setup_quirks(boot_params, buffer_start, buffer_end - buffer_start); > + > +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT > + bzimage_addr = extract_kernel_direct(boot_params); > + if (!bzimage_addr) > + goto fail; > +#endif > > status = exit_boot(boot_params, handle); > if (status != EFI_SUCCESS) { > -- > 2.35.1 >
On 2022-10-19 10:35, Ard Biesheuvel wrote: > On Tue, 6 Sept 2022 at 12:42, Evgeniy Baskov <baskov@ispras.ru> wrote: >> >> Doing it that way allows setting up stricter memory attributes, >> simplifies boot code path and removes potential relocation >> of kernel image. >> >> Wire up required interfaces and minimally initialize zero page >> fields needed for it to function correctly. >> >> Signed-off-by: Evgeniy Baskov <baskov@ispras.ru> >> >> create mode 100644 arch/x86/include/asm/shared/extract.h >> create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c >> --- >> arch/x86/boot/compressed/head_32.S | 6 +- >> arch/x86/boot/compressed/head_64.S | 45 ++++ >> arch/x86/include/asm/shared/extract.h | 25 ++ >> drivers/firmware/efi/Kconfig | 14 ++ >> drivers/firmware/efi/libstub/Makefile | 1 + >> drivers/firmware/efi/libstub/efistub.h | 5 + >> .../firmware/efi/libstub/x86-extract-direct.c | 220 >> ++++++++++++++++++ >> drivers/firmware/efi/libstub/x86-stub.c | 45 ++-- >> 8 files changed, 343 insertions(+), 18 deletions(-) >> create mode 100644 arch/x86/include/asm/shared/extract.h >> create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c >> >> diff --git a/arch/x86/boot/compressed/head_32.S >> b/arch/x86/boot/compressed/head_32.S >> index b46a1c4109cf..d2866f06bc9f 100644 >> --- a/arch/x86/boot/compressed/head_32.S >> +++ b/arch/x86/boot/compressed/head_32.S >> @@ -155,7 +155,11 @@ SYM_FUNC_START(efi32_stub_entry) >> add $0x4, %esp >> movl 8(%esp), %esi /* save boot_params pointer */ >> call efi_main >> - /* efi_main returns the possibly relocated address of >> startup_32 */ >> + >> + /* >> + * efi_main returns the possibly >> + * relocated address of exteracted kernel entry point. > > extracted Thanks, will fix. > ... >> diff --git a/arch/x86/include/asm/shared/extract.h >> b/arch/x86/include/asm/shared/extract.h >> new file mode 100644 >> index 000000000000..163678145884 >> --- /dev/null >> +++ b/arch/x86/include/asm/shared/extract.h >> @@ -0,0 +1,25 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef ASM_SHARED_EXTRACT_H >> +#define ASM_SHARED_EXTRACT_H >> + >> +#define MAP_WRITE 0x02 /* Writable memory */ >> +#define MAP_EXEC 0x04 /* Executable memory */ >> +#define MAP_ALLOC 0x10 /* Range needs to be allocated */ >> +#define MAP_PROTECT 0x20 /* Set exact memory attributes for memory >> range */ >> + >> +struct efi_iofunc { >> + void (*putstr)(const char *msg); >> + void (*puthex)(unsigned long x); >> + unsigned long (*map_range)(unsigned long start, >> + unsigned long end, >> + unsigned int flags); > > This looks a bit random - having a map_range() routine as a member of > the console I/O struct. Can we make this abstraction a bit more > natural? Hmm, I can either change the name of this stucture to something more generic (like efi_extract_callbacks) or split map_range separately as a separate function argument. (Renaming seems simpler, so I will do that for now.) >> +}; >> + >> +void *efi_extract_kernel(struct boot_params *rmode, >> + struct efi_iofunc *iofunc, >> + unsigned char *input_data, >> + unsigned long input_len, >> + unsigned char *output, >> + unsigned long output_len); >> + >> +#endif /* ASM_SHARED_EXTRACT_H */ >> diff --git a/drivers/firmware/efi/Kconfig >> b/drivers/firmware/efi/Kconfig >> index 6cb7384ad2ac..2418402a0bda 100644 >> --- a/drivers/firmware/efi/Kconfig >> +++ b/drivers/firmware/efi/Kconfig >> @@ -91,6 +91,20 @@ config EFI_DXE_MEM_ATTRIBUTES >> Use DXE services to check and alter memory protection >> attributes during boot via EFISTUB to ensure that memory >> ranges used by the kernel are writable and executable. >> + This option also enables stricter memory attributes >> + on compressed kernel PE image. >> + >> +config EFI_STUB_EXTRACT_DIRECT >> + bool "Extract kernel directly from UEFI environment" >> + depends on EFI && EFI_STUB && X86_64 >> + default y > > What is the reason for making this configurable? Couldn't we just > enable it unconditionally? > When I first implemented it it was too hackish, but now it seems OK, so I can make it unconditional, and it will make things simpler in several places. Although making it work on x86_32 will require some additional work. Also kernel with EFI_STUB_EXTRACT_DIRECT disabled breaks boot process with Mu firmware when W^X enabled, as pointed out by Peter. So, I guess, I will just remove the switch. >> >> +#ifdef CONFIG_X86 >> +unsigned long extract_kernel_direct(struct boot_params *boot_params); >> +void startup_32(struct boot_params *boot_params); >> +#endif >> + > > Please put this somewhere else > Will adding little x86-specific header file for these be appropriate? >> + >> +#include "efistub.h" >> + >> +static void do_puthex(unsigned long value); >> +static void do_putstr(const char *msg); >> + > > Can we get rid of these forward declarations? > Yes, I will move those functions here and remove declarations. ... >> + /* First page of trampoline is a top level page table */ >> + efi_adjust_memory_range_protection(trampoline_start, >> + PAGE_SIZE, >> + EFI_MEMORY_XP); >> + >> + /* Second page of trampoline is the code (with a padding) */ >> + status = efi_get_memory_map(&map); > > efi_get_memory_map() has been updated in the mean time, so this needs a > rewrite. Yep, it needs a rebase now. > ... >> setup_memory_protection(unsigned long image_base, unsigned long >> image_size) >> { >> /* >> - * Allow execution of possible trampoline used >> - * for switching between 4- and 5-level page tables >> - * and relocated kernel image. >> - */ >> + * Allow execution of possible trampoline used >> + * for switching between 4- and 5-level page tables >> + * and relocated kernel image. >> + */ >> > > Drop this hunk please That was unintentional, thanks.
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index b46a1c4109cf..d2866f06bc9f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -155,7 +155,11 @@ SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ + + /* + * efi_main returns the possibly + * relocated address of exteracted kernel entry point. + */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 37ce094571b5..b6bae8e7ee71 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -555,9 +555,54 @@ SYM_FUNC_START(efi64_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ call efi_main + +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT + cld + cli + + movq %rbx, %rdi /* boot_params */ + movq %rax, %rsi /* decompressed kernel address */ + + /* Make sure we have GDT with 32-bit code segment */ + leaq gdt64(%rip), %rax + addq %rax, 2(%rax) + lgdt (%rax) + + /* Setup data segments. */ + xorl %eax, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + pushq %rsi + pushq %rdi + + call startup32_enable_nx_if_supported + + call trampoline_pgtable_init + movq %rax, %rdx + + + /* Swap %rsi and %rsi */ + popq %rsi + popq %rdi + + /* Save the trampoline address in RCX */ + movq trampoline_32bit(%rip), %rcx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rcx), %rax + pushq %rax + lretq +#else movq %rbx,%rsi leaq rva(startup_64)(%rax), %rax jmp *%rax +#endif + SYM_FUNC_END(efi64_stub_entry) SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif diff --git a/arch/x86/include/asm/shared/extract.h b/arch/x86/include/asm/shared/extract.h new file mode 100644 index 000000000000..163678145884 --- /dev/null +++ b/arch/x86/include/asm/shared/extract.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_SHARED_EXTRACT_H +#define ASM_SHARED_EXTRACT_H + +#define MAP_WRITE 0x02 /* Writable memory */ +#define MAP_EXEC 0x04 /* Executable memory */ +#define MAP_ALLOC 0x10 /* Range needs to be allocated */ +#define MAP_PROTECT 0x20 /* Set exact memory attributes for memory range */ + +struct efi_iofunc { + void (*putstr)(const char *msg); + void (*puthex)(unsigned long x); + unsigned long (*map_range)(unsigned long start, + unsigned long end, + unsigned int flags); +}; + +void *efi_extract_kernel(struct boot_params *rmode, + struct efi_iofunc *iofunc, + unsigned char *input_data, + unsigned long input_len, + unsigned char *output, + unsigned long output_len); + +#endif /* ASM_SHARED_EXTRACT_H */ diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 6cb7384ad2ac..2418402a0bda 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -91,6 +91,20 @@ config EFI_DXE_MEM_ATTRIBUTES Use DXE services to check and alter memory protection attributes during boot via EFISTUB to ensure that memory ranges used by the kernel are writable and executable. + This option also enables stricter memory attributes + on compressed kernel PE image. + +config EFI_STUB_EXTRACT_DIRECT + bool "Extract kernel directly from UEFI environment" + depends on EFI && EFI_STUB && X86_64 + default y + help + Extract kernel before exiting EFI boot services + This allows maintaining W^X for kernel image for + the whole execution of compressed kernel code. + This also slightly improves efficiency of extraction + code by removing the need to copy the kernel around + and rebuild page tables. config EFI_PARAMS_FROM_FDT bool diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d0537573501e..1cea7d913356 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -69,6 +69,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_EFI_STUB_EXTRACT_DIRECT) += x86-extract-direct.o lib-$(CONFIG_RISCV) += riscv-stub.o CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 22fe28385db7..cdd1bb50c786 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -968,6 +968,11 @@ static inline void efi_enable_reset_attack_mitigation(void) { } #endif +#ifdef CONFIG_X86 +unsigned long extract_kernel_direct(struct boot_params *boot_params); +void startup_32(struct boot_params *boot_params); +#endif + void efi_retrieve_tpm2_eventlog(void); #endif diff --git a/drivers/firmware/efi/libstub/x86-extract-direct.c b/drivers/firmware/efi/libstub/x86-extract-direct.c new file mode 100644 index 000000000000..6076bd75cfd6 --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-extract-direct.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/elf.h> +#include <linux/stddef.h> + +#include <asm/efi.h> +#include <asm/e820/types.h> +#include <asm/desc.h> +#include <asm/boot.h> +#include <asm/bootparam_utils.h> +#include <asm/shared/extract.h> +#include <asm/shared/pgtable.h> + +#include "efistub.h" + +static void do_puthex(unsigned long value); +static void do_putstr(const char *msg); + +static unsigned long do_map_range(unsigned long start, + unsigned long end, + unsigned int flags) +{ + efi_status_t status; + + unsigned long size = end - start; + + if (flags & MAP_ALLOC) { + if (start == (unsigned long)startup_32) + start = LOAD_PHYSICAL_ADDR; + + unsigned long addr; + + status = efi_low_alloc_above(size, CONFIG_PHYSICAL_ALIGN, + &addr, start); + if (status != EFI_SUCCESS) + efi_err("Unable to allocate memory for uncompressed kernel"); + + if (start != addr) { + efi_debug("Unable to allocate at given address" + " (desired=0x%lx, actual=0x%lx)", + (unsigned long)start, addr); + start = addr; + } + } + + if (flags & (MAP_PROTECT | MAP_ALLOC)) { + unsigned long attr = 0; + + if (!(flags & MAP_EXEC)) + attr |= EFI_MEMORY_XP; + + if (!(flags & MAP_WRITE)) + attr |= EFI_MEMORY_RO; + + status = efi_adjust_memory_range_protection(start, + end - start, + attr); + if (status != EFI_SUCCESS) + efi_err("Unable to protect memory range"); + } + + return start; +} + +/* + * Trampoline takes 3 pages and can be loaded in first megabyte of memory + * with its end placed between 0 and 640k where BIOS might start. + * (see arch/x86/boot/compressed/pgtable_64.c) + */ + +#ifdef CONFIG_64BIT +static efi_status_t prepare_trampoline(void) +{ + efi_status_t status; + + status = efi_allocate_pages(TRAMPOLINE_32BIT_SIZE, + (unsigned long *)&trampoline_32bit, + TRAMPOLINE_32BIT_PLACEMENT_MAX); + + if (status != EFI_SUCCESS) + return status; + + unsigned long trampoline_start = (unsigned long)trampoline_32bit; + + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); + + /* First page of trampoline is a top level page table */ + efi_adjust_memory_range_protection(trampoline_start, + PAGE_SIZE, + EFI_MEMORY_XP); + + /* Second page of trampoline is the code (with a padding) */ + + void *caddr = (void *)trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET; + + memcpy(caddr, trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + + efi_adjust_memory_range_protection((unsigned long)caddr, + PAGE_SIZE, + EFI_MEMORY_RO); + + /* And the last page of trampoline is the stack */ + + efi_adjust_memory_range_protection(trampoline_start + 2 * PAGE_SIZE, + PAGE_SIZE, + EFI_MEMORY_XP); + + return EFI_SUCCESS; +} +#else +static inline efi_status_t prepare_trampoline(void) +{ + return EFI_SUCCESS; +} +#endif + +static efi_status_t init_loader_data(struct boot_params *params) +{ + struct efi_info *efi = (void *)¶ms->efi_info; + efi_status_t status; + + unsigned long map_size, desc_size, buff_size; + u32 desc_ver; + efi_memory_desc_t *mmap; + + struct efi_boot_memmap map = { + .map = &mmap, + .map_size = &map_size, + .desc_size = &desc_size, + .desc_ver = &desc_ver, + .key_ptr = NULL, + .buff_size = &buff_size, + }; + + status = efi_get_memory_map(&map); + + if (status != EFI_SUCCESS) { + efi_err("Unable to get EFI memory map...\n"); + return status; + } + + const char *signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE + : EFI32_LOADER_SIGNATURE; + + memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); + + efi->efi_memdesc_size = desc_size; + efi->efi_memdesc_version = desc_ver; + efi->efi_memmap_size = map_size; + + efi_set_u64_split((unsigned long)mmap, + &efi->efi_memmap, &efi->efi_memmap_hi); + + efi_set_u64_split((unsigned long)efi_system_table, + &efi->efi_systab, &efi->efi_systab_hi); + + return EFI_SUCCESS; +} + +static void free_loader_data(struct boot_params *params) +{ + struct efi_info *efi = (void *)¶ms->efi_info; + unsigned long mmap = efi->efi_memmap; + +#ifdef CONFIG_64BIT + mmap |= ((unsigned long)efi->efi_memmap_hi << 32); +#endif + + efi_bs_call(free_pool, (void *)mmap); + + efi->efi_memdesc_size = 0; + efi->efi_memdesc_version = 0; + efi->efi_memmap_size = 0; + efi_set_u64_split(0, &efi->efi_memmap, &efi->efi_memmap_hi); +} + +unsigned long extract_kernel_direct(struct boot_params *params) +{ + + extern unsigned char input_data[]; + extern unsigned int output_len, input_len; + + void *res; + efi_status_t status; + struct efi_iofunc iof = { 0 }; + + status = prepare_trampoline(); + + if (status != EFI_SUCCESS) + return 0; + + /* Prepare environment for do_extract_kernel() call */ + status = init_loader_data(params); + + if (status != EFI_SUCCESS) + return 0; + + iof.puthex = do_puthex; + iof.putstr = do_putstr; + iof.map_range = do_map_range; + + res = efi_extract_kernel(params, &iof, input_data, input_len, + (unsigned char *)startup_32, output_len); + + free_loader_data(params); + + return (unsigned long)res; +} + +static void do_puthex(unsigned long value) +{ + efi_printk("%08lx", value); +} + +static void do_putstr(const char *msg) +{ + efi_printk("%s", msg); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 678f9c2ccafc..680184034cb7 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -230,26 +230,25 @@ static void setup_memory_protection(unsigned long image_base, unsigned long image_size) { /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ + * Allow execution of possible trampoline used + * for switching between 4- and 5-level page tables + * and relocated kernel image. + */ efi_adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, TRAMPOLINE_PLACEMENT_SIZE, 0); #ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - efi_adjust_memory_range_protection(image_base, image_size, 0); + efi_adjust_memory_range_protection(image_base, image_size, 0); #else /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ + * Clear protection flags on a whole range of possible + * addresses used for KASLR. We don't need to do that + * on x86_64, since KASLR/extraction is performed after + * dedicated identity page tables are built and we only + * need to remove possible protection on relocated image + * itself disregarding further relocations. + */ efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR, 0); @@ -270,8 +269,10 @@ static void setup_quirks(struct boot_params *boot_params, retrieve_apple_device_properties(boot_params); } - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES) && + !IS_ENABLED(CONFIG_EFI_STUB_EXTRACT_DIRECT)) { setup_memory_protection(image_base, image_size); + } } /* @@ -710,8 +711,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) } /* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. + * On success, we return: + * - the address of startup_32, which has potentially been + * relocated by efi_relocate_kernel, if libstub direct extraction is disabled. + * - extracted kernel entry point if libstub direct extraction is enabled. * On failure, we exit to the firmware via efi_exit instead of returning. */ unsigned long efi_main(efi_handle_t handle, @@ -736,6 +739,7 @@ unsigned long efi_main(efi_handle_t handle, efi_dxe_table = NULL; } +#ifndef CONFIG_EFI_STUB_EXTRACT_DIRECT /* * If the kernel isn't already loaded at a suitable address, * relocate it. @@ -789,6 +793,7 @@ unsigned long efi_main(efi_handle_t handle, */ image_offset = 0; } +#endif #ifdef CONFIG_CMDLINE_BOOL status = efi_parse_options(CONFIG_CMDLINE); @@ -845,7 +850,13 @@ unsigned long efi_main(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params, buffer_start, buffer_end - buffer_start); + +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT + bzimage_addr = extract_kernel_direct(boot_params); + if (!bzimage_addr) + goto fail; +#endif status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) {
Doing it that way allows setting up stricter memory attributes, simplifies boot code path and removes potential relocation of kernel image. Wire up required interfaces and minimally initialize zero page fields needed for it to function correctly. Signed-off-by: Evgeniy Baskov <baskov@ispras.ru> create mode 100644 arch/x86/include/asm/shared/extract.h create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c --- arch/x86/boot/compressed/head_32.S | 6 +- arch/x86/boot/compressed/head_64.S | 45 ++++ arch/x86/include/asm/shared/extract.h | 25 ++ drivers/firmware/efi/Kconfig | 14 ++ drivers/firmware/efi/libstub/Makefile | 1 + drivers/firmware/efi/libstub/efistub.h | 5 + .../firmware/efi/libstub/x86-extract-direct.c | 220 ++++++++++++++++++ drivers/firmware/efi/libstub/x86-stub.c | 45 ++-- 8 files changed, 343 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/shared/extract.h create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c