diff mbox series

[RFC,27/28] x86/kernel: Switch to PIE linking for the core kernel

Message ID 20240925150059.3955569-57-ardb+git@google.com
State New
Headers show
Series [RFC,01/28] x86/pvh: Call C code via the kernel virtual mapping | expand

Commit Message

Ard Biesheuvel Sept. 25, 2024, 3:01 p.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

Build the kernel as a Position Independent Executable (PIE). This
results in more efficient relocation processing for the virtual
displacement of the kernel (for KASLR). More importantly, it instructs
the linker to generate what is actually needed (a program that can be
moved around in memory before execution), which is better than having to
rely on the linker to create a position dependent binary that happens to
tolerate being moved around after poking it in exactly the right manner.

Note that this means that all codegen should be compatible with PIE,
including Rust objects, so this needs to switch to the small code model
with the PIE relocation model as well.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/Kconfig                        |  2 +-
 arch/x86/Makefile                       | 11 +++++++----
 arch/x86/boot/compressed/misc.c         |  2 ++
 arch/x86/kernel/vmlinux.lds.S           |  5 +++++
 drivers/firmware/efi/libstub/x86-stub.c |  2 ++
 5 files changed, 17 insertions(+), 5 deletions(-)

Comments

Ard Biesheuvel Sept. 26, 2024, 1:38 p.m. UTC | #1
On Wed, 25 Sept 2024 at 22:25, Vegard Nossum <vegard.nossum@oracle.com> wrote:
>
>
> On 25/09/2024 17:01, Ard Biesheuvel wrote:
> > From: Ard Biesheuvel <ardb@kernel.org>
> >
> > Build the kernel as a Position Independent Executable (PIE). This
> > results in more efficient relocation processing for the virtual
> > displacement of the kernel (for KASLR). More importantly, it instructs
> > the linker to generate what is actually needed (a program that can be
> > moved around in memory before execution), which is better than having to
> > rely on the linker to create a position dependent binary that happens to
> > tolerate being moved around after poking it in exactly the right manner.
> >
> > Note that this means that all codegen should be compatible with PIE,
> > including Rust objects, so this needs to switch to the small code model
> > with the PIE relocation model as well.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >   arch/x86/Kconfig                        |  2 +-
> >   arch/x86/Makefile                       | 11 +++++++----
> >   arch/x86/boot/compressed/misc.c         |  2 ++
> >   arch/x86/kernel/vmlinux.lds.S           |  5 +++++
> >   drivers/firmware/efi/libstub/x86-stub.c |  2 ++
> >   5 files changed, 17 insertions(+), 5 deletions(-)
> >
...
>
> This patch causes a build failure here (on 64-bit):
>
>    LD      .tmp_vmlinux2
>    NM      .tmp_vmlinux2.syms
>    KSYMS   .tmp_vmlinux2.kallsyms.S
>    AS      .tmp_vmlinux2.kallsyms.o
>    LD      vmlinux
>    BTFIDS  vmlinux
> WARN: resolve_btfids: unresolved symbol bpf_lsm_key_free
> FAILED elf_update(WRITE): invalid section entry size
> make[5]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 255
> make[5]: *** Deleting file 'vmlinux'
> make[4]: *** [Makefile:1153: vmlinux] Error 2
> make[3]: *** [debian/rules:74: build-arch] Error 2
> dpkg-buildpackage: error: make -f debian/rules binary subprocess
> returned exit status 2
> make[2]: *** [scripts/Makefile.package:121: bindeb-pkg] Error 2
> make[1]: *** [/home/opc/linux-mainline-worktree2/Makefile:1544:
> bindeb-pkg] Error 2
> make: *** [Makefile:224: __sub-make] Error 2
>
> The parent commit builds fine. With V=1:
>
> + ldflags='-m elf_x86_64 -z noexecstack --pie -z text -z
> call-nop=suffix-nop -z max-page-size=0x200000 --build-id=sha1
> --orphan-handling=warn --script=./arch/x86/kernel/vmlinux.lds
> -Map=vmlinux.map'
> + ld -m elf_x86_64 -z noexecstack --pie -z text -z call-nop=suffix-nop
> -z max-page-size=0x200000 --build-id=sha1 --orphan-handling=warn
> --script=./arch/x86/kernel/vmlinux.lds -Map=vmlinux.map -o vmlinux
> --whole-archive vmlinux.a .vmlinux.export.o init/version-timestamp.o
> --no-whole-archive --start-group --end-group .tmp_vmlinux2.kallsyms.o
> .tmp_vmlinux1.btf.o
> + is_enabled CONFIG_DEBUG_INFO_BTF
> + grep -q '^CONFIG_DEBUG_INFO_BTF=y' include/config/auto.conf
> + info BTFIDS vmlinux
> + printf '  %-7s %s\n' BTFIDS vmlinux
>    BTFIDS  vmlinux
> + ./tools/bpf/resolve_btfids/resolve_btfids vmlinux
> WARN: resolve_btfids: unresolved symbol bpf_lsm_key_free
> FAILED elf_update(WRITE): invalid section entry size
>
> I can send the full config off-list if necessary, but looks like it
> might be enough to set CONFIG_DEBUG_INFO_BTF=y.
>

Thanks for the report. Turns out that adding the GOT to .rodata bumps
the section's sh_entsize to 8, and libelf complains if the section
size is not a multiple of the entry size.

I'll include a fix in the next revision.
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 54cb1f14218b..dbb4d284b0e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2187,7 +2187,7 @@  config RANDOMIZE_BASE
 # Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
 	def_bool y
-	depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)
+	depends on X86_32 && RELOCATABLE
 
 config PHYSICAL_ALIGN
 	hex "Alignment value to which kernel should be aligned"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 83d20f402535..c1dcff444bc8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -206,9 +206,8 @@  else
                 PIE_CFLAGS-$(CONFIG_SMP) += -mstack-protector-guard-reg=gs
         endif
 
-        # Don't emit relaxable GOTPCREL relocations
-        KBUILD_AFLAGS_KERNEL += -Wa,-mrelax-relocations=no
-        KBUILD_CFLAGS_KERNEL += -Wa,-mrelax-relocations=no $(PIE_CFLAGS-y)
+        KBUILD_CFLAGS_KERNEL	+= $(PIE_CFLAGS-y)
+        KBUILD_RUSTFLAGS_KERNEL	+= -Ccode-model=small -Crelocation-model=pie
 endif
 
 #
@@ -264,12 +263,16 @@  else
 LDFLAGS_vmlinux :=
 endif
 
+ifdef CONFIG_X86_64
+ldflags-pie-$(CONFIG_LD_IS_LLD)	:= --apply-dynamic-relocs
+ldflags-pie-$(CONFIG_LD_IS_BFD)	:= -z call-nop=suffix-nop
+LDFLAGS_vmlinux			+= --pie -z text $(ldflags-pie-y)
+
 #
 # The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
 # the linker to force 2MB page size regardless of the default page size used
 # by the linker.
 #
-ifdef CONFIG_X86_64
 LDFLAGS_vmlinux += -z max-page-size=0x200000
 endif
 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 89f01375cdb7..79e3ffe16f61 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -495,6 +495,8 @@  asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 		error("Destination virtual address changed when not relocatable");
 #endif
 
+	boot_params_ptr->kaslr_va_shift = virt_addr - LOAD_PHYSICAL_ADDR;
+
 	debug_putstr("\nDecompressing Linux... ");
 
 	if (init_unaccepted_memory()) {
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f7e832c2ac61..d172e6e8eaaf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -459,6 +459,11 @@  xen_elfnote_phys32_entry_offset =
 
 	DISCARDS
 
+	/DISCARD/ : {
+		*(.dynsym .gnu.hash .hash .dynamic .dynstr)
+		*(.interp .dynbss .eh_frame .sframe)
+	}
+
 	/*
 	 * Make sure that the .got.plt is either completely empty or it
 	 * contains only the lazy dispatch entries.
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index f8e465da344d..5c03954924fe 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -912,6 +912,8 @@  static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
 	if (status != EFI_SUCCESS)
 		return status;
 
+	boot_params_ptr->kaslr_va_shift = virt_addr - LOAD_PHYSICAL_ADDR;
+
 	entry = decompress_kernel((void *)addr, virt_addr, error);
 	if (entry == ULONG_MAX) {
 		efi_free(alloc_size, addr);