Message ID | 20221028150112.2883620-1-ardb@kernel.org |
---|---|
State | Accepted |
Commit | 23715a26c8d812912a70c6ac1ce67af649b95914 |
Headers | show |
Series | arm64: efi: Recover from synchronous exceptions occurring in firmware | expand |
On Fri, 28 Oct 2022 at 17:01, Ard Biesheuvel <ardb@kernel.org> wrote: > > Unlike x86, which has machinery to deal with page faults that occur > during the execution of EFI runtime services, arm64 has nothing like > that, and a synchronous exception raised by firmware code brings down > the whole system. > > With more EFI based systems appearing that were not built to run Linux > (such as the Windows-on-ARM laptops based on Qualcomm SOCs), as well as > the introduction of PRM (platform specific firmware routines that are > callable just like EFI runtime services), we are more likely to run into > issues of this sort, and it is much more likely that we can identify and > work around such issues if they don't bring down the system entirely. > > Since we already use a EFI runtime services call wrapper in assembler, > we can quite easily add some code that captures the execution state at > the point where the call is made, allowing us to revert to this state > and proceed execution if the call triggered a synchronous exception. > > Given that the kernel and the firmware don't share any data structures > that could end up in an indeterminate state, we can happily continue > running, as long as we mark the EFI runtime services as unavailable from > that point on. > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Does anyone mind if I take this via the EFI tree for v6.1? 
> --- > arch/arm64/include/asm/efi.h | 8 ++++++++ > arch/arm64/kernel/efi-rt-wrapper.S | 33 ++++++++++++++++++++++++++++-- > arch/arm64/kernel/efi.c | 26 +++++++++++++++++++++++ > arch/arm64/mm/fault.c | 4 ++++ > 4 files changed, 69 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h > index 439e2bc5d5d8..d6cf535d8352 100644 > --- a/arch/arm64/include/asm/efi.h > +++ b/arch/arm64/include/asm/efi.h > @@ -14,8 +14,16 @@ > > #ifdef CONFIG_EFI > extern void efi_init(void); > + > +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); > #else > #define efi_init() > + > +static inline > +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) > +{ > + return false; > +} > #endif > > int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); > diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S > index 75691a2641c1..67babd5f04c2 100644 > --- a/arch/arm64/kernel/efi-rt-wrapper.S > +++ b/arch/arm64/kernel/efi-rt-wrapper.S > @@ -6,7 +6,7 @@ > #include <linux/linkage.h> > > SYM_FUNC_START(__efi_rt_asm_wrapper) > - stp x29, x30, [sp, #-32]! > + stp x29, x30, [sp, #-112]! > mov x29, sp > > /* > @@ -16,6 +16,20 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) > */ > stp x1, x18, [sp, #16] > > + /* > + * Preserve all callee saved registers and record the stack pointer > + * value in a per-CPU variable so we can recover from synchronous > + * exceptions occurring while running the firmware routines. 
> + */ > + stp x19, x20, [sp, #32] > + stp x21, x22, [sp, #48] > + stp x23, x24, [sp, #64] > + stp x25, x26, [sp, #80] > + stp x27, x28, [sp, #96] > + > + adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 > + str x29, [x8] > + > /* > * We are lucky enough that no EFI runtime services take more than > * 5 arguments, so all are passed in registers rather than via the > @@ -31,7 +45,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) > > ldp x1, x2, [sp, #16] > cmp x2, x18 > - ldp x29, x30, [sp], #32 > + ldp x29, x30, [sp], #112 > b.ne 0f > ret > 0: > @@ -45,3 +59,18 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) > mov x18, x2 > b efi_handle_corrupted_x18 // tail call > SYM_FUNC_END(__efi_rt_asm_wrapper) > + > +SYM_FUNC_START(__efi_rt_asm_recover) > + ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 > + mov sp, x8 > + > + ldp x0, x18, [sp, #16] > + ldp x19, x20, [sp, #32] > + ldp x21, x22, [sp, #48] > + ldp x23, x24, [sp, #64] > + ldp x25, x26, [sp, #80] > + ldp x27, x28, [sp, #96] > + ldp x29, x30, [sp], #112 > + > + b efi_handle_runtime_exception > +SYM_FUNC_END(__efi_rt_asm_recover) > diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c > index e1be6c429810..7c1e62a20e1e 100644 > --- a/arch/arm64/kernel/efi.c > +++ b/arch/arm64/kernel/efi.c > @@ -9,6 +9,7 @@ > > #include <linux/efi.h> > #include <linux/init.h> > +#include <linux/percpu.h> > > #include <asm/efi.h> > > @@ -128,3 +129,28 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) > pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); > return s; > } > + > +asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); > + > +asmlinkage efi_status_t __efi_rt_asm_recover(void); > + > +asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) > +{ > + pr_err(FW_BUG "Fault occurred in EFI runtime service %s()\n", f); > + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); > + return EFI_ABORTED; > +} > + > +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) > +{ > + 
/* Check whether the exception occurred while running the firmware */ > + if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) > + return false; > + > + pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); > + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); > + dump_stack(); > + > + regs->pc = (u64)__efi_rt_asm_recover; > + return true; > +} > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index 5b391490e045..3e9cf9826417 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -30,6 +30,7 @@ > #include <asm/bug.h> > #include <asm/cmpxchg.h> > #include <asm/cpufeature.h> > +#include <asm/efi.h> > #include <asm/exception.h> > #include <asm/daifflags.h> > #include <asm/debug-monitors.h> > @@ -391,6 +392,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, > msg = "paging request"; > } > > + if (efi_runtime_fixup_exception(regs, msg)) > + return; > + > die_kernel_fault(msg, addr, esr, regs); > } > > -- > 2.35.1 >
On Wed, Nov 02, 2022 at 10:08:28AM +0100, Ard Biesheuvel wrote: > On Fri, 28 Oct 2022 at 17:01, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > Unlike x86, which has machinery to deal with page faults that occur > > during the execution of EFI runtime services, arm64 has nothing like > > that, and a synchronous exception raised by firmware code brings down > > the whole system. > > > > With more EFI based systems appearing that were not built to run Linux > > (such as the Windows-on-ARM laptops based on Qualcomm SOCs), as well as > > the introduction of PRM (platform specific firmware routines that are > > callable just like EFI runtime services), we are more likely to run into > > issues of this sort, and it is much more likely that we can identify and > > work around such issues if they don't bring down the system entirely. > > > > Since we already use a EFI runtime services call wrapper in assembler, > > we can quite easily add some code that captures the execution state at > > the point where the call is made, allowing us to revert to this state > > and proceed execution if the call triggered a synchronous exception. > > > > Given that the kernel and the firmware don't share any data structures > > that could end up in an indeterminate state, we can happily continue > > running, as long as we mark the EFI runtime services as unavailable from > > that point on. > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > Does anyone mind if I take this via the EFI tree for v6.1? No, feel free to take it. Acked-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 439e2bc5d5d8..d6cf535d8352 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,16 @@ #ifdef CONFIG_EFI extern void efi_init(void); + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() + +static inline +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + return false; +} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 75691a2641c1..67babd5f04c2 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include <linux/linkage.h> SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-32]! + stp x29, x30, [sp, #-112]! mov x29, sp /* @@ -16,6 +16,20 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] + /* + * Preserve all callee saved registers and record the stack pointer + * value in a per-CPU variable so we can recover from synchronous + * exceptions occurring while running the firmware routines. 
+ */ + stp x19, x20, [sp, #32] + stp x21, x22, [sp, #48] + stp x23, x24, [sp, #64] + stp x25, x26, [sp, #80] + stp x27, x28, [sp, #96] + + adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + str x29, [x8] + /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -31,7 +45,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #32 + ldp x29, x30, [sp], #112 b.ne 0f ret 0: @@ -45,3 +59,18 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) + +SYM_FUNC_START(__efi_rt_asm_recover) + ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + mov sp, x8 + + ldp x0, x18, [sp, #16] + ldp x19, x20, [sp, #32] + ldp x21, x22, [sp, #48] + ldp x23, x24, [sp, #64] + ldp x25, x26, [sp, #80] + ldp x27, x28, [sp, #96] + ldp x29, x30, [sp], #112 + + b efi_handle_runtime_exception +SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index e1be6c429810..7c1e62a20e1e 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,6 +9,7 @@ #include <linux/efi.h> #include <linux/init.h> +#include <linux/percpu.h> #include <asm/efi.h> @@ -128,3 +129,28 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } + +asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); + +asmlinkage efi_status_t __efi_rt_asm_recover(void); + +asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) +{ + pr_err(FW_BUG "Fault occurred in EFI runtime service %s()\n", f); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return EFI_ABORTED; +} + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + /* Check whether the exception occurred while running the firmware */ + if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) + 
return false; + + pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + dump_stack(); + + regs->pc = (u64)__efi_rt_asm_recover; + return true; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045..3e9cf9826417 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,6 +30,7 @@ #include <asm/bug.h> #include <asm/cmpxchg.h> #include <asm/cpufeature.h> +#include <asm/efi.h> #include <asm/exception.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -391,6 +392,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } + if (efi_runtime_fixup_exception(regs, msg)) + return; + die_kernel_fault(msg, addr, esr, regs); }
Unlike x86, which has machinery to deal with page faults that occur during the execution of EFI runtime services, arm64 has nothing like that, and a synchronous exception raised by firmware code brings down the whole system. With more EFI-based systems appearing that were not built to run Linux (such as the Windows-on-ARM laptops based on Qualcomm SoCs), as well as the introduction of PRM (platform-specific firmware routines that are callable just like EFI runtime services), we are more likely to run into issues of this sort, and it is much more likely that we can identify and work around such issues if they don't bring down the system entirely. Since we already use an EFI runtime services call wrapper in assembler, we can quite easily add some code that captures the execution state at the point where the call is made, allowing us to revert to this state and resume execution if the call triggered a synchronous exception. Given that the kernel and the firmware don't share any data structures that could end up in an indeterminate state, we can happily continue running, as long as we mark the EFI runtime services as unavailable from that point on. Signed-off-by: Ard Biesheuvel <ardb@kernel.org> --- arch/arm64/include/asm/efi.h | 8 ++++++++ arch/arm64/kernel/efi-rt-wrapper.S | 33 ++++++++++++++++++++++++++++-- arch/arm64/kernel/efi.c | 26 +++++++++++++++++++++++ arch/arm64/mm/fault.c | 4 ++++ 4 files changed, 69 insertions(+), 2 deletions(-)