Message ID | 20180223185729.8780-7-julien.grall@arm.com |
---|---|
State | Accepted |
Commit | ce73d72e976cc54f01c85e5c04edd5b9ce7b0906 |
Series | xen/arm: PSCI 1.1 and SMCCC-1.1 support and XSA-254 variant 2 update |
Hi,

On 23/02/18 18:57, Julien Grall wrote:
> The function SMCCC_ARCH_WORKAROUND_1 will be called by the guest for
> hardening the branch predictor. So we want the handling to be as fast as
> possible.
>
> As the mitigation is applied on every guest exit, we can check for the
> call before saving all the context and return very early.
>
> For now, only provide a fast path for HVC64 call. Because the code rely
> on 2 registers, x0 and x1 are saved in advance.
>
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> Reviewed-by: Volodymyr Babchuk <volodymyr.babchuk@epam.com>
> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>

Thanks, that looks good now.

Reviewed-by: Andre Przywara <andre.przywara@arm.com>

Cheers,
Andre.

> ---
> guest_sync only handle 64-bit guest, so I have only implemented the
> 64-bit side for now. We can discuss whether it is useful to
> implement it for 32-bit guests.
>
> We could also consider to implement the fast path for SMC64,
> althought a guest should always use HVC.
>
> I decided to keep the reviewed-by as mostly the documentation was
> updated to make it clearer.
>
> Changes in v4:
>     - Add Stefano's reviewed-by
>     - Use xzr to clobber x1 instead of x0
>     - Update comments in the code
>
> Changes in v2:
>     - Add Volodymyr's reviewed-by
> ---
>  xen/arch/arm/arm64/entry.S      | 59 +++++++++++++++++++++++++++++++++++++++--
>  xen/include/asm-arm/processor.h |  2 ++
>  2 files changed, 59 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
> index 6d99e46f0f..ffa9a1c492 100644
> --- a/xen/arch/arm/arm64/entry.S
> +++ b/xen/arch/arm/arm64/entry.S
> @@ -1,6 +1,7 @@
>  #include <asm/asm_defns.h>
>  #include <asm/regs.h>
>  #include <asm/alternative.h>
> +#include <asm/smccc.h>
>  #include <public/xen.h>
>
>  /*
> @@ -90,8 +91,12 @@ lr      .req    x30             /* link register */
>  .endm
>  /*
>   * Save state on entry to hypervisor, restore on exit
> + *
> + * save_x0_x1: Does the macro needs to save x0/x1? Defaults to 1
> + * If 0, we rely on the on x0/x1 to have been saved at the correct
> + * position on the stack before.
>   */
> -.macro  entry, hyp, compat
> +.macro  entry, hyp, compat, save_x0_x1=1
>          sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
>          push    x28, x29
>          push    x26, x27
> @@ -107,7 +112,16 @@ lr      .req    x30             /* link register */
>          push    x6, x7
>          push    x4, x5
>          push    x2, x3
> +        /*
> +         * The caller may already have saved x0/x1 on the stack at the
> +         * correct address and corrupt them with another value. Only
> +         * save them if save_x0_x1 == 1.
> +         */
> +        .if \save_x0_x1 == 1
>          push    x0, x1
> +        .else
> +        sub     sp, sp, #16
> +        .endif
>
>          .if \hyp == 1        /* Hypervisor mode */
>
> @@ -200,7 +214,48 @@ hyp_irq:
>          exit    hyp=1
>
>  guest_sync:
> -        entry   hyp=0, compat=0
> +        /*
> +         * Save x0, x1 in advance
> +         */
> +        stp     x0, x1, [sp, #-(UREGS_kernel_sizeof - UREGS_X0)]
> +
> +        /*
> +         * x1 is used because x0 may contain the function identifier.
> +         * This avoids to restore x0 from the stack.
> +         */
> +        mrs     x1, esr_el2
> +        lsr     x1, x1, #HSR_EC_SHIFT /* x1 = ESR_EL2.EC */
> +        cmp     x1, #HSR_EC_HVC64
> +        b.ne    1f                    /* Not a HVC skip fastpath. */
> +
> +        mrs     x1, esr_el2
> +        and     x1, x1, #0xffff       /* Check the immediate [0:16] */
> +        cbnz    x1, 1f                /* should be 0 for HVC #0 */
> +
> +        /*
> +         * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1.
> +         * The workaround has already been applied on the exception
> +         * entry from the guest, so let's quickly get back to the guest.
> +         *
> +         * Note that eor is used because the function identifier cannot
> +         * be encoded as an immediate for cmp.
> +         */
> +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> +        cbnz    w0, 1f
> +
> +        /*
> +         * Clobber both x0 and x1 to prevent leakage. Note that thanks
> +         * the eor, x0 = 0.
> +         */
> +        mov     x1, xzr
> +        eret
> +
> +1:
> +        /*
> +         * x0/x1 may have been scratch by the fast path above, so avoid
> +         * to save them.
> +         */
> +        entry   hyp=0, compat=0, save_x0_x1=0
>          /*
>           * The vSError will be checked while SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT
>           * is not set. If a vSError took place, the initial exception will be
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index c0f79d0093..222a02dd99 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -306,6 +306,8 @@
>  #define HDCR_TPM        (_AC(1,U)<<6)          /* Trap Performance Monitors accesses */
>  #define HDCR_TPMCR      (_AC(1,U)<<5)          /* Trap PMCR accesses */
>
> +#define HSR_EC_SHIFT                26
> +
>  #define HSR_EC_UNKNOWN              0x00
>  #define HSR_EC_WFI_WFE              0x01
>  #define HSR_EC_CP15_32              0x03
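As context for the commit message above: the call being fast-pathed is the guest-side branch predictor hardening invocation. Below is a minimal guest-side sketch, assuming an AArch64 guest using the SMCCC 1.1 HVC conduit and GCC-style inline assembly; the helper name is hypothetical and this is not code from the patch.

```c
#include <stdint.h>

/*
 * Function identifier taken from the SMCCC specification; it is also
 * what the patch's ARM_SMCCC_ARCH_WORKAROUND_1_FID resolves to.
 */
#define SMCCC_ARCH_WORKAROUND_1  0x80008000U

/*
 * Hypothetical guest-side helper: invoke the branch predictor hardening
 * workaround over the HVC conduit. The guest is expected to have probed
 * support (SMCCC 1.1 plus ARCH_WORKAROUND_1) via SMCCC_ARCH_FEATURES
 * beforehand; the call itself has no meaningful return value.
 */
static inline void smccc_arch_workaround_1(void)
{
    register uint64_t r0 asm("x0") = SMCCC_ARCH_WORKAROUND_1;

    /* Under SMCCC 1.1 only x0-x3 may be clobbered by the callee. */
    asm volatile("hvc #0"
                 : "+r" (r0)
                 :
                 : "x1", "x2", "x3", "memory");
}
```

Because a guest typically issues this call on hot paths, the hypervisor-side handling cost is directly visible to the guest, which is what motivates returning before the full context save as described in the commit message.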
```diff
diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
index 6d99e46f0f..ffa9a1c492 100644
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -1,6 +1,7 @@
 #include <asm/asm_defns.h>
 #include <asm/regs.h>
 #include <asm/alternative.h>
+#include <asm/smccc.h>
 #include <public/xen.h>
 
 /*
@@ -90,8 +91,12 @@ lr      .req    x30             /* link register */
 .endm
 /*
  * Save state on entry to hypervisor, restore on exit
+ *
+ * save_x0_x1: Does the macro needs to save x0/x1? Defaults to 1
+ * If 0, we rely on the on x0/x1 to have been saved at the correct
+ * position on the stack before.
  */
-.macro  entry, hyp, compat
+.macro  entry, hyp, compat, save_x0_x1=1
         sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
         push    x28, x29
         push    x26, x27
@@ -107,7 +112,16 @@ lr      .req    x30             /* link register */
         push    x6, x7
         push    x4, x5
         push    x2, x3
+        /*
+         * The caller may already have saved x0/x1 on the stack at the
+         * correct address and corrupt them with another value. Only
+         * save them if save_x0_x1 == 1.
+         */
+        .if \save_x0_x1 == 1
         push    x0, x1
+        .else
+        sub     sp, sp, #16
+        .endif
 
         .if \hyp == 1        /* Hypervisor mode */
 
@@ -200,7 +214,48 @@ hyp_irq:
         exit    hyp=1
 
 guest_sync:
-        entry   hyp=0, compat=0
+        /*
+         * Save x0, x1 in advance
+         */
+        stp     x0, x1, [sp, #-(UREGS_kernel_sizeof - UREGS_X0)]
+
+        /*
+         * x1 is used because x0 may contain the function identifier.
+         * This avoids to restore x0 from the stack.
+         */
+        mrs     x1, esr_el2
+        lsr     x1, x1, #HSR_EC_SHIFT /* x1 = ESR_EL2.EC */
+        cmp     x1, #HSR_EC_HVC64
+        b.ne    1f                    /* Not a HVC skip fastpath. */
+
+        mrs     x1, esr_el2
+        and     x1, x1, #0xffff       /* Check the immediate [0:16] */
+        cbnz    x1, 1f                /* should be 0 for HVC #0 */
+
+        /*
+         * Fastest path possible for ARM_SMCCC_ARCH_WORKAROUND_1.
+         * The workaround has already been applied on the exception
+         * entry from the guest, so let's quickly get back to the guest.
+         *
+         * Note that eor is used because the function identifier cannot
+         * be encoded as an immediate for cmp.
+         */
+        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
+        cbnz    w0, 1f
+
+        /*
+         * Clobber both x0 and x1 to prevent leakage. Note that thanks
+         * the eor, x0 = 0.
+         */
+        mov     x1, xzr
+        eret
+
+1:
+        /*
+         * x0/x1 may have been scratch by the fast path above, so avoid
+         * to save them.
+         */
+        entry   hyp=0, compat=0, save_x0_x1=0
         /*
          * The vSError will be checked while SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT
          * is not set. If a vSError took place, the initial exception will be
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index c0f79d0093..222a02dd99 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -306,6 +306,8 @@
 #define HDCR_TPM        (_AC(1,U)<<6)          /* Trap Performance Monitors accesses */
 #define HDCR_TPMCR      (_AC(1,U)<<5)          /* Trap PMCR accesses */
 
+#define HSR_EC_SHIFT                26
+
 #define HSR_EC_UNKNOWN              0x00
 #define HSR_EC_WFI_WFE              0x01
 #define HSR_EC_CP15_32              0x03
```
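For readers less used to arm64 assembly, the fast-path checks in guest_sync amount to the following C sketch. It is illustration only: the helper is hypothetical, while the constants mirror the patch (HSR_EC_SHIFT, HSR_EC_HVC64) and the SMCCC function identifier value, 0x80008000.

```c
#include <stdbool.h>
#include <stdint.h>

#define HSR_EC_SHIFT                     26          /* ESR_EL2.EC lives in bits [31:26] */
#define HSR_EC_HVC64                     0x16        /* HVC executed from AArch64 state */
#define ARM_SMCCC_ARCH_WORKAROUND_1_FID  0x80008000U /* SMCCC function identifier */

/*
 * Hypothetical helper mirroring the fast-path checks in guest_sync:
 * 'esr' is the raw ESR_EL2 value, 'x0' the guest's first argument register.
 */
static bool is_arch_workaround_1(uint64_t esr, uint64_t x0)
{
    /*
     * Only HVC64 traps are considered; like the assembly, this relies on
     * the bits above EC reading as zero.
     */
    if ( (esr >> HSR_EC_SHIFT) != HSR_EC_HVC64 )
        return false;

    /* The SMCCC mandates HVC #0, so the 16-bit immediate in the ISS must be 0. */
    if ( (esr & 0xffff) != 0 )
        return false;

    /*
     * The function identifier is passed in w0; in the assembly the eor
     * doubles as the compare and as the scrubbing of x0 before eret.
     */
    return (uint32_t)x0 == ARM_SMCCC_ARCH_WORKAROUND_1_FID;
}
```

Anything failing these checks falls through to the 1: label and takes the normal entry path with save_x0_x1=0, since x0/x1 were already stored on the stack by the stp at the top of guest_sync.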