Message ID | 20180522174254.27551-11-julien.grall@arm.com |
---|---|
State | Superseded |
Headers | show |
Series | xen/arm: SSBD (aka Spectre-v4) mitigation (XSA-263) | expand |
On Tue, 22 May 2018, Julien Grall wrote: > The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for > enabling/disabling the ssbd mitigation. So we want the handling to > be as fast as possible. > > The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and > also track the state of the workaround per-vCPU. > > Note that since we need to execute branches, this always executes after > the spectre-v2 mitigation. > > This code is based on KVM counterpart "arm64: KVM: Handle guest's > ARCH_WORKAROUND_2 requests" written by Marc Zyngier. > > This is part of XSA-263. > > Signed-off-by: Julien Grall <julien.grall@arm.com> I think the patch works as intended. > --- > xen/arch/arm/arm64/asm-offsets.c | 2 ++ > xen/arch/arm/arm64/entry.S | 43 +++++++++++++++++++++++++++++++++++++++- > xen/arch/arm/cpuerrata.c | 18 +++++++++++++++++ > 3 files changed, 62 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c > index ce24e44473..f5c696d092 100644 > --- a/xen/arch/arm/arm64/asm-offsets.c > +++ b/xen/arch/arm/arm64/asm-offsets.c > @@ -22,6 +22,7 @@ > void __dummy__(void) > { > OFFSET(UREGS_X0, struct cpu_user_regs, x0); > + OFFSET(UREGS_X1, struct cpu_user_regs, x1); > OFFSET(UREGS_LR, struct cpu_user_regs, lr); > > OFFSET(UREGS_SP, struct cpu_user_regs, sp); > @@ -45,6 +46,7 @@ void __dummy__(void) > BLANK(); > > DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); > + OFFSET(CPUINFO_flags, struct cpu_info, flags); > > OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context); > > diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S > index e2344e565f..8e25ff3997 100644 > --- a/xen/arch/arm/arm64/entry.S > +++ b/xen/arch/arm/arm64/entry.S > @@ -1,4 +1,6 @@ > #include <asm/asm_defns.h> > +#include <asm/current.h> > +#include <asm/macros.h> > #include <asm/regs.h> > #include <asm/alternative.h> > #include <asm/smccc.h> > @@ -241,7 +243,7 @@ guest_sync: > * be encoded as an 
immediate for cmp. > */ > eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID > - cbnz w0, guest_sync_slowpath > + cbnz w0, check_wa2 > > /* > * Clobber both x0 and x1 to prevent leakage. Note that thanks > @@ -250,6 +252,45 @@ guest_sync: > mov x1, xzr > eret > > +check_wa2: > + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ > + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID, so maybe we can skip this? > + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID > + cbnz w0, guest_sync_slowpath > +#ifdef CONFIG_ARM_SSBD > +alternative_cb arm_enable_wa2_handling > + b wa2_end > +alternative_cb_end > + /* Sanitize the argument */ > + mov x0, #-(UREGS_kernel_sizeof - UREGS_X1) /* x0 := offset of guest's x1 on the stack */ > + ldr x1, [sp, x0] /* Load guest's x1 */ > + cmp w1, wzr > + cset x1, ne > + > + /* > + * Update the guest flag. At this stage sp point after the field > + * guest_cpu_user_regs in cpu_info. > + */ > + adr_cpu_info x2 > + ldr x0, [x2, #CPUINFO_flags] > + bfi x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1 > + str x0, [x2, #CPUINFO_flags] > + > + /* Check that we actually need to perform the call */ > + ldr_this_cpu x0, ssbd_callback_required, x2 > + cbz x0, wa2_end > + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID > + smc #0 Shouldn't we make the call only if get_cpu_info()->flags changed? 
> +wa2_end: > + /* Don't leak data from the SMC call */ > + mov x1, xzr > + mov x2, xzr > + mov x3, xzr > +#endif /* !CONFIG_ARM_SSBD */ > + mov x0, xzr > + eret > guest_sync_slowpath: > /* > * x0/x1 may have been scratch by the fast path above, so avoid > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c > index f921721a66..54df4ff445 100644 > --- a/xen/arch/arm/cpuerrata.c > +++ b/xen/arch/arm/cpuerrata.c > @@ -7,6 +7,7 @@ > #include <xen/warning.h> > #include <asm/cpufeature.h> > #include <asm/cpuerrata.h> > +#include <asm/insn.h> > #include <asm/psci.h> > > /* Override macros from asm/page.h to make them work with mfn_t */ > @@ -272,6 +273,23 @@ static int __init parse_spec_ctrl(const char *s) > } > custom_param("spec-ctrl", parse_spec_ctrl); > > +/* Arm64 only for now as for Arm32 the workaround is currently handled in C. */ > +#ifdef CONFIG_ARM_64 > +void __init arm_enable_wa2_handling(const struct alt_instr *alt, > + const uint32_t *origptr, > + uint32_t *updptr, int nr_inst) > +{ > + BUG_ON(nr_inst != 1); > + > + /* > + * Only allow mitigation on guest ARCH_WORKAROUND_2 if the SSBD > + * state allow it to be flipped. > + */ > + if ( get_ssbd_state() == ARM_SSBD_RUNTIME ) > + *updptr = aarch64_insn_gen_nop(); > +} > +#endif > + > /* > * Assembly code may use the variable directly, so we need to make sure > * it fits in a register. > -- > 2.11.0 >
Hi Stefano, On 25/05/18 20:18, Stefano Stabellini wrote: > On Tue, 22 May 2018, Julien Grall wrote: >> The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for >> enabling/disabling the ssbd mitigation. So we want the handling to >> be as fast as possible. >> >> The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and >> also track the state of the workaround per-vCPU. >> >> Note that since we need to execute branches, this always executes after >> the spectre-v2 mitigation. >> >> This code is based on KVM counterpart "arm64: KVM: Handle guest's >> ARCH_WORKAROUND_2 requests" written by Marc Zyngier. >> >> This is part of XSA-263. >> >> Signed-off-by: Julien Grall <julien.grall@arm.com> > > I think the patch works as intended. > > >> --- >> xen/arch/arm/arm64/asm-offsets.c | 2 ++ >> xen/arch/arm/arm64/entry.S | 43 +++++++++++++++++++++++++++++++++++++++- >> xen/arch/arm/cpuerrata.c | 18 +++++++++++++++++ >> 3 files changed, 62 insertions(+), 1 deletion(-) >> >> diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c >> index ce24e44473..f5c696d092 100644 >> --- a/xen/arch/arm/arm64/asm-offsets.c >> +++ b/xen/arch/arm/arm64/asm-offsets.c >> @@ -22,6 +22,7 @@ >> void __dummy__(void) >> { >> OFFSET(UREGS_X0, struct cpu_user_regs, x0); >> + OFFSET(UREGS_X1, struct cpu_user_regs, x1); >> OFFSET(UREGS_LR, struct cpu_user_regs, lr); >> >> OFFSET(UREGS_SP, struct cpu_user_regs, sp); >> @@ -45,6 +46,7 @@ void __dummy__(void) >> BLANK(); >> >> DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); >> + OFFSET(CPUINFO_flags, struct cpu_info, flags); >> >> OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context); >> >> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S >> index e2344e565f..8e25ff3997 100644 >> --- a/xen/arch/arm/arm64/entry.S >> +++ b/xen/arch/arm/arm64/entry.S >> @@ -1,4 +1,6 @@ >> #include <asm/asm_defns.h> >> +#include <asm/current.h> >> +#include <asm/macros.h> >> #include 
<asm/regs.h> >> #include <asm/alternative.h> >> #include <asm/smccc.h> >> @@ -241,7 +243,7 @@ guest_sync: >> * be encoded as an immediate for cmp. >> */ >> eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID >> - cbnz w0, guest_sync_slowpath >> + cbnz w0, check_wa2 >> >> /* >> * Clobber both x0 and x1 to prevent leakage. Note that thanks >> @@ -250,6 +252,45 @@ guest_sync: >> mov x1, xzr >> eret >> >> +check_wa2: >> + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ >> + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID > > We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID, > so maybe we can skip this? This is necessary. w0 contains "guest x0" xor "ARM_SMCCC_ARCH_WORKAROUND_1_FID". So we first need to revert back the xor to get "guest x0". Note, it would be possible to combine the 2 xor. Something like: eor w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ ARM_SMCCC_ARCH_WORKAROUND_2_FID). Which version do you prefer? > > >> + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID >> + cbnz w0, guest_sync_slowpath >> +#ifdef CONFIG_ARM_SSBD >> +alternative_cb arm_enable_wa2_handling >> + b wa2_end >> +alternative_cb_end >> + /* Sanitize the argument */ >> + mov x0, #-(UREGS_kernel_sizeof - UREGS_X1) /* x0 := offset of guest's x1 on the stack */ >> + ldr x1, [sp, x0] /* Load guest's x1 */ >> + cmp w1, wzr >> + cset x1, ne >> + >> + /* >> + * Update the guest flag. At this stage sp point after the field >> + * guest_cpu_user_regs in cpu_info. >> + */ >> + adr_cpu_info x2 >> + ldr x0, [x2, #CPUINFO_flags] >> + bfi x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1 >> + str x0, [x2, #CPUINFO_flags] >> + >> + /* Check that we actually need to perform the call */ >> + ldr_this_cpu x0, ssbd_callback_required, x2 >> + cbz x0, wa2_end >> + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID >> + smc #0 > > Shouldn't we make the call only if get_cpu_info()->flags changed? There are no harm to call ARCH_WORKAROUND_2 if the flag didn't changed. 
However, the guest should already avoid making the call when it is not necessary. So that's not a common case we should care about. Cheers,
On Tue, 29 May 2018, Julien Grall wrote: > Hi Stefano, > > On 25/05/18 20:18, Stefano Stabellini wrote: > > On Tue, 22 May 2018, Julien Grall wrote: > > > The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for > > > enabling/disabling the ssbd mitigation. So we want the handling to > > > be as fast as possible. > > > > > > The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and > > > also track the state of the workaround per-vCPU. > > > > > > Note that since we need to execute branches, this always executes after > > > the spectre-v2 mitigation. > > > > > > This code is based on KVM counterpart "arm64: KVM: Handle guest's > > > ARCH_WORKAROUND_2 requests" written by Marc Zyngier. > > > > > > This is part of XSA-263. > > > > > > Signed-off-by: Julien Grall <julien.grall@arm.com> > > > > I think the patch works as intended. > > > > > > > --- > > > xen/arch/arm/arm64/asm-offsets.c | 2 ++ > > > xen/arch/arm/arm64/entry.S | 43 > > > +++++++++++++++++++++++++++++++++++++++- > > > xen/arch/arm/cpuerrata.c | 18 +++++++++++++++++ > > > 3 files changed, 62 insertions(+), 1 deletion(-) > > > > > > diff --git a/xen/arch/arm/arm64/asm-offsets.c > > > b/xen/arch/arm/arm64/asm-offsets.c > > > index ce24e44473..f5c696d092 100644 > > > --- a/xen/arch/arm/arm64/asm-offsets.c > > > +++ b/xen/arch/arm/arm64/asm-offsets.c > > > @@ -22,6 +22,7 @@ > > > void __dummy__(void) > > > { > > > OFFSET(UREGS_X0, struct cpu_user_regs, x0); > > > + OFFSET(UREGS_X1, struct cpu_user_regs, x1); > > > OFFSET(UREGS_LR, struct cpu_user_regs, lr); > > > OFFSET(UREGS_SP, struct cpu_user_regs, sp); > > > @@ -45,6 +46,7 @@ void __dummy__(void) > > > BLANK(); > > > DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); > > > + OFFSET(CPUINFO_flags, struct cpu_info, flags); > > > OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context); > > > diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S > > > index e2344e565f..8e25ff3997 100644 > > > --- 
a/xen/arch/arm/arm64/entry.S > > > +++ b/xen/arch/arm/arm64/entry.S > > > @@ -1,4 +1,6 @@ > > > #include <asm/asm_defns.h> > > > +#include <asm/current.h> > > > +#include <asm/macros.h> > > > #include <asm/regs.h> > > > #include <asm/alternative.h> > > > #include <asm/smccc.h> > > > @@ -241,7 +243,7 @@ guest_sync: > > > * be encoded as an immediate for cmp. > > > */ > > > eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID > > > - cbnz w0, guest_sync_slowpath > > > + cbnz w0, check_wa2 > > > /* > > > * Clobber both x0 and x1 to prevent leakage. Note that thanks > > > @@ -250,6 +252,45 @@ guest_sync: > > > mov x1, xzr > > > eret > > > +check_wa2: > > > + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ > > > + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID > > > > We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID, > > so maybe we can skip this? > > This is necessary. w0 contains "guest x0" xor > "ARM_SMCCC_ARCH_WORKAROUND_1_FID". So we first need to revert back the xor to > get "guest x0". > > Note, it would be possible to combine the 2 xor. Something like: > > eor w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ > ARM_SMCCC_ARCH_WORKAROUND_2_FID). > > Which version do you prefer? I understand now. Let's combine the two xor. > > > + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID > > > + cbnz w0, guest_sync_slowpath > > > +#ifdef CONFIG_ARM_SSBD > > > +alternative_cb arm_enable_wa2_handling > > > + b wa2_end > > > +alternative_cb_end > > > + /* Sanitize the argument */ > > > + mov x0, #-(UREGS_kernel_sizeof - UREGS_X1) /* x0 := offset > > > of guest's x1 on the stack */ > > > + ldr x1, [sp, x0] /* Load guest's > > > x1 */ > > > + cmp w1, wzr > > > + cset x1, ne > > > + > > > + /* > > > + * Update the guest flag. At this stage sp point after the field > > > + * guest_cpu_user_regs in cpu_info. 
> > > + */ > > > + adr_cpu_info x2 > > > + ldr x0, [x2, #CPUINFO_flags] > > > + bfi x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1 > > > + str x0, [x2, #CPUINFO_flags] > > > + > > > + /* Check that we actually need to perform the call */ > > > + ldr_this_cpu x0, ssbd_callback_required, x2 > > > + cbz x0, wa2_end > > > + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID > > > + smc #0 > > > > Shouldn't we make the call only if get_cpu_info()->flags changed? > > There are no harm to call ARCH_WORKAROUND_2 if the flag didn't changed. > However the guest should already avoid to do the call when it is not > necessary. So that's not a common case that we should care. All right
diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c index ce24e44473..f5c696d092 100644 --- a/xen/arch/arm/arm64/asm-offsets.c +++ b/xen/arch/arm/arm64/asm-offsets.c @@ -22,6 +22,7 @@ void __dummy__(void) { OFFSET(UREGS_X0, struct cpu_user_regs, x0); + OFFSET(UREGS_X1, struct cpu_user_regs, x1); OFFSET(UREGS_LR, struct cpu_user_regs, lr); OFFSET(UREGS_SP, struct cpu_user_regs, sp); @@ -45,6 +46,7 @@ void __dummy__(void) BLANK(); DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + OFFSET(CPUINFO_flags, struct cpu_info, flags); OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context); diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S index e2344e565f..8e25ff3997 100644 --- a/xen/arch/arm/arm64/entry.S +++ b/xen/arch/arm/arm64/entry.S @@ -1,4 +1,6 @@ #include <asm/asm_defns.h> +#include <asm/current.h> +#include <asm/macros.h> #include <asm/regs.h> #include <asm/alternative.h> #include <asm/smccc.h> @@ -241,7 +243,7 @@ guest_sync: * be encoded as an immediate for cmp. */ eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID - cbnz w0, guest_sync_slowpath + cbnz w0, check_wa2 /* * Clobber both x0 and x1 to prevent leakage. Note that thanks @@ -250,6 +252,45 @@ guest_sync: mov x1, xzr eret +check_wa2: + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID + eor w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID + cbnz w0, guest_sync_slowpath +#ifdef CONFIG_ARM_SSBD +alternative_cb arm_enable_wa2_handling + b wa2_end +alternative_cb_end + /* Sanitize the argument */ + mov x0, #-(UREGS_kernel_sizeof - UREGS_X1) /* x0 := offset of guest's x1 on the stack */ + ldr x1, [sp, x0] /* Load guest's x1 */ + cmp w1, wzr + cset x1, ne + + /* + * Update the guest flag. At this stage sp point after the field + * guest_cpu_user_regs in cpu_info. 
+ */ + adr_cpu_info x2 + ldr x0, [x2, #CPUINFO_flags] + bfi x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1 + str x0, [x2, #CPUINFO_flags] + + /* Check that we actually need to perform the call */ + ldr_this_cpu x0, ssbd_callback_required, x2 + cbz x0, wa2_end + + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID + smc #0 + +wa2_end: + /* Don't leak data from the SMC call */ + mov x1, xzr + mov x2, xzr + mov x3, xzr +#endif /* !CONFIG_ARM_SSBD */ + mov x0, xzr + eret guest_sync_slowpath: /* * x0/x1 may have been scratch by the fast path above, so avoid diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c index f921721a66..54df4ff445 100644 --- a/xen/arch/arm/cpuerrata.c +++ b/xen/arch/arm/cpuerrata.c @@ -7,6 +7,7 @@ #include <xen/warning.h> #include <asm/cpufeature.h> #include <asm/cpuerrata.h> +#include <asm/insn.h> #include <asm/psci.h> /* Override macros from asm/page.h to make them work with mfn_t */ @@ -272,6 +273,23 @@ static int __init parse_spec_ctrl(const char *s) } custom_param("spec-ctrl", parse_spec_ctrl); +/* Arm64 only for now as for Arm32 the workaround is currently handled in C. */ +#ifdef CONFIG_ARM_64 +void __init arm_enable_wa2_handling(const struct alt_instr *alt, + const uint32_t *origptr, + uint32_t *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + /* + * Only allow mitigation on guest ARCH_WORKAROUND_2 if the SSBD + * state allow it to be flipped. + */ + if ( get_ssbd_state() == ARM_SSBD_RUNTIME ) + *updptr = aarch64_insn_gen_nop(); +} +#endif + /* * Assembly code may use the variable directly, so we need to make sure * it fits in a register.
The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for enabling/disabling the ssbd mitigation. So we want the handling to be as fast as possible. The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and also track the state of the workaround per-vCPU. Note that since we need to execute branches, this always executes after the spectre-v2 mitigation. This code is based on KVM counterpart "arm64: KVM: Handle guest's ARCH_WORKAROUND_2 requests" written by Marc Zyngier. This is part of XSA-263. Signed-off-by: Julien Grall <julien.grall@arm.com> --- xen/arch/arm/arm64/asm-offsets.c | 2 ++ xen/arch/arm/arm64/entry.S | 43 +++++++++++++++++++++++++++++++++++++++- xen/arch/arm/cpuerrata.c | 18 +++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-)