diff mbox series

[RFT,v3,01/27] arm64: Cope with CPUs stuck in VHE mode

Message ID 20210304213902.83903-2-marcan@marcan.st
State New
Headers show
Series Apple M1 SoC platform bring-up | expand

Commit Message

Hector Martin March 4, 2021, 9:38 p.m. UTC
From: Marc Zyngier <maz@kernel.org>

It seems that the CPU known as Apple M1 has the terrible habit
of being stuck with HCR_EL2.E2H==1, in violation of the architecture.

Try and work around this deplorable state of affairs by detecting
the stuck bit early and short-circuit the nVHE dance. It is still
unknown whether there are many more such nuggets to be found...

Reported-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kernel/head.S     | 33 ++++++++++++++++++++++++++++++---
 arch/arm64/kernel/hyp-stub.S | 28 ++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 7 deletions(-)

Comments

Marc Zyngier March 24, 2021, 8 p.m. UTC | #1
On Wed, 24 Mar 2021 18:05:46 +0000,
Will Deacon <will@kernel.org> wrote:
> 

> On Fri, Mar 05, 2021 at 06:38:36AM +0900, Hector Martin wrote:

> > From: Marc Zyngier <maz@kernel.org>

> > 

> > It seems that the CPU known as Apple M1 has the terrible habit

> > of being stuck with HCR_EL2.E2H==1, in violation of the architecture.

> > 

> > Try and work around this deplorable state of affairs by detecting

> > the stuck bit early and short-circuit the nVHE dance. It is still

> > unknown whether there are many more such nuggets to be found...

> > 

> > Reported-by: Hector Martin <marcan@marcan.st>

> > Signed-off-by: Marc Zyngier <maz@kernel.org>

> > ---

> >  arch/arm64/kernel/head.S     | 33 ++++++++++++++++++++++++++++++---

> >  arch/arm64/kernel/hyp-stub.S | 28 ++++++++++++++++++++++++----

> >  2 files changed, 54 insertions(+), 7 deletions(-)

> > 

> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S

> > index 66b0e0b66e31..673002b11865 100644

> > --- a/arch/arm64/kernel/head.S

> > +++ b/arch/arm64/kernel/head.S

> > @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)

> >   * booted in EL1 or EL2 respectively.

> >   */

> >  SYM_FUNC_START(init_kernel_el)

> > -	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF

> > -	msr	sctlr_el1, x0

> > -

> >  	mrs	x0, CurrentEL

> >  	cmp	x0, #CurrentEL_EL2

> >  	b.eq	init_el2

> >  

> >  SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)

> > +	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF

> > +	msr	sctlr_el1, x0

> >  	isb

> >  	mov_q	x0, INIT_PSTATE_EL1

> >  	msr	spsr_el1, x0

> > @@ -504,6 +503,34 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)

> >  	msr	vbar_el2, x0

> >  	isb

> >  

> > +	/*

> > +	 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,

> > +	 * making it impossible to start in nVHE mode. Is that

> > +	 * compliant with the architecture? Absolutely not!

> > +	 */

> > +	mrs	x0, hcr_el2

> > +	and	x0, x0, #HCR_E2H

> > +	cbz	x0, 1f

> > +

> > +	/* Switching to VHE requires a sane SCTLR_EL1 as a start */

> > +	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF

> > +	msr_s	SYS_SCTLR_EL12, x0

> > +

> > +	/*

> > +	 * Force an eret into a helper "function", and let it return

> > +	 * to our original caller... This makes sure that we have

> > +	 * initialised the basic PSTATE state.

> > +	 */

> > +	mov	x0, #INIT_PSTATE_EL2

> > +	msr	spsr_el1, x0

> > +	adr_l	x0, stick_to_vhe

> > +	msr	elr_el1, x0

> > +	eret

> > +

> > +1:

> > +	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF

> > +	msr	sctlr_el1, x0

> > +

> >  	msr	elr_el2, lr

> >  	mov	w0, #BOOT_CPU_MODE_EL2

> >  	eret

> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S

> > index 5eccbd62fec8..c7601030ee82 100644

> > --- a/arch/arm64/kernel/hyp-stub.S

> > +++ b/arch/arm64/kernel/hyp-stub.S

> > @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)

> >  	ventry	el2_fiq_invalid			// FIQ EL2t

> >  	ventry	el2_error_invalid		// Error EL2t

> >  

> > -	ventry	el2_sync_invalid		// Synchronous EL2h

> > +	ventry	elx_sync			// Synchronous EL2h

> >  	ventry	el2_irq_invalid			// IRQ EL2h

> >  	ventry	el2_fiq_invalid			// FIQ EL2h

> >  	ventry	el2_error_invalid		// Error EL2h

> >  

> > -	ventry	el1_sync			// Synchronous 64-bit EL1

> > +	ventry	elx_sync			// Synchronous 64-bit EL1

> >  	ventry	el1_irq_invalid			// IRQ 64-bit EL1

> >  	ventry	el1_fiq_invalid			// FIQ 64-bit EL1

> >  	ventry	el1_error_invalid		// Error 64-bit EL1

> > @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)

> >  

> >  	.align 11

> >  

> > -SYM_CODE_START_LOCAL(el1_sync)

> > +SYM_CODE_START_LOCAL(elx_sync)

> >  	cmp	x0, #HVC_SET_VECTORS

> >  	b.ne	1f

> >  	msr	vbar_el2, x1

> > @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)

> >  

> >  9:	mov	x0, xzr

> >  	eret

> > -SYM_CODE_END(el1_sync)

> > +SYM_CODE_END(elx_sync)

> >  

> >  // nVHE? No way! Give me the real thing!

> >  SYM_CODE_START_LOCAL(mutate_to_vhe)

> > @@ -243,3 +243,23 @@ SYM_FUNC_START(switch_to_vhe)

> >  #endif

> >  	ret

> >  SYM_FUNC_END(switch_to_vhe)

> > +

> > +SYM_FUNC_START(stick_to_vhe)

> > +	/*

> > +	 * Make sure the switch to VHE cannot fail, by overriding the

> > +	 * override. This is hilarious.

> > +	 */

> > +	adr_l	x1, id_aa64mmfr1_override

> > +	add	x1, x1, #FTR_OVR_MASK_OFFSET

> > +	dc 	civac, x1

> > +	dsb	sy

> > +	isb

> 

> Why do we need an ISB here?


Hmmm. Probably me being paranoid and trying to come up with something
for Hector to test before I had access to the HW. The DSB is more than
enough to order CMO and load/store.

> > +	ldr	x0, [x1]

> > +	bic	x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)

> > +	str	x0, [x1]

> 

> I find it a bit bizarre doing this here, as for the primary CPU we're still

> a way away from parsing the early paramaters and for secondary CPUs this

> doesn't need to be done for each one. Furthermore, this same code is run

> on the resume path, which can probably then race with itself.


Agreed, this isn't great.

> Is it possible to do it later on the boot CPU only, e.g. in

> init_feature_override()? We should probably also log a warning that we're

> ignoring the option because nVHE is not available.


I've come up with this on top of the original patch, spitting a
warning when the right conditions are met. It's pretty ugly, but hey,
so is the HW this runs on.

	M.

diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index c7601030ee82..e6fa5cdd790a 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -245,19 +245,6 @@ SYM_FUNC_START(switch_to_vhe)
 SYM_FUNC_END(switch_to_vhe)
 
 SYM_FUNC_START(stick_to_vhe)
-	/*
-	 * Make sure the switch to VHE cannot fail, by overriding the
-	 * override. This is hilarious.
-	 */
-	adr_l	x1, id_aa64mmfr1_override
-	add	x1, x1, #FTR_OVR_MASK_OFFSET
-	dc 	civac, x1
-	dsb	sy
-	isb
-	ldr	x0, [x1]
-	bic	x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
-	str	x0, [x1]
-
 	mov	x0, #HVC_VHE_RESTART
 	hvc	#0
 	mov	x0, #BOOT_CPU_MODE_EL2
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 83f1c4b92095..ec07e150cf5c 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -195,6 +195,8 @@ static __init void parse_cmdline(void)
 		__parse_cmdline(prop, true);
 }
 
+static bool nvhe_impaired __initdata;
+
 /* Keep checkers quiet */
 void init_feature_override(void);
 
@@ -211,9 +213,32 @@ asmlinkage void __init init_feature_override(void)
 
 	parse_cmdline();
 
+	/*
+	 * If we ever reach this point while running VHE, we're
+	 * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+	 * the user was trying to force nVHE on us, proceed with
+	 * attitude adjustment.
+	 */
+	if (is_kernel_in_hyp_mode()) {
+		u64 mask = 0xfUL << ID_AA64MMFR1_VHE_SHIFT;
+
+		if ((id_aa64mmfr1_override.mask & mask) &&
+		    !(id_aa64mmfr1_override.val & mask)) {
+			nvhe_impaired = true;
+			id_aa64mmfr1_override.mask &= ~mask;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
 		if (regs[i]->override)
 			__flush_dcache_area(regs[i]->override,
 					    sizeof(*regs[i]->override));
 	}
 }
+
+static int __init check_feature_override(void)
+{
+	WARN_ON(nvhe_impaired);
+	return 0;
+}
+core_initcall(check_feature_override);

-- 
Without deviation from the norm, progress is not possible.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 66b0e0b66e31..673002b11865 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,14 +477,13 @@  EXPORT_SYMBOL(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 SYM_FUNC_START(init_kernel_el)
-	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
-	msr	sctlr_el1, x0
-
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
 	b.eq	init_el2
 
 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr	sctlr_el1, x0
 	isb
 	mov_q	x0, INIT_PSTATE_EL1
 	msr	spsr_el1, x0
@@ -504,6 +503,34 @@  SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	msr	vbar_el2, x0
 	isb
 
+	/*
+	 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
+	 * making it impossible to start in nVHE mode. Is that
+	 * compliant with the architecture? Absolutely not!
+	 */
+	mrs	x0, hcr_el2
+	and	x0, x0, #HCR_E2H
+	cbz	x0, 1f
+
+	/* Switching to VHE requires a sane SCTLR_EL1 as a start */
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr_s	SYS_SCTLR_EL12, x0
+
+	/*
+	 * Force an eret into a helper "function", and let it return
+	 * to our original caller... This makes sure that we have
+	 * initialised the basic PSTATE state.
+	 */
+	mov	x0, #INIT_PSTATE_EL2
+	msr	spsr_el1, x0
+	adr_l	x0, stick_to_vhe
+	msr	elr_el1, x0
+	eret
+
+1:
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr	sctlr_el1, x0
+
 	msr	elr_el2, lr
 	mov	w0, #BOOT_CPU_MODE_EL2
 	eret
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 5eccbd62fec8..c7601030ee82 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -27,12 +27,12 @@  SYM_CODE_START(__hyp_stub_vectors)
 	ventry	el2_fiq_invalid			// FIQ EL2t
 	ventry	el2_error_invalid		// Error EL2t
 
-	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	elx_sync			// Synchronous EL2h
 	ventry	el2_irq_invalid			// IRQ EL2h
 	ventry	el2_fiq_invalid			// FIQ EL2h
 	ventry	el2_error_invalid		// Error EL2h
 
-	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	elx_sync			// Synchronous 64-bit EL1
 	ventry	el1_irq_invalid			// IRQ 64-bit EL1
 	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
 	ventry	el1_error_invalid		// Error 64-bit EL1
@@ -45,7 +45,7 @@  SYM_CODE_END(__hyp_stub_vectors)
 
 	.align 11
 
-SYM_CODE_START_LOCAL(el1_sync)
+SYM_CODE_START_LOCAL(elx_sync)
 	cmp	x0, #HVC_SET_VECTORS
 	b.ne	1f
 	msr	vbar_el2, x1
@@ -71,7 +71,7 @@  SYM_CODE_START_LOCAL(el1_sync)
 
 9:	mov	x0, xzr
 	eret
-SYM_CODE_END(el1_sync)
+SYM_CODE_END(elx_sync)
 
 // nVHE? No way! Give me the real thing!
 SYM_CODE_START_LOCAL(mutate_to_vhe)
@@ -243,3 +243,23 @@  SYM_FUNC_START(switch_to_vhe)
 #endif
 	ret
 SYM_FUNC_END(switch_to_vhe)
+
+SYM_FUNC_START(stick_to_vhe)
+	/*
+	 * Make sure the switch to VHE cannot fail, by overriding the
+	 * override. This is hilarious.
+	 */
+	adr_l	x1, id_aa64mmfr1_override
+	add	x1, x1, #FTR_OVR_MASK_OFFSET
+	dc 	civac, x1
+	dsb	sy
+	isb
+	ldr	x0, [x1]
+	bic	x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
+	str	x0, [x1]
+
+	mov	x0, #HVC_VHE_RESTART
+	hvc	#0
+	mov	x0, #BOOT_CPU_MODE_EL2
+	ret
+SYM_FUNC_END(stick_to_vhe)