diff mbox

[RFC,3/3] arm64: KVM: use ID map with increased VA range if required

Message ID 1424964547-2118-4-git-send-email-ard.biesheuvel@linaro.org
State New
Headers show

Commit Message

Ard Biesheuvel Feb. 26, 2015, 3:29 p.m. UTC
This patch modifies the HYP init code so it can deal with system
RAM residing at an offset which exceeds the reach of VA_BITS.

Like for EL1, this involves configuring an additional level of
translation for the ID map. However, in case of EL2, this implies
that all translations use the extra level, as we cannot seamlessly
switch between translation tables with different numbers of
translation levels. For this reason, the ID map is merged with
the runtime HYP map, since they don't overlap anyway.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/kvm/hyp-init.S | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

Comments

Ard Biesheuvel Feb. 26, 2015, 4:56 p.m. UTC | #1
On 26 February 2015 at 16:11, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Thu, Feb 26, 2015 at 03:29:07PM +0000, Ard Biesheuvel wrote:
>> +     /*
>> +      * If we are running with VA_BITS < 48, we may be running with an extra
>> +      * level of translation in the ID map. This is only the case if system
>> +      * RAM is out of range for the currently configured page size and number
>> +      * of translation levels, in which case we will also need the extra
>> +      * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
>> +      *
>> +      * However, at EL2, there is only one TTBR register, and we can't switch
>> +      * between translation tables *and* update TCR_EL2.T0SZ at the same
>> +      * time. Bottom line: we need the extra level in *both* our translation
>> +      * tables.
>
> It doesn't sound too nice but I'm not sure we can do much about it.
>

I don't think there is.

>> +      *
>> +      * Fortunately, there is an easy way out: the existing ID map, with the
>> +      * extra level, can be reused for both. The kernel image is already
>> +      * identity mapped at a high virtual offset, which leaves VA_BITS of
>> +      * address space at the low end to put our runtime HYP mappings.
>> +      */
>
> Another aspect here is that the Hyp VA starts at HYP_PAGE_OFFSET which
> is 1 << (VA_BITS - 1). On some platforms we may get an overlap with the
> physical memory (and the original idmap). Could we switch to the
> dedicated hyp TTBR at this point, with the extra level? Functions like
> __create_hyp_mapping() don't need to know about this extra level as
> hyp_pgd only knows about 4 levels but TTBR0_EL2 would point to the extra
> level.
>

The original id map only covers the kernel image, not all of system
ram (and it doesn't have to). The only way the hyp VA range could
overlap with the existing ID mapping while we are running with the
additional level is if KERNEL_START < (1<<VA_BITS) and KERNEL_END >=
(1<<VA_BITS). I don't think the pgtable code in head.S can deal with
that anyway atm. Otherwise, I don't see how they would ever overlap.

If we are not running with the additional level, then we keep the
separate translation tables. We could decide to reuse the existing ID
map in that case as well, but this is fairly trivial to implement so I
left it out for now.
Ard Biesheuvel Feb. 26, 2015, 6:17 p.m. UTC | #2
On 26 February 2015 at 18:06, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 26/02/15 15:29, Ard Biesheuvel wrote:
>> This patch modifies the HYP init code so it can deal with system
>> RAM residing at an offset which exceeds the reach of VA_BITS.
>>
>> Like for EL1, this involves configuring an additional level of
>> translation for the ID map. However, in case of EL2, this implies
>> that all translations use the extra level, as we cannot seamlessly
>> switch between translation tables with different numbers of
>> translation levels. For this reason, the ID map is merged with
>> the runtime HYP map, since they don't overlap anyway.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm64/kvm/hyp-init.S | 40 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>> index c3191168a994..0af16bce6316 100644
>> --- a/arch/arm64/kvm/hyp-init.S
>> +++ b/arch/arm64/kvm/hyp-init.S
>> @@ -20,6 +20,7 @@
>>  #include <asm/assembler.h>
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_mmu.h>
>> +#include <asm/pgtable-hwdef.h>
>>
>>       .text
>>       .pushsection    .hyp.idmap.text, "ax"
>> @@ -58,13 +59,44 @@ __invalid:
>>        */
>>  __do_hyp_init:
>>
>> -     msr     ttbr0_el2, x0
>> -
>>       mrs     x4, tcr_el1
>>       ldr     x5, =TCR_EL2_MASK
>>       and     x4, x4, x5
>>       ldr     x5, =TCR_EL2_FLAGS
>>       orr     x4, x4, x5
>> +
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +     /*
>> +      * If we are running with VA_BITS < 48, we may be running with an extra
>> +      * level of translation in the ID map. This is only the case if system
>> +      * RAM is out of range for the currently configured page size and number
>> +      * of translation levels, in which case we will also need the extra
>> +      * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
>> +      *
>> +      * However, at EL2, there is only one TTBR register, and we can't switch
>> +      * between translation tables *and* update TCR_EL2.T0SZ at the same
>> +      * time. Bottom line: we need the extra level in *both* our translation
>> +      * tables.
>> +      *
>> +      * Fortunately, there is an easy way out: the existing ID map, with the
>> +      * extra level, can be reused for both. The kernel image is already
>> +      * identity mapped at a high virtual offset, which leaves VA_BITS of
>> +      * address space at the low end to put our runtime HYP mappings.
>> +      */
>> +     adrp    x5, idmap_t0sz                  // get ID map TCR.T0SZ
>> +     ldr     x5, [x5, :lo12:idmap_t0sz]
>> +     cmp     x5, TCR_T0SZ(VA_BITS)           // extra level configured?
>> +     b.ge    1f                              // if not, skip
>> +
>> +     bfi     x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
>> +
>> +     adrp    x0, idmap_pg_dir        // get root of ID map
>> +     orr     x5, x1, PMD_TYPE_TABLE  // table entry pointing at HYP pgd
>> +     str     x5, [x0]                // store at offset #0
>> +     mov     x1, #0
>> +1:
>
> Wow. This is making my head spin a bit.
>
> Nitpick: shouldn't you use PUD_TYPE_TABLE instead of PMD_TYPE_TABLE
> (yeah, I know, that's the same thing...)?
>

I think both are equally inappropriate, considering that this is the
level above PGD. But I am happy to switch one for the other ...


>> +#endif
>> +     msr     ttbr0_el2, x0
>>       msr     tcr_el2, x4
>>
>>       ldr     x4, =VTCR_EL2_FLAGS
>> @@ -91,6 +123,9 @@ __do_hyp_init:
>>       msr     sctlr_el2, x4
>>       isb
>>
>> +     /* Skip the trampoline dance if we merged the boot and runtime PGDs */
>> +     cbz     x1, merged
>> +
>>       /* MMU is now enabled. Get ready for the trampoline dance */
>>       ldr     x4, =TRAMPOLINE_VA
>>       adr     x5, target
>> @@ -105,6 +140,7 @@ target: /* We're now in the trampoline code, switch page tables */
>>       tlbi    alle2
>>       dsb     sy
>>
>> +merged:
>>       /* Set the stack and new vectors */
>>       kern_hyp_va     x2
>>       mov     sp, x2
>>
>
> The one thing that worries here is that our EL1 idmap is not a strict
> idmap anymore. I really wonder what we can break with that...
>

Yes, that is the question I had myself. The other issue is that we
never remove the entry we add at the root level in this code, which I
don't think matters that much but it would be good if someone could
confirm.

> But hey, it's so mad I like it! ;-)
>

Thanks!
Ard Biesheuvel Feb. 26, 2015, 7:04 p.m. UTC | #3
On 26 February 2015 at 18:51, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 26/02/15 18:17, Ard Biesheuvel wrote:
>> On 26 February 2015 at 18:06, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On 26/02/15 15:29, Ard Biesheuvel wrote:
>>>> This patch modifies the HYP init code so it can deal with system
>>>> RAM residing at an offset which exceeds the reach of VA_BITS.
>>>>
>>>> Like for EL1, this involves configuring an additional level of
>>>> translation for the ID map. However, in case of EL2, this implies
>>>> that all translations use the extra level, as we cannot seamlessly
>>>> switch between translation tables with different numbers of
>>>> translation levels. For this reason, the ID map is merged with
>>>> the runtime HYP map, since they don't overlap anyway.
>>>>
>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>>> ---
>>>>  arch/arm64/kvm/hyp-init.S | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
>>>> index c3191168a994..0af16bce6316 100644
>>>> --- a/arch/arm64/kvm/hyp-init.S
>>>> +++ b/arch/arm64/kvm/hyp-init.S
>>>> @@ -20,6 +20,7 @@
>>>>  #include <asm/assembler.h>
>>>>  #include <asm/kvm_arm.h>
>>>>  #include <asm/kvm_mmu.h>
>>>> +#include <asm/pgtable-hwdef.h>
>>>>
>>>>       .text
>>>>       .pushsection    .hyp.idmap.text, "ax"
>>>> @@ -58,13 +59,44 @@ __invalid:
>>>>        */
>>>>  __do_hyp_init:
>>>>
>>>> -     msr     ttbr0_el2, x0
>>>> -
>>>>       mrs     x4, tcr_el1
>>>>       ldr     x5, =TCR_EL2_MASK
>>>>       and     x4, x4, x5
>>>>       ldr     x5, =TCR_EL2_FLAGS
>>>>       orr     x4, x4, x5
>>>> +
>>>> +#ifndef CONFIG_ARM64_VA_BITS_48
>>>> +     /*
>>>> +      * If we are running with VA_BITS < 48, we may be running with an extra
>>>> +      * level of translation in the ID map. This is only the case if system
>>>> +      * RAM is out of range for the currently configured page size and number
>>>> +      * of translation levels, in which case we will also need the extra
>>>> +      * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
>>>> +      *
>>>> +      * However, at EL2, there is only one TTBR register, and we can't switch
>>>> +      * between translation tables *and* update TCR_EL2.T0SZ at the same
>>>> +      * time. Bottom line: we need the extra level in *both* our translation
>>>> +      * tables.
>>>> +      *
>>>> +      * Fortunately, there is an easy way out: the existing ID map, with the
>>>> +      * extra level, can be reused for both. The kernel image is already
>>>> +      * identity mapped at a high virtual offset, which leaves VA_BITS of
>>>> +      * address space at the low end to put our runtime HYP mappings.
>>>> +      */
>>>> +     adrp    x5, idmap_t0sz                  // get ID map TCR.T0SZ
>>>> +     ldr     x5, [x5, :lo12:idmap_t0sz]
>>>> +     cmp     x5, TCR_T0SZ(VA_BITS)           // extra level configured?
>>>> +     b.ge    1f                              // if not, skip
>>>> +
>>>> +     bfi     x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
>>>> +
>>>> +     adrp    x0, idmap_pg_dir        // get root of ID map
>>>> +     orr     x5, x1, PMD_TYPE_TABLE  // table entry pointing at HYP pgd
>>>> +     str     x5, [x0]                // store at offset #0
>>>> +     mov     x1, #0
>>>> +1:
>>>
>>> Wow. This is making my head spin a bit.
>>>
>>> Nitpick: shouldn't you use PUD_TYPE_TABLE instead of PMD_TYPE_TABLE
>>> (yeah, I know, that's the same thing...)?
>>>
>>
>> I think both are equally inappropriate, considering that this is the
>> level above PGD. But I am happy to switch one for the other ...
>
> Yeah, brainfart here. We'd need a PGD_TYPE_TABLE, but I don't think it's
> worth the hassle.
>
>>
>>>> +#endif
>>>> +     msr     ttbr0_el2, x0
>>>>       msr     tcr_el2, x4
>>>>
>>>>       ldr     x4, =VTCR_EL2_FLAGS
>>>> @@ -91,6 +123,9 @@ __do_hyp_init:
>>>>       msr     sctlr_el2, x4
>>>>       isb
>>>>
>>>> +     /* Skip the trampoline dance if we merged the boot and runtime PGDs */
>>>> +     cbz     x1, merged
>>>> +
>>>>       /* MMU is now enabled. Get ready for the trampoline dance */
>>>>       ldr     x4, =TRAMPOLINE_VA
>>>>       adr     x5, target
>>>> @@ -105,6 +140,7 @@ target: /* We're now in the trampoline code, switch page tables */
>>>>       tlbi    alle2
>>>>       dsb     sy
>>>>
>>>> +merged:
>>>>       /* Set the stack and new vectors */
>>>>       kern_hyp_va     x2
>>>>       mov     sp, x2
>>>>
>>>
>>> The one thing that worries here is that our EL1 idmap is not a strict
>>> idmap anymore. I really wonder what we can break with that...
>>>
>>
>> Yes, that is the question I had myself. The other issue is that we
>> never remove the entry we add at the root level in this code, which I
>> don't think matters that much but it would be good if someone could
>> confirm.
>
> Do you mean having both the idmap and the runtime mappings in EL2? I'm
> trying to break it, but I don't see any obvious issues, except for the
> semi-religious fear of keeping unused mappings around....
>

No, I think that part is sound. My concern is that the pointer to
hyp_pgd is never removed if hyp_pgd itself is ever taken down and
freed.


> Worse case, we could have a hyp-specific idmap, and nuke/populate the
> idmap pgd entry as we see fit. One thing at a time...
>
>         M.
> --
> Jazz is not dead. It just smells funny...
diff mbox

Patch

diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index c3191168a994..0af16bce6316 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,6 +20,7 @@ 
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -58,13 +59,44 @@  __invalid:
 	 */
 __do_hyp_init:
 
-	msr	ttbr0_el2, x0
-
 	mrs	x4, tcr_el1
 	ldr	x5, =TCR_EL2_MASK
 	and	x4, x4, x5
 	ldr	x5, =TCR_EL2_FLAGS
 	orr	x4, x4, x5
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+	/*
+	 * If we are running with VA_BITS < 48, we may be running with an extra
+	 * level of translation in the ID map. This is only the case if system
+	 * RAM is out of range for the currently configured page size and number
+	 * of translation levels, in which case we will also need the extra
+	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+	 *
+	 * However, at EL2, there is only one TTBR register, and we can't switch
+	 * between translation tables *and* update TCR_EL2.T0SZ at the same
+	 * time. Bottom line: we need the extra level in *both* our translation
+	 * tables.
+	 *
+	 * Fortunately, there is an easy way out: the existing ID map, with the
+	 * extra level, can be reused for both. The kernel image is already
+	 * identity mapped at a high virtual offset, which leaves VA_BITS of
+	 * address space at the low end to put our runtime HYP mappings.
+	 */
+	adrp	x5, idmap_t0sz			// get ID map TCR.T0SZ
+	ldr	x5, [x5, :lo12:idmap_t0sz]
+	cmp	x5, TCR_T0SZ(VA_BITS)		// extra level configured?
+	b.ge	1f				// if not, skip
+
+	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
+	adrp	x0, idmap_pg_dir	// get root of ID map
+	orr	x5, x1, PMD_TYPE_TABLE	// table entry pointing at HYP pgd
+	str	x5, [x0]		// store at offset #0
+	mov	x1, #0
+1:
+#endif
+	msr	ttbr0_el2, x0
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
@@ -91,6 +123,9 @@  __do_hyp_init:
 	msr	sctlr_el2, x4
 	isb
 
+	/* Skip the trampoline dance if we merged the boot and runtime PGDs */
+	cbz	x1, merged
+
 	/* MMU is now enabled. Get ready for the trampoline dance */
 	ldr	x4, =TRAMPOLINE_VA
 	adr	x5, target
@@ -105,6 +140,7 @@  target: /* We're now in the trampoline code, switch page tables */
 	tlbi	alle2
 	dsb	sy
 
+merged:
 	/* Set the stack and new vectors */
 	kern_hyp_va	x2
 	mov	sp, x2