
[v5,8/8] arm64: enforce x1|x2|x3 == 0 upon kernel entry as per boot protocol

Message ID: 1426690527-14258-9-git-send-email-ard.biesheuvel@linaro.org
State: New

Commit Message

Ard Biesheuvel March 18, 2015, 2:55 p.m. UTC
According to the arm64 boot protocol, registers x1 to x3 should be
zero upon kernel entry, and non-zero values are reserved for future
use. This future use is going to be problematic if we never enforce
the current rules, so start enforcing them now, by emitting a warning
if non-zero values are detected.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/kernel/head.S  |  4 ++++
 arch/arm64/kernel/setup.c | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

Comments

Ard Biesheuvel March 18, 2015, 6:16 p.m. UTC | #1
On 18 March 2015 at 19:13, Mark Rutland <mark.rutland@arm.com> wrote:
> On Wed, Mar 18, 2015 at 02:55:27PM +0000, Ard Biesheuvel wrote:
>> According to the arm64 boot protocol, registers x1 to x3 should be
>> zero upon kernel entry, and non-zero values are reserved for future
>> use. This future use is going to be problematic if we never enforce
>> the current rules, so start enforcing them now, by emitting a warning
>> if non-zero values are detected.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm64/kernel/head.S  |  4 ++++
>>  arch/arm64/kernel/setup.c | 15 +++++++++++++++
>>  2 files changed, 19 insertions(+)
>>
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index a0fbd99efb89..8636c3cef006 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -233,6 +233,10 @@ section_table:
>>  #endif
>>
>>  ENTRY(stext)
>> +     adr_l   x8, boot_regs                   // record the contents of
>> +     stp     x0, x1, [x8]                    // x0 .. x3 at kernel entry
>> +     stp     x2, x3, [x8, #16]
>
> I think we should have a dc ivac here as we do for
> set_cpu_boot_mode_flag.
>
> That avoids a potential issue with boot_regs sharing a cacheline with
> data we write with the MMU on -- using __flush_dcache_area will result
> in a civac, so we could write back dirty data atop of the boot_regs if
> there were clean entries in the cache when we did the non-cacheable
> write.
>

Hmm, I wondered about that.

Could we instead just make it u64 __initconst boot_regs[] in setup.c ?

>> +
>>       mov     x21, x0                         // x21=FDT
>>       bl      el2_setup                       // Drop to EL1, w20=cpu_boot_mode
>>       adrp    x24, __PHYS_OFFSET
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index 6c5fb5aff325..2d5cae2de679 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -114,6 +114,11 @@ void __init early_print(const char *str, ...)
>>       printk("%s", buf);
>>  }
>>
>> +/*
>> + * The recorded values of x0 .. x3 upon kernel entry.
>> + */
>> +u64 __read_mostly boot_regs[4];
>> +
>>  void __init smp_setup_processor_id(void)
>>  {
>>       u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
>> @@ -387,6 +392,16 @@ void __init setup_arch(char **cmdline_p)
>>       conswitchp = &dummy_con;
>>  #endif
>>  #endif
>> +     /*
>> +      * boot_regs[] is written by the boot CPU with the caches off, so we
>> +      * need to ensure that we read the value from main memory
>> +      */
>> +     __flush_dcache_area(boot_regs, sizeof(boot_regs));
>> +     if (boot_regs[1] || boot_regs[2] || boot_regs[3]) {
>> +             pr_err("WARNING: boot protocol violation detected (x1 == %llx, x2 == %llx, x3 == %llx)\n",
>> +                     boot_regs[1], boot_regs[2], boot_regs[3]);
>> +             pr_err("WARNING: your bootloader may fail to load newer kernels\n");
>> +     }
>>  }
>>
>>  static int __init arm64_device_init(void)
>> --
>> 1.8.3.2
>>
>>
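[For readers following the thread: the pattern Mark points to is the one set_cpu_boot_mode_flag already uses — store with the MMU and caches off, then invalidate the written location by VA so that a later cacheable read cannot hit a stale copy of the line. Applied to boot_regs it might look roughly like the sketch below; the register choice, the dmb/dsb placement and the second dc ivac (to cover the second half of the 32-byte array if DminLine is small) are illustrative assumptions, not part of the posted patch.

ENTRY(stext)
	adr_l	x8, boot_regs			// record the contents of
	stp	x0, x1, [x8]			// x0 .. x3 at kernel entry
	stp	x2, x3, [x8, #16]
	dmb	sy				// order the non-cacheable stores
						// before the cache maintenance
	dc	ivac, x8			// invalidate any stale line(s)
	add	x9, x8, #16			// covering boot_regs[]
	dc	ivac, x9
	dsb	sy				// complete the invalidation
]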
Ard Biesheuvel March 18, 2015, 6:46 p.m. UTC | #2
On 18 March 2015 at 19:16, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 18 March 2015 at 19:13, Mark Rutland <mark.rutland@arm.com> wrote:
>> On Wed, Mar 18, 2015 at 02:55:27PM +0000, Ard Biesheuvel wrote:
>>> According to the arm64 boot protocol, registers x1 to x3 should be
>>> zero upon kernel entry, and non-zero values are reserved for future
>>> use. This future use is going to be problematic if we never enforce
>>> the current rules, so start enforcing them now, by emitting a warning
>>> if non-zero values are detected.
>>>
>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>> ---
>>>  arch/arm64/kernel/head.S  |  4 ++++
>>>  arch/arm64/kernel/setup.c | 15 +++++++++++++++
>>>  2 files changed, 19 insertions(+)
>>>
>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>> index a0fbd99efb89..8636c3cef006 100644
>>> --- a/arch/arm64/kernel/head.S
>>> +++ b/arch/arm64/kernel/head.S
>>> @@ -233,6 +233,10 @@ section_table:
>>>  #endif
>>>
>>>  ENTRY(stext)
>>> +     adr_l   x8, boot_regs                   // record the contents of
>>> +     stp     x0, x1, [x8]                    // x0 .. x3 at kernel entry
>>> +     stp     x2, x3, [x8, #16]
>>
>> I think we should have a dc ivac here as we do for
>> set_cpu_boot_mode_flag.
>>
>> That avoids a potential issue with boot_regs sharing a cacheline with
>> data we write with the MMU on -- using __flush_dcache_area will result
>> in a civac, so we could write back dirty data atop of the boot_regs if
>> there were clean entries in the cache when we did the non-cacheable
>> write.
>>
>
> Hmm, I wondered about that.
>
> Could we instead just make it u64 __initconst boot_regs[] in setup.c ?
>

Never mind, it's easier just to do the invalidate right after, and I
can drop the flush before the access.


>>> +
>>>       mov     x21, x0                         // x21=FDT
>>>       bl      el2_setup                       // Drop to EL1, w20=cpu_boot_mode
>>>       adrp    x24, __PHYS_OFFSET
>>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>>> index 6c5fb5aff325..2d5cae2de679 100644
>>> --- a/arch/arm64/kernel/setup.c
>>> +++ b/arch/arm64/kernel/setup.c
>>> @@ -114,6 +114,11 @@ void __init early_print(const char *str, ...)
>>>       printk("%s", buf);
>>>  }
>>>
>>> +/*
>>> + * The recorded values of x0 .. x3 upon kernel entry.
>>> + */
>>> +u64 __read_mostly boot_regs[4];
>>> +
>>>  void __init smp_setup_processor_id(void)
>>>  {
>>>       u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
>>> @@ -387,6 +392,16 @@ void __init setup_arch(char **cmdline_p)
>>>       conswitchp = &dummy_con;
>>>  #endif
>>>  #endif
>>> +     /*
>>> +      * boot_regs[] is written by the boot CPU with the caches off, so we
>>> +      * need to ensure that we read the value from main memory
>>> +      */
>>> +     __flush_dcache_area(boot_regs, sizeof(boot_regs));
>>> +     if (boot_regs[1] || boot_regs[2] || boot_regs[3]) {
>>> +             pr_err("WARNING: boot protocol violation detected (x1 == %llx, x2 == %llx, x3 == %llx)\n",
>>> +                     boot_regs[1], boot_regs[2], boot_regs[3]);
>>> +             pr_err("WARNING: your bootloader may fail to load newer kernels\n");
>>> +     }
>>>  }
>>>
>>>  static int __init arm64_device_init(void)
>>> --
>>> 1.8.3.2
>>>
>>>
Ard Biesheuvel March 18, 2015, 7:55 p.m. UTC | #3
On 18 March 2015 at 19:57, Mark Rutland <mark.rutland@arm.com> wrote:
> On Wed, Mar 18, 2015 at 06:46:11PM +0000, Ard Biesheuvel wrote:
>> On 18 March 2015 at 19:16, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>> > On 18 March 2015 at 19:13, Mark Rutland <mark.rutland@arm.com> wrote:
>> >> On Wed, Mar 18, 2015 at 02:55:27PM +0000, Ard Biesheuvel wrote:
>> >>> According to the arm64 boot protocol, registers x1 to x3 should be
>> >>> zero upon kernel entry, and non-zero values are reserved for future
>> >>> use. This future use is going to be problematic if we never enforce
>> >>> the current rules, so start enforcing them now, by emitting a warning
>> >>> if non-zero values are detected.
>> >>>
>> >>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> >>> ---
>> >>>  arch/arm64/kernel/head.S  |  4 ++++
>> >>>  arch/arm64/kernel/setup.c | 15 +++++++++++++++
>> >>>  2 files changed, 19 insertions(+)
>> >>>
>> >>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> >>> index a0fbd99efb89..8636c3cef006 100644
>> >>> --- a/arch/arm64/kernel/head.S
>> >>> +++ b/arch/arm64/kernel/head.S
>> >>> @@ -233,6 +233,10 @@ section_table:
>> >>>  #endif
>> >>>
>> >>>  ENTRY(stext)
>> >>> +     adr_l   x8, boot_regs                   // record the contents of
>> >>> +     stp     x0, x1, [x8]                    // x0 .. x3 at kernel entry
>> >>> +     stp     x2, x3, [x8, #16]
>> >>
>> >> I think we should have a dc ivac here as we do for
>> >> set_cpu_boot_mode_flag.
>> >>
>> >> That avoids a potential issue with boot_regs sharing a cacheline with
>> >> data we write with the MMU on -- using __flush_dcache_area will result
>> >> in a civac, so we could write back dirty data atop of the boot_regs if
>> >> there were clean entries in the cache when we did the non-cacheable
>> >> write.
>> >>
>> >
>> > Hmm, I wondered about that.
>> >
>> > Could we instead just make it u64 __initconst boot_regs[] in setup.c ?
>> >
>>
>> Never mind, it's easier just to do the invalidate right after, and I
>> can drop the flush before the access.
>
> Yup.
>
> Annoyingly the minimum cache line size seems to be a word (given the
> definition of CTR.DminLine), which means you need a few dc ivac
> instructions to be architecturally correct.
>

But that applies to cpu_boot_mode as well then?

I will add a call to __inval_cache_range() right after recording the
initial values; that should do the right thing regarding the line size.
Peter Maydell March 18, 2015, 10:26 p.m. UTC | #4
On 18 March 2015 at 18:57, Mark Rutland <mark.rutland@arm.com> wrote:
> Annoyingly the minimum cache line size seems to be a word (given the
> definition of CTR.DminLine), which means you need a few dc ivac
> instructions to be architecturally correct.

Section D3.4.1 says "For the purpose of these principles, a
cache entry covers at least 16 bytes and no more than 2KB of
contiguous address space, aligned to its size.", so (depending
on what you think the "for the purpose of these principles"
means) you may be OK there.

-- PMM
Ard Biesheuvel March 19, 2015, 7:30 a.m. UTC | #5
On 18 March 2015 at 21:24, Mark Rutland <mark.rutland@arm.com> wrote:
>> >> >>>  ENTRY(stext)
>> >> >>> +     adr_l   x8, boot_regs                   // record the contents of
>> >> >>> +     stp     x0, x1, [x8]                    // x0 .. x3 at kernel entry
>> >> >>> +     stp     x2, x3, [x8, #16]
>> >> >>
>> >> >> I think we should have a dc ivac here as we do for
>> >> >> set_cpu_boot_mode_flag.
>> >> >>
>> >> >> That avoids a potential issue with boot_regs sharing a cacheline with
>> >> >> data we write with the MMU on -- using __flush_dcache_area will result
>> >> >> in a civac, so we could write back dirty data atop of the boot_regs if
>> >> >> there were clean entries in the cache when we did the non-cacheable
>> >> >> write.
>> >> >>
>> >> >
>> >> > Hmm, I wondered about that.
>> >> >
>> >> > Could we instead just make it u64 __initconst boot_regs[] in setup.c ?
>> >> >
>> >>
>> >> Never mind, it's easier just to do the invalidate right after, and I
>> >> can drop the flush before the access.
>> >
>> > Yup.
>> >
>> > Annoyingly the minimum cache line size seems to be a word (given the
>> >> > definition of CTR.DminLine), which means you need a few dc ivac
>> > instructions to be architecturally correct.
>> >
>>
>> But that applies to cpu_boot_mode as well then?
>
> It writes a single word, so it happens to be safe.
>
>> I will add a call to __inval_cache_range() right after recording the
>> initial values; that should do the right thing regarding the line size.
>
> That works, with one caveat: you'll need a dmb sy between the writes and
> the call -- dc instructions by VA only hazard against normal cacheable
> accesses, and __inval_cache_range assumes the caches are on and so
> doesn't have a dmb prior to the dc instructions.
>

Does it matter at all that __inval_cache_range() will mostly end up
doing a civac for the whole array, since it uses civac, not ivac, for
both non-cacheline-aligned ends of the region, and the typical
cacheline size is larger than the size of the array? Couldn't that
also clobber what we just wrote with a stale cacheline?
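[To make the concern concrete, here is a rough C model of the behaviour being described — based on the description in this thread, not a verbatim transcription of arch/arm64/mm/cache.S. dc_civac(), dc_ivac(), dcache_line_size() and dsb_sy() are hypothetical stand-ins for the corresponding instructions.

/*
 * Rough model of __inval_cache_range(start, end): fully covered lines are
 * invalidated (ivac), but a partially covered line at either end is
 * cleaned+invalidated (civac) so that unrelated dirty data sharing the
 * line is not thrown away.  The "clean" half of civac is exactly what
 * could write a stale line back over boot_regs[].
 *
 * dc_civac()/dc_ivac()/dcache_line_size()/dsb_sy() are stand-ins for the
 * corresponding instructions, used here only for illustration.
 */
static void inval_cache_range_model(unsigned long start, unsigned long end)
{
	unsigned long line = dcache_line_size();	/* from CTR_EL0.DminLine */
	unsigned long mask = line - 1;

	if (end & mask)
		dc_civac(end & ~mask);		/* partial tail line */
	if (start & mask)
		dc_civac(start & ~mask);	/* partial head line */
	else
		dc_ivac(start);			/* aligned head line */
	for (start = (start & ~mask) + line; start < (end & ~mask); start += line)
		dc_ivac(start);			/* fully covered middle lines */
	dsb_sy();
}

With a 64-byte line and a 32-byte boot_regs[] that is not line-aligned, both the head and the tail take the civac path, which is what prompts the question above.]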
Ard Biesheuvel March 19, 2015, 10:38 a.m. UTC | #6
On 19 March 2015 at 11:35, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Mar 19, 2015 at 07:30:03AM +0000, Ard Biesheuvel wrote:
>> On 18 March 2015 at 21:24, Mark Rutland <mark.rutland@arm.com> wrote:
>> >> >> >>>  ENTRY(stext)
>> >> >> >>> +     adr_l   x8, boot_regs                   // record the contents of
>> >> >> >>> +     stp     x0, x1, [x8]                    // x0 .. x3 at kernel entry
>> >> >> >>> +     stp     x2, x3, [x8, #16]
>> >> >> >>
>> >> >> >> I think we should have a dc ivac here as we do for
>> >> >> >> set_cpu_boot_mode_flag.
>> >> >> >>
>> >> >> >> That avoids a potential issue with boot_regs sharing a cacheline with
>> >> >> >> data we write with the MMU on -- using __flush_dcache_area will result
>> >> >> >> in a civac, so we could write back dirty data atop of the boot_regs if
>> >> >> >> there were clean entries in the cache when we did the non-cacheable
>> >> >> >> write.
>> >> >> >>
>> >> >> >
>> >> >> > Hmm, I wondered about that.
>> >> >> >
>> >> >> > Could we instead just make it u64 __initconst boot_regs[] in setup.c ?
>> >> >> >
>> >> >>
>> >> >> Never mind, it's easier just to do the invalidate right after, and I
>> >> >> can drop the flush before the access.
>> >> >
>> >> > Yup.
>> >> >
>> >> > Annoyingly the minimum cache line size seems to be a word (given the
>> >> > definition of CTR.DminLine), which means you need a few dc ivac
>> >> > instructions to be architecturally correct.
>> >> >
>> >>
>> >> But that applies to cpu_boot_mode as well then?
>> >
>> > It writes a single word, so it happens to be safe.
>> >
>> >> I will add a call to __inval_cache_range() right after recording the
>> >> initial values; that should do the right thing regarding the line size.
>> >
>> > That works, with one caveat: you'll need a dmb sy between the writes and
>> > the call -- dc instructions by VA only hazard against normal cacheable
>> > accesses, and __inval_cache_range assumes the caches are on and so
>> > doesn't have a dmb prior to the dc instructions.
>> >
>>
>> Does it matter at all that __inval_cache_range() will mostly end up
>> doing a civac for the whole array, since it uses civac, not ivac, for
>> both non-cacheline-aligned ends of the region, and the typical
>> cacheline size is larger than the size of the array? Couldn't that
>> also clobber what we just wrote with a stale cacheline?
>
> Yes, though only if the memory were outside the footprint of the loaded
> Image (which per the boot protocol should be clean to the PoC).
>
> So I guess we should move the boot_regs structure back into head.S so it
> doesn't fall outside the Image footprint.
>

OK, that means .data should be fine too. __cacheline_aligned variables
are put into the .data section, so let me use that instead (.bss gets
cleared afterwards anyway, which is why I added __read_mostly initially).
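[Putting the thread's conclusions together, the reworked version being discussed would look roughly like the sketch below: invalidate (with a preceding dmb) right after the stores in head.S, keep the array in .data via __cacheline_aligned, and drop the flush before the read in setup_arch(). This is an illustration of the agreed direction, not the actual follow-up patch; the register usage and exact instruction sequence are assumptions.

ENTRY(stext)
	mov	x21, x0				// x21=FDT (x0 is reused below)
	adr_l	x8, boot_regs			// record the contents of
	stp	x0, x1, [x8]			// x0 .. x3 at kernel entry
	stp	x2, x3, [x8, #16]
	dmb	sy				// dc by VA only hazards against
						// cacheable accesses, so order the
						// non-cacheable stores explicitly
	mov	x0, x8				// __inval_cache_range(start, end)
	add	x1, x8, #32			// covers the whole 32-byte array
	bl	__inval_cache_range		// rounds to the line size internally
	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
	...

/*
 * setup.c: __cacheline_aligned places the array in .data, i.e. inside the
 * Image footprint that the boot protocol requires to be clean to the PoC
 * (.bss would only be zeroed after the stores, hence the earlier
 * __read_mostly workaround).
 */
u64 __cacheline_aligned boot_regs[4];

With the invalidation done while the caches are still off, the later cacheable read in setup_arch() fetches the recorded values directly from the PoC, so the __flush_dcache_area() call in the posted patch can be dropped, as noted in comment #2.]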

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a0fbd99efb89..8636c3cef006 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -233,6 +233,10 @@  section_table:
 #endif
 
 ENTRY(stext)
+	adr_l	x8, boot_regs			// record the contents of
+	stp	x0, x1, [x8]			// x0 .. x3 at kernel entry
+	stp	x2, x3, [x8, #16]
+
 	mov	x21, x0				// x21=FDT
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
 	adrp	x24, __PHYS_OFFSET
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 6c5fb5aff325..2d5cae2de679 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -114,6 +114,11 @@  void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+/*
+ * The recorded values of x0 .. x3 upon kernel entry.
+ */
+u64 __read_mostly boot_regs[4];
+
 void __init smp_setup_processor_id(void)
 {
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
@@ -387,6 +392,16 @@  void __init setup_arch(char **cmdline_p)
 	conswitchp = &dummy_con;
 #endif
 #endif
+	/*
+	 * boot_regs[] is written by the boot CPU with the caches off, so we
+	 * need to ensure that we read the value from main memory
+	 */
+	__flush_dcache_area(boot_regs, sizeof(boot_regs));
+	if (boot_regs[1] || boot_regs[2] || boot_regs[3]) {
+		pr_err("WARNING: boot protocol violation detected (x1 == %llx, x2 == %llx, x3 == %llx)\n",
+			boot_regs[1], boot_regs[2], boot_regs[3]);
+		pr_err("WARNING: your bootloader may fail to load newer kernels\n");
+	}
 }
 
 static int __init arm64_device_init(void)