diff mbox series

riscv: Do not save the scratch CSR during suspend

Message ID 20240312195641.1830521-1-samuel.holland@sifive.com
State Accepted
Commit fa7d7339016ab7850258e85d6adfd4c4abca5498
Headers show
Series riscv: Do not save the scratch CSR during suspend | expand

Commit Message

Samuel Holland March 12, 2024, 7:56 p.m. UTC
While the processor is executing kernel code, the value of the scratch
CSR is always zero, so there is no need to save the value. Continue to
write the CSR during the resume flow, so we do not rely on firmware to
initialize it.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---

 arch/riscv/include/asm/suspend.h | 1 -
 arch/riscv/kernel/suspend.c      | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

Comments

JeeHeng Sia March 15, 2024, 4:55 a.m. UTC | #1
> -----Original Message-----
> From: Samuel Holland <samuel.holland@sifive.com>
> Sent: Wednesday, March 13, 2024 3:57 AM
> To: Palmer Dabbelt <palmer@dabbelt.com>; linux-riscv@lists.infradead.org
> Cc: Samuel Holland <samuel.holland@sifive.com>; Albert Ou <aou@eecs.berkeley.edu>; Andrew Jones <ajones@ventanamicro.com>;
> Conor Dooley <conor.dooley@microchip.com>; Leyfoon Tan <leyfoon.tan@starfivetech.com>; Paul Walmsley
> <paul.walmsley@sifive.com>; Pavel Machek <pavel@ucw.cz>; Rafael J. Wysocki <rafael@kernel.org>; JeeHeng Sia
> <jeeheng.sia@starfivetech.com>; linux-kernel@vger.kernel.org; linux-pm@vger.kernel.org
> Subject: [PATCH] riscv: Do not save the scratch CSR during suspend
> 
> While the processor is executing kernel code, the value of the scratch
> CSR is always zero, so there is no need to save the value. Continue to
> write the CSR during the resume flow, so we do not rely on firmware to
> initialize it.
> 
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> ---
> 
>  arch/riscv/include/asm/suspend.h | 1 -
>  arch/riscv/kernel/suspend.c      | 3 +--
>  2 files changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
> index 491296a335d0..6569eefacf38 100644
> --- a/arch/riscv/include/asm/suspend.h
> +++ b/arch/riscv/include/asm/suspend.h
> @@ -13,7 +13,6 @@ struct suspend_context {
>  	/* Saved and restored by low-level functions */
>  	struct pt_regs regs;
>  	/* Saved and restored by high-level functions */
> -	unsigned long scratch;
>  	unsigned long envcfg;
>  	unsigned long tvec;
>  	unsigned long ie;
> diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
> index 299795341e8a..3d306d8a253d 100644
> --- a/arch/riscv/kernel/suspend.c
> +++ b/arch/riscv/kernel/suspend.c
> @@ -14,7 +14,6 @@
> 
>  void suspend_save_csrs(struct suspend_context *context)
>  {
> -	context->scratch = csr_read(CSR_SCRATCH);
>  	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
>  		context->envcfg = csr_read(CSR_ENVCFG);
>  	context->tvec = csr_read(CSR_TVEC);
> @@ -37,7 +36,7 @@ void suspend_save_csrs(struct suspend_context *context)
> 
>  void suspend_restore_csrs(struct suspend_context *context)
>  {
> -	csr_write(CSR_SCRATCH, context->scratch);
> +	csr_write(CSR_SCRATCH, 0);
If the register is always zero, do we need to explicitly write zero to the register during resume?
>  	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
>  		csr_write(CSR_ENVCFG, context->envcfg);
>  	csr_write(CSR_TVEC, context->tvec);
> --
> 2.43.1
Samuel Holland March 21, 2024, 11:51 p.m. UTC | #2
On 2024-03-14 11:55 PM, JeeHeng Sia wrote:
> 
> 
>> -----Original Message-----
>> From: Samuel Holland <samuel.holland@sifive.com>
>> Sent: Wednesday, March 13, 2024 3:57 AM
>> To: Palmer Dabbelt <palmer@dabbelt.com>; linux-riscv@lists.infradead.org
>> Cc: Samuel Holland <samuel.holland@sifive.com>; Albert Ou <aou@eecs.berkeley.edu>; Andrew Jones <ajones@ventanamicro.com>;
>> Conor Dooley <conor.dooley@microchip.com>; Leyfoon Tan <leyfoon.tan@starfivetech.com>; Paul Walmsley
>> <paul.walmsley@sifive.com>; Pavel Machek <pavel@ucw.cz>; Rafael J. Wysocki <rafael@kernel.org>; JeeHeng Sia
>> <jeeheng.sia@starfivetech.com>; linux-kernel@vger.kernel.org; linux-pm@vger.kernel.org
>> Subject: [PATCH] riscv: Do not save the scratch CSR during suspend
>>
>> While the processor is executing kernel code, the value of the scratch
>> CSR is always zero, so there is no need to save the value. Continue to
>> write the CSR during the resume flow, so we do not rely on firmware to
>> initialize it.
>>
>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>> ---
>>
>>  arch/riscv/include/asm/suspend.h | 1 -
>>  arch/riscv/kernel/suspend.c      | 3 +--
>>  2 files changed, 1 insertion(+), 3 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
>> index 491296a335d0..6569eefacf38 100644
>> --- a/arch/riscv/include/asm/suspend.h
>> +++ b/arch/riscv/include/asm/suspend.h
>> @@ -13,7 +13,6 @@ struct suspend_context {
>>  	/* Saved and restored by low-level functions */
>>  	struct pt_regs regs;
>>  	/* Saved and restored by high-level functions */
>> -	unsigned long scratch;
>>  	unsigned long envcfg;
>>  	unsigned long tvec;
>>  	unsigned long ie;
>> diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
>> index 299795341e8a..3d306d8a253d 100644
>> --- a/arch/riscv/kernel/suspend.c
>> +++ b/arch/riscv/kernel/suspend.c
>> @@ -14,7 +14,6 @@
>>
>>  void suspend_save_csrs(struct suspend_context *context)
>>  {
>> -	context->scratch = csr_read(CSR_SCRATCH);
>>  	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
>>  		context->envcfg = csr_read(CSR_ENVCFG);
>>  	context->tvec = csr_read(CSR_TVEC);
>> @@ -37,7 +36,7 @@ void suspend_save_csrs(struct suspend_context *context)
>>
>>  void suspend_restore_csrs(struct suspend_context *context)
>>  {
>> -	csr_write(CSR_SCRATCH, context->scratch);
>> +	csr_write(CSR_SCRATCH, 0);
> If the register is always zero, do we need to explicitly write zero to the register during resume?

The register contains zero while executing in the kernel. While executing in
userspace, the value is nonzero. The value is checked at the beginning of
handle_exception(). We must ensure the value is zero before enabling interrupts,
or we might incorrectly think the interrupt was entered from userspace.

We don't know what the value will be when the hart comes out of non-retentive
suspend. Per the SBI HSM specification, Table 6: "All other registers remain in
an undefined state."

Regards,
Samuel
Palmer Dabbelt April 9, 2024, 7:43 p.m. UTC | #3
On Thu, 21 Mar 2024 16:51:31 PDT (-0700), samuel.holland@sifive.com wrote:
> On 2024-03-14 11:55 PM, JeeHeng Sia wrote:
>>
>>
>>> -----Original Message-----
>>> From: Samuel Holland <samuel.holland@sifive.com>
>>> Sent: Wednesday, March 13, 2024 3:57 AM
>>> To: Palmer Dabbelt <palmer@dabbelt.com>; linux-riscv@lists.infradead.org
>>> Cc: Samuel Holland <samuel.holland@sifive.com>; Albert Ou <aou@eecs.berkeley.edu>; Andrew Jones <ajones@ventanamicro.com>;
>>> Conor Dooley <conor.dooley@microchip.com>; Leyfoon Tan <leyfoon.tan@starfivetech.com>; Paul Walmsley
>>> <paul.walmsley@sifive.com>; Pavel Machek <pavel@ucw.cz>; Rafael J. Wysocki <rafael@kernel.org>; JeeHeng Sia
>>> <jeeheng.sia@starfivetech.com>; linux-kernel@vger.kernel.org; linux-pm@vger.kernel.org
>>> Subject: [PATCH] riscv: Do not save the scratch CSR during suspend
>>>
>>> While the processor is executing kernel code, the value of the scratch
>>> CSR is always zero, so there is no need to save the value. Continue to
>>> write the CSR during the resume flow, so we do not rely on firmware to
>>> initialize it.
>>>
>>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>>> ---
>>>
>>>  arch/riscv/include/asm/suspend.h | 1 -
>>>  arch/riscv/kernel/suspend.c      | 3 +--
>>>  2 files changed, 1 insertion(+), 3 deletions(-)
>>>
>>> diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
>>> index 491296a335d0..6569eefacf38 100644
>>> --- a/arch/riscv/include/asm/suspend.h
>>> +++ b/arch/riscv/include/asm/suspend.h
>>> @@ -13,7 +13,6 @@ struct suspend_context {
>>>  	/* Saved and restored by low-level functions */
>>>  	struct pt_regs regs;
>>>  	/* Saved and restored by high-level functions */
>>> -	unsigned long scratch;
>>>  	unsigned long envcfg;
>>>  	unsigned long tvec;
>>>  	unsigned long ie;
>>> diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
>>> index 299795341e8a..3d306d8a253d 100644
>>> --- a/arch/riscv/kernel/suspend.c
>>> +++ b/arch/riscv/kernel/suspend.c
>>> @@ -14,7 +14,6 @@
>>>
>>>  void suspend_save_csrs(struct suspend_context *context)
>>>  {
>>> -	context->scratch = csr_read(CSR_SCRATCH);
>>>  	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
>>>  		context->envcfg = csr_read(CSR_ENVCFG);
>>>  	context->tvec = csr_read(CSR_TVEC);
>>> @@ -37,7 +36,7 @@ void suspend_save_csrs(struct suspend_context *context)
>>>
>>>  void suspend_restore_csrs(struct suspend_context *context)
>>>  {
>>> -	csr_write(CSR_SCRATCH, context->scratch);
>>> +	csr_write(CSR_SCRATCH, 0);
>> If the register is always zero, do we need to explicitly write zero to the register during resume?
>
> The register contains zero while executing in the kernel. While executing in
> userspace, the value is nonzero. The value is checked at the beginning of
> handle_exception(). We must ensure the value is zero before enabling interrupts,
> or we might incorrectly think the interrupt was entered from userspace.
>
> We don't know what the value will be when the hart comes out of non-retentive
> suspend. Per the SBI HSM specification, Table 6: "All other registers remain in
> an undefined state."

We're also not setting it at all in `.macro suspend_restore_csrs`, which
I think is just a bug.

That said, I'm kind of seeing bugs everywhere I look in this now -- what 
about all the other registers we can poke, like timers/counters or the 
V/F state (or anything from M-mode, though maybe that's just someone 
else's problem)?

I also think we'd break on medlow kernels, as a bunch of this relies on 
medany-as-PIC for the SATP-off transition.

Maybe I'm going crazy here, though...

> Regards,
> Samuel
patchwork-bot+linux-riscv@kernel.org April 10, 2024, 2:20 p.m. UTC | #4
Hello:

This patch was applied to riscv/linux.git (for-next)
by Palmer Dabbelt <palmer@rivosinc.com>:

On Tue, 12 Mar 2024 12:56:38 -0700 you wrote:
> While the processor is executing kernel code, the value of the scratch
> CSR is always zero, so there is no need to save the value. Continue to
> write the CSR during the resume flow, so we do not rely on firmware to
> initialize it.
> 
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> 
> [...]

Here is the summary with links:
  - riscv: Do not save the scratch CSR during suspend
    https://git.kernel.org/riscv/c/ba5ea59f768f

You are awesome, thank you!
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
index 491296a335d0..6569eefacf38 100644
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -13,7 +13,6 @@  struct suspend_context {
 	/* Saved and restored by low-level functions */
 	struct pt_regs regs;
 	/* Saved and restored by high-level functions */
-	unsigned long scratch;
 	unsigned long envcfg;
 	unsigned long tvec;
 	unsigned long ie;
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 299795341e8a..3d306d8a253d 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -14,7 +14,6 @@ 
 
 void suspend_save_csrs(struct suspend_context *context)
 {
-	context->scratch = csr_read(CSR_SCRATCH);
 	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
 		context->envcfg = csr_read(CSR_ENVCFG);
 	context->tvec = csr_read(CSR_TVEC);
@@ -37,7 +36,7 @@  void suspend_save_csrs(struct suspend_context *context)
 
 void suspend_restore_csrs(struct suspend_context *context)
 {
-	csr_write(CSR_SCRATCH, context->scratch);
+	csr_write(CSR_SCRATCH, 0);
 	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
 		csr_write(CSR_ENVCFG, context->envcfg);
 	csr_write(CSR_TVEC, context->tvec);