diff mbox series

[v16,1/5] iommu/arm-smmu: re-enable context caching in smmu reset operation

Message ID 20241008125410.3422512-2-quic_bibekkum@quicinc.com
State New
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Oct. 8, 2024, 12:54 p.m. UTC
Default MMU-500 reset operation disables context caching in
prefetch buffer. It is however expected for context banks using
the ACTLR register to retain their prefetch value during reset
and runtime suspend.

Replace default MMU-500 reset operation with Qualcomm specific reset
operation which envelope the default reset operation and re-enables
context caching in prefetch buffer for Qualcomm SoCs.

Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

--
2.34.1

Comments

Rob Clark Oct. 28, 2024, 9:12 p.m. UTC | #1
On Tue, Oct 8, 2024 at 5:54 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Default MMU-500 reset operation disables context caching in
> prefetch buffer. It is however expected for context banks using
> the ACTLR register to retain their prefetch value during reset
> and runtime suspend.
>
> Replace default MMU-500 reset operation with Qualcomm specific reset
> operation which envelope the default reset operation and re-enables
> context caching in prefetch buffer for Qualcomm SoCs.
>
> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>

Reviewed-by: Rob Clark <robdclark@gmail.com>

> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
>  1 file changed, 42 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 087fb4f6f4d3..0cb10b354802 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -16,6 +16,16 @@
>
>  #define QCOM_DUMMY_VAL -1
>
> +/*
> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
> + * buffer). The remaining bits are implementation defined and vary across
> + * SoCs.
> + */
> +
> +#define CPRE                   (1 << 1)
> +#define CMTLB                  (1 << 0)
> +
>  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>  {
>         return container_of(smmu, struct qcom_smmu, smmu);
> @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
>         return match ? IOMMU_DOMAIN_IDENTITY : 0;
>  }
>
> +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
> +{
> +       int ret;
> +       u32 val;
> +       int i;
> +
> +       ret = arm_mmu500_reset(smmu);
> +       if (ret)
> +               return ret;
> +
> +       /*
> +        * arm_mmu500_reset() disables CPRE which is re-enabled here.
> +        * The errata for MMU-500 before the r2p2 revision requires CPRE to be
> +        * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
> +        * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
> +        * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
> +        * the CPRE bit for the next-page prefetcher to retain the prefetch value
> +        * during reset and runtime suspend operations.
> +        */
> +
> +       for (i = 0; i < smmu->num_context_banks; ++i) {
> +               val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
> +               val |= CPRE;
> +               arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
> +       }
> +
> +       return 0;
> +}
> +
>  static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
>  {
>         int ret;
>
> -       arm_mmu500_reset(smmu);
> +       qcom_smmu500_reset(smmu);
>
>         /*
>          * To address performance degradation in non-real time clients,
> @@ -427,7 +466,7 @@ static const struct arm_smmu_impl qcom_smmu_500_impl = {
>         .init_context = qcom_smmu_init_context,
>         .cfg_probe = qcom_smmu_cfg_probe,
>         .def_domain_type = qcom_smmu_def_domain_type,
> -       .reset = arm_mmu500_reset,
> +       .reset = qcom_smmu500_reset,
>         .write_s2cr = qcom_smmu_write_s2cr,
>         .tlb_sync = qcom_smmu_tlb_sync,
>  #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
> @@ -461,7 +500,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
>  static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
>         .init_context = qcom_adreno_smmu_init_context,
>         .def_domain_type = qcom_smmu_def_domain_type,
> -       .reset = arm_mmu500_reset,
> +       .reset = qcom_smmu500_reset,
>         .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>         .write_sctlr = qcom_adreno_smmu_write_sctlr,
>         .tlb_sync = qcom_smmu_tlb_sync,
> --
> 2.34.1
>
Robin Murphy Oct. 29, 2024, 1:25 p.m. UTC | #2
On 2024-10-29 12:47 pm, Will Deacon wrote:
> On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
>>
>>
>> On 10/24/2024 6:22 PM, Will Deacon wrote:
>>> On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
>>>> Default MMU-500 reset operation disables context caching in
>>>> prefetch buffer. It is however expected for context banks using
>>>> the ACTLR register to retain their prefetch value during reset
>>>> and runtime suspend.
>>>>
>>>> Replace default MMU-500 reset operation with Qualcomm specific reset
>>>> operation which envelope the default reset operation and re-enables
>>>> context caching in prefetch buffer for Qualcomm SoCs.
>>>>
>>>> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
>>>>    1 file changed, 42 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index 087fb4f6f4d3..0cb10b354802 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -16,6 +16,16 @@
>>>>
>>>>    #define QCOM_DUMMY_VAL	-1
>>>>
>>>> +/*
>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>>>> + * buffer). The remaining bits are implementation defined and vary across
>>>> + * SoCs.
>>>> + */
>>>> +
>>>> +#define CPRE			(1 << 1)
>>>> +#define CMTLB			(1 << 0)
>>>> +
>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>    {
>>>>    	return container_of(smmu, struct qcom_smmu, smmu);
>>>> @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
>>>>    	return match ? IOMMU_DOMAIN_IDENTITY : 0;
>>>>    }
>>>>
>>>> +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
>>>> +{
>>>> +	int ret;
>>>> +	u32 val;
>>>> +	int i;
>>>> +
>>>> +	ret = arm_mmu500_reset(smmu);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	/*
>>>> +	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
>>>> +	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
>>>> +	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
>>>> +	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
>>>> +	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
>>>> +	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
>>>> +	 * during reset and runtime suspend operations.
>>>> +	 */
>>>> +
>>>> +	for (i = 0; i < smmu->num_context_banks; ++i) {
>>>> +		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
>>>> +		val |= CPRE;
>>>> +		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
>>>> +	}
>>>
>>> If CPRE only needs to be disabled prior to r2p2, then please teach the
>>> MMU-500 code about that instead of adding qualcomm-specific logic here.
>>>
>>
>> Doing this on MMU-500 code would make it generic and reflect for SoC of all
>> the vendors on this platform.
>> We can make sure that it won't cause any problems in Qualcomm SoCs as we
>> have been enabling this since for some years now and could not
>> observe/reproduce any issues around these errata.
> 
> Unless you can explain definitively hy that's the case, I still don't
> think we should be second-guessing the core SMMU driver code in the
> Qualcomm backend.

Of the still-open errata, #562869 could be safely mitigated by nobbling 
ARM_SMMU_FEAT_FMT_AARCH32_S, but #1047329 is the one which worries me, 
since even if we don't support nesting within Linux, I'm wary of the 
firmware hypervisor sticking its own S2 under any S1 context we set up. 
I guess we could try the alternate SMMU_ACR.IPA2PA_CEN workaround for 
that, however it's not obvious that the performance impact in that case 
wouldn't be worse than whatever benefit may be gained from keeping CPRE.

Thanks,
Robin.

>> But we won't be able to guarantee the same behavior in SoC for other vendors
>> where these errata might still be applicable as per [1] and [2].
>> So as per my understanding it's safe to include in Qualcomm specific
>> implementation and not changing the default behavior in all other vendors'
>> SoC even if they are not prior to r2p2 revision [3].
> 
> If you want to gate the errata workarounds on policy, then please follow
> what we do for the CPU: add a Kconfig option (e.g.
> ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> the relevant errata aren't all "rare") and update silicon-errata.rst
> accordingly.
> 
> Then you can choose to disable them in your .config if you're happy to
> pick up the pieces.
> 
> As an aside, I'm happy with the rest of the series now.
> 
> Will
Bibek Kumar Patro Oct. 30, 2024, 11:30 a.m. UTC | #3
On 10/29/2024 6:17 PM, Will Deacon wrote:
> On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
>>
>>
>> On 10/24/2024 6:22 PM, Will Deacon wrote:
>>> On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
>>>> Default MMU-500 reset operation disables context caching in
>>>> prefetch buffer. It is however expected for context banks using
>>>> the ACTLR register to retain their prefetch value during reset
>>>> and runtime suspend.
>>>>
>>>> Replace default MMU-500 reset operation with Qualcomm specific reset
>>>> operation which envelope the default reset operation and re-enables
>>>> context caching in prefetch buffer for Qualcomm SoCs.
>>>>
>>>> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
>>>>    1 file changed, 42 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index 087fb4f6f4d3..0cb10b354802 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -16,6 +16,16 @@
>>>>
>>>>    #define QCOM_DUMMY_VAL	-1
>>>>
>>>> +/*
>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>>>> + * buffer). The remaining bits are implementation defined and vary across
>>>> + * SoCs.
>>>> + */
>>>> +
>>>> +#define CPRE			(1 << 1)
>>>> +#define CMTLB			(1 << 0)
>>>> +
>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>    {
>>>>    	return container_of(smmu, struct qcom_smmu, smmu);
>>>> @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
>>>>    	return match ? IOMMU_DOMAIN_IDENTITY : 0;
>>>>    }
>>>>
>>>> +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
>>>> +{
>>>> +	int ret;
>>>> +	u32 val;
>>>> +	int i;
>>>> +
>>>> +	ret = arm_mmu500_reset(smmu);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	/*
>>>> +	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
>>>> +	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
>>>> +	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
>>>> +	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
>>>> +	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
>>>> +	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
>>>> +	 * during reset and runtime suspend operations.
>>>> +	 */
>>>> +
>>>> +	for (i = 0; i < smmu->num_context_banks; ++i) {
>>>> +		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
>>>> +		val |= CPRE;
>>>> +		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
>>>> +	}
>>>
>>> If CPRE only needs to be disabled prior to r2p2, then please teach the
>>> MMU-500 code about that instead of adding qualcomm-specific logic here.
>>>
>>
>> Doing this on MMU-500 code would make it generic and reflect for SoC of all
>> the vendors on this platform.
>> We can make sure that it won't cause any problems in Qualcomm SoCs as we
>> have been enabling this since for some years now and could not
>> observe/reproduce any issues around these errata.
> 
> Unless you can explain definitively hy that's the case, I still don't
> think we should be second-guessing the core SMMU driver code in the
> Qualcomm backend.
> 
>> But we won't be able to guarantee the same behavior in SoC for other vendors
>> where these errata might still be applicable as per [1] and [2].
>> So as per my understanding it's safe to include in Qualcomm specific
>> implementation and not changing the default behavior in all other vendors'
>> SoC even if they are not prior to r2p2 revision [3].
> 
> If you want to gate the errata workarounds on policy, then please follow
> what we do for the CPU: add a Kconfig option (e.g.
> ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> the relevant errata aren't all "rare") and update silicon-errata.rst
> accordingly.
> 
> Then you can choose to disable them in your .config if you're happy to
> pick up the pieces.

This seems to be a good idea to me . I am thinking of this approach 
based on your suggestion,
i.e. we can bind the original workaround in
arm_mmu500_reset implementation within ARM_SMMU_WORKAROUND_BROKEN_CPRE
config (defualts to on, CPRE would be disabled) and in QCOM SoCs default 
it to off
(when ARM_SMMU_QCOM=Y -> switch ARM_SMMU_WORKAROUND_BROKEN_CPRE=N).

In silicon-errata.rst would updating ARM_SMMU_WORKAROUND_BROKEN_CPRE be 
okay , as the config names are based on erratum number.

Thanks & regards,
Bibek



> 
> As an aside, I'm happy with the rest of the series now.
> 
> Will
Will Deacon Nov. 1, 2024, 12:10 p.m. UTC | #4
On Wed, Oct 30, 2024 at 05:00:13PM +0530, Bibek Kumar Patro wrote:
> On 10/29/2024 6:17 PM, Will Deacon wrote:
> > On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
> > > On 10/24/2024 6:22 PM, Will Deacon wrote:
> > > > On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
> > If you want to gate the errata workarounds on policy, then please follow
> > what we do for the CPU: add a Kconfig option (e.g.
> > ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> > the relevant errata aren't all "rare") and update silicon-errata.rst
> > accordingly.
> > 
> > Then you can choose to disable them in your .config if you're happy to
> > pick up the pieces.
> 
> This seems to be a good idea to me . I am thinking of this approach based on
> your suggestion,
> i.e. we can bind the original workaround in
> arm_mmu500_reset implementation within ARM_SMMU_WORKAROUND_BROKEN_CPRE
> config (defualts to on, CPRE would be disabled) and in QCOM SoCs default it
> to off
> (when ARM_SMMU_QCOM=Y -> switch ARM_SMMU_WORKAROUND_BROKEN_CPRE=N).

ARM_SMMU_QCOM is enabled by default, so please don't do that. People who
want to disable errata workarounds based on a hunch can do that themselves.
There's no need to try to do that automatically in Kconfig.

> In silicon-errata.rst would updating ARM_SMMU_WORKAROUND_BROKEN_CPRE be okay
> , as the config names are based on erratum number.

In this case, the Kconfig option covers a variety of errata so how about
we go with:

	ARM_SMMU_MMU_500_CPRE_ERRATA

and then you can list all of the numbers in the "Erratum ID" column?

Will
Bibek Kumar Patro Nov. 4, 2024, 6:31 a.m. UTC | #5
On 11/1/2024 5:40 PM, Will Deacon wrote:
> On Wed, Oct 30, 2024 at 05:00:13PM +0530, Bibek Kumar Patro wrote:
>> On 10/29/2024 6:17 PM, Will Deacon wrote:
>>> On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
>>>> On 10/24/2024 6:22 PM, Will Deacon wrote:
>>>>> On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
>>> If you want to gate the errata workarounds on policy, then please follow
>>> what we do for the CPU: add a Kconfig option (e.g.
>>> ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
>>> the relevant errata aren't all "rare") and update silicon-errata.rst
>>> accordingly.
>>>
>>> Then you can choose to disable them in your .config if you're happy to
>>> pick up the pieces.
>>
>> This seems to be a good idea to me . I am thinking of this approach based on
>> your suggestion,
>> i.e. we can bind the original workaround in
>> arm_mmu500_reset implementation within ARM_SMMU_WORKAROUND_BROKEN_CPRE
>> config (defualts to on, CPRE would be disabled) and in QCOM SoCs default it
>> to off
>> (when ARM_SMMU_QCOM=Y -> switch ARM_SMMU_WORKAROUND_BROKEN_CPRE=N).
> 
> ARM_SMMU_QCOM is enabled by default, so please don't do that. People who
> want to disable errata workarounds based on a hunch can do that themselves.
> There's no need to try to do that automatically in Kconfig.
> 

Okay I see, that seems better. To allow users to manually toggle the 
config switch for
disabling errata workarounds based on their specific needs,
rather than having it enabled by default.

>> In silicon-errata.rst would updating ARM_SMMU_WORKAROUND_BROKEN_CPRE be okay
>> , as the config names are based on erratum number.
> 
> In this case, the Kconfig option covers a variety of errata so how about
> we go with:
> 
> 	ARM_SMMU_MMU_500_CPRE_ERRATA
> 
> and then you can list all of the numbers in the "Erratum ID" column?
> 

Ack, this name sounds self-explanatory to me. Thanks for the suggestion,
I'll proceed with this name and ensure documenting the known numbers in 
Erratum ID column
<#826419, #841119, #562869, #1047329>

Thanks & regards,
Bibek


> Will
>
Dmitry Baryshkov Nov. 4, 2024, 11:10 a.m. UTC | #6
On Tue, Oct 29, 2024 at 12:47:09PM +0000, Will Deacon wrote:
> On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
> > 
> > 
> > On 10/24/2024 6:22 PM, Will Deacon wrote:
> > > On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
> > > > Default MMU-500 reset operation disables context caching in
> > > > prefetch buffer. It is however expected for context banks using
> > > > the ACTLR register to retain their prefetch value during reset
> > > > and runtime suspend.
> > > > 
> > > > Replace default MMU-500 reset operation with Qualcomm specific reset
> > > > operation which envelope the default reset operation and re-enables
> > > > context caching in prefetch buffer for Qualcomm SoCs.
> > > > 
> > > > Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> > > > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > > > ---
> > > >   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
> > > >   1 file changed, 42 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > index 087fb4f6f4d3..0cb10b354802 100644
> > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > @@ -16,6 +16,16 @@
> > > > 
> > > >   #define QCOM_DUMMY_VAL	-1
> > > > 
> > > > +/*
> > > > + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
> > > > + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
> > > > + * buffer). The remaining bits are implementation defined and vary across
> > > > + * SoCs.
> > > > + */
> > > > +
> > > > +#define CPRE			(1 << 1)
> > > > +#define CMTLB			(1 << 0)
> > > > +
> > > >   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> > > >   {
> > > >   	return container_of(smmu, struct qcom_smmu, smmu);
> > > > @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
> > > >   	return match ? IOMMU_DOMAIN_IDENTITY : 0;
> > > >   }
> > > > 
> > > > +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
> > > > +{
> > > > +	int ret;
> > > > +	u32 val;
> > > > +	int i;
> > > > +
> > > > +	ret = arm_mmu500_reset(smmu);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	/*
> > > > +	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
> > > > +	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
> > > > +	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
> > > > +	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
> > > > +	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
> > > > +	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
> > > > +	 * during reset and runtime suspend operations.
> > > > +	 */
> > > > +
> > > > +	for (i = 0; i < smmu->num_context_banks; ++i) {
> > > > +		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
> > > > +		val |= CPRE;
> > > > +		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
> > > > +	}
> > > 
> > > If CPRE only needs to be disabled prior to r2p2, then please teach the
> > > MMU-500 code about that instead of adding qualcomm-specific logic here.
> > > 
> > 
> > Doing this on MMU-500 code would make it generic and reflect for SoC of all
> > the vendors on this platform.
> > We can make sure that it won't cause any problems in Qualcomm SoCs as we
> > have been enabling this since for some years now and could not
> > observe/reproduce any issues around these errata.
> 
> Unless you can explain definitively hy that's the case, I still don't
> think we should be second-guessing the core SMMU driver code in the
> Qualcomm backend.
> 
> > But we won't be able to guarantee the same behavior in SoC for other vendors
> > where these errata might still be applicable as per [1] and [2].
> > So as per my understanding it's safe to include in Qualcomm specific
> > implementation and not changing the default behavior in all other vendors'
> > SoC even if they are not prior to r2p2 revision [3].
> 
> If you want to gate the errata workarounds on policy, then please follow
> what we do for the CPU: add a Kconfig option (e.g.
> ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> the relevant errata aren't all "rare") and update silicon-errata.rst
> accordingly.
> 
> Then you can choose to disable them in your .config if you're happy to
> pick up the pieces.

Is it actually going to work? For most of the CPU errata we can detect
and limit the workarounds to some class of CPUs. For SMMU, if I'm not
misunderstanding something, the errata will be enabled by default for
all SMMU-500 implementation, so only very few kernels, targeting only
the Qualcomm hardware, can get that disabled.

> As an aside, I'm happy with the rest of the series now.
> 
> Will
Will Deacon Nov. 5, 2024, 11:37 a.m. UTC | #7
On Mon, Nov 04, 2024 at 01:10:12PM +0200, Dmitry Baryshkov wrote:
> On Tue, Oct 29, 2024 at 12:47:09PM +0000, Will Deacon wrote:
> > On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
> > > 
> > > 
> > > On 10/24/2024 6:22 PM, Will Deacon wrote:
> > > > On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
> > > > > Default MMU-500 reset operation disables context caching in
> > > > > prefetch buffer. It is however expected for context banks using
> > > > > the ACTLR register to retain their prefetch value during reset
> > > > > and runtime suspend.
> > > > > 
> > > > > Replace default MMU-500 reset operation with Qualcomm specific reset
> > > > > operation which envelope the default reset operation and re-enables
> > > > > context caching in prefetch buffer for Qualcomm SoCs.
> > > > > 
> > > > > Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> > > > > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > > > > ---
> > > > >   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
> > > > >   1 file changed, 42 insertions(+), 3 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > index 087fb4f6f4d3..0cb10b354802 100644
> > > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > @@ -16,6 +16,16 @@
> > > > > 
> > > > >   #define QCOM_DUMMY_VAL	-1
> > > > > 
> > > > > +/*
> > > > > + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
> > > > > + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
> > > > > + * buffer). The remaining bits are implementation defined and vary across
> > > > > + * SoCs.
> > > > > + */
> > > > > +
> > > > > +#define CPRE			(1 << 1)
> > > > > +#define CMTLB			(1 << 0)
> > > > > +
> > > > >   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> > > > >   {
> > > > >   	return container_of(smmu, struct qcom_smmu, smmu);
> > > > > @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
> > > > >   	return match ? IOMMU_DOMAIN_IDENTITY : 0;
> > > > >   }
> > > > > 
> > > > > +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
> > > > > +{
> > > > > +	int ret;
> > > > > +	u32 val;
> > > > > +	int i;
> > > > > +
> > > > > +	ret = arm_mmu500_reset(smmu);
> > > > > +	if (ret)
> > > > > +		return ret;
> > > > > +
> > > > > +	/*
> > > > > +	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
> > > > > +	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
> > > > > +	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
> > > > > +	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
> > > > > +	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
> > > > > +	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
> > > > > +	 * during reset and runtime suspend operations.
> > > > > +	 */
> > > > > +
> > > > > +	for (i = 0; i < smmu->num_context_banks; ++i) {
> > > > > +		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
> > > > > +		val |= CPRE;
> > > > > +		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
> > > > > +	}
> > > > 
> > > > If CPRE only needs to be disabled prior to r2p2, then please teach the
> > > > MMU-500 code about that instead of adding qualcomm-specific logic here.
> > > > 
> > > 
> > > Doing this on MMU-500 code would make it generic and reflect for SoC of all
> > > the vendors on this platform.
> > > We can make sure that it won't cause any problems in Qualcomm SoCs as we
> > > have been enabling this since for some years now and could not
> > > observe/reproduce any issues around these errata.
> > 
> > Unless you can explain definitively hy that's the case, I still don't
> > think we should be second-guessing the core SMMU driver code in the
> > Qualcomm backend.
> > 
> > > But we won't be able to guarantee the same behavior in SoC for other vendors
> > > where these errata might still be applicable as per [1] and [2].
> > > So as per my understanding it's safe to include in Qualcomm specific
> > > implementation and not changing the default behavior in all other vendors'
> > > SoC even if they are not prior to r2p2 revision [3].
> > 
> > If you want to gate the errata workarounds on policy, then please follow
> > what we do for the CPU: add a Kconfig option (e.g.
> > ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> > the relevant errata aren't all "rare") and update silicon-errata.rst
> > accordingly.
> > 
> > Then you can choose to disable them in your .config if you're happy to
> > pick up the pieces.
> 
> Is it actually going to work? For most of the CPU errata we can detect
> and limit the workarounds to some class of CPUs. For SMMU, if I'm not
> misunderstanding something, the errata will be enabled by default for
> all SMMU-500 implementation, so only very few kernels, targeting only
> the Qualcomm hardware, can get that disabled.

We can add checks based on rXpY per the erratum documentation, but Robin
was saying elsewhere in the thread that some of them are still open (i.e.
unfixed).

So ultimately, the decision to disable workarounds for known errata on
broken hardware is going to be a niche sport, yes.

Will
Dmitry Baryshkov Nov. 6, 2024, 7:40 a.m. UTC | #8
On Tue, Nov 05, 2024 at 11:37:24AM +0000, Will Deacon wrote:
> On Mon, Nov 04, 2024 at 01:10:12PM +0200, Dmitry Baryshkov wrote:
> > On Tue, Oct 29, 2024 at 12:47:09PM +0000, Will Deacon wrote:
> > > On Fri, Oct 25, 2024 at 07:51:22PM +0530, Bibek Kumar Patro wrote:
> > > > 
> > > > 
> > > > On 10/24/2024 6:22 PM, Will Deacon wrote:
> > > > > On Tue, Oct 08, 2024 at 06:24:06PM +0530, Bibek Kumar Patro wrote:
> > > > > > Default MMU-500 reset operation disables context caching in
> > > > > > prefetch buffer. It is however expected for context banks using
> > > > > > the ACTLR register to retain their prefetch value during reset
> > > > > > and runtime suspend.
> > > > > > 
> > > > > > Replace default MMU-500 reset operation with Qualcomm specific reset
> > > > > > operation which envelope the default reset operation and re-enables
> > > > > > context caching in prefetch buffer for Qualcomm SoCs.
> > > > > > 
> > > > > > Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> > > > > > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > > > > > ---
> > > > > >   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++--
> > > > > >   1 file changed, 42 insertions(+), 3 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > > index 087fb4f6f4d3..0cb10b354802 100644
> > > > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > > > > @@ -16,6 +16,16 @@
> > > > > > 
> > > > > >   #define QCOM_DUMMY_VAL	-1
> > > > > > 
> > > > > > +/*
> > > > > > + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
> > > > > > + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
> > > > > > + * buffer). The remaining bits are implementation defined and vary across
> > > > > > + * SoCs.
> > > > > > + */
> > > > > > +
> > > > > > +#define CPRE			(1 << 1)
> > > > > > +#define CMTLB			(1 << 0)
> > > > > > +
> > > > > >   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> > > > > >   {
> > > > > >   	return container_of(smmu, struct qcom_smmu, smmu);
> > > > > > @@ -396,11 +406,40 @@ static int qcom_smmu_def_domain_type(struct device *dev)
> > > > > >   	return match ? IOMMU_DOMAIN_IDENTITY : 0;
> > > > > >   }
> > > > > > 
> > > > > > +static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
> > > > > > +{
> > > > > > +	int ret;
> > > > > > +	u32 val;
> > > > > > +	int i;
> > > > > > +
> > > > > > +	ret = arm_mmu500_reset(smmu);
> > > > > > +	if (ret)
> > > > > > +		return ret;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
> > > > > > +	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
> > > > > > +	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
> > > > > > +	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
> > > > > > +	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
> > > > > > +	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
> > > > > > +	 * during reset and runtime suspend operations.
> > > > > > +	 */
> > > > > > +
> > > > > > +	for (i = 0; i < smmu->num_context_banks; ++i) {
> > > > > > +		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
> > > > > > +		val |= CPRE;
> > > > > > +		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
> > > > > > +	}
> > > > > 
> > > > > If CPRE only needs to be disabled prior to r2p2, then please teach the
> > > > > MMU-500 code about that instead of adding qualcomm-specific logic here.
> > > > > 
> > > > 
> > > > Doing this on MMU-500 code would make it generic and reflect for SoC of all
> > > > the vendors on this platform.
> > > > We can make sure that it won't cause any problems in Qualcomm SoCs as we
> > > > have been enabling this since for some years now and could not
> > > > observe/reproduce any issues around these errata.
> > > 
> > > Unless you can explain definitively hy that's the case, I still don't
> > > think we should be second-guessing the core SMMU driver code in the
> > > Qualcomm backend.
> > > 
> > > > But we won't be able to guarantee the same behavior in SoC for other vendors
> > > > where these errata might still be applicable as per [1] and [2].
> > > > So as per my understanding it's safe to include in Qualcomm specific
> > > > implementation and not changing the default behavior in all other vendors'
> > > > SoC even if they are not prior to r2p2 revision [3].
> > > 
> > > If you want to gate the errata workarounds on policy, then please follow
> > > what we do for the CPU: add a Kconfig option (e.g.
> > > ARM_SMMU_WORKAROUND_BROKEN_CPRE) which defaults to "on" (assuming that
> > > the relevant errata aren't all "rare") and update silicon-errata.rst
> > > accordingly.
> > > 
> > > Then you can choose to disable them in your .config if you're happy to
> > > pick up the pieces.
> > 
> > Is it actually going to work? For most of the CPU errata we can detect
> > and limit the workarounds to some class of CPUs. For SMMU, if I'm not
> > misunderstanding something, the errata will be enabled by default for
> > all SMMU-500 implementation, so only very few kernels, targeting only
> > the Qualcomm hardware, can get that disabled.
> 
> We can add checks based on rXpY per the erratum documentation, but Robin
> was saying elsewhere in the thread that some of them are still open (i.e.
> unfixed).
> 
> So ultimately, the decision to disable workarounds for known errata on
> broken hardware is going to be a niche sport, yes.

Yes, I understand that. I'm just trying to understand if we can have a
better solution than having errata workarounds enabled on a majority of
the kernels (which means less testing for the non-workaround-enabled
setup on Qualcomm devices).
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 087fb4f6f4d3..0cb10b354802 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -16,6 +16,16 @@ 

 #define QCOM_DUMMY_VAL	-1

+/*
+ * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
+ * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
+ * buffer). The remaining bits are implementation defined and vary across
+ * SoCs.
+ */
+
+#define CPRE			(1 << 1)
+#define CMTLB			(1 << 0)
+
 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 {
 	return container_of(smmu, struct qcom_smmu, smmu);
@@ -396,11 +406,40 @@  static int qcom_smmu_def_domain_type(struct device *dev)
 	return match ? IOMMU_DOMAIN_IDENTITY : 0;
 }

+static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
+{
+	int ret;
+	u32 val;
+	int i;
+
+	ret = arm_mmu500_reset(smmu);
+	if (ret)
+		return ret;
+
+	/*
+	 * arm_mmu500_reset() disables CPRE which is re-enabled here.
+	 * The errata for MMU-500 before the r2p2 revision requires CPRE to be
+	 * disabled. The arm_mmu500_reset function disables CPRE to accommodate all
+	 * RTL revisions. Since all Qualcomm SoCs are on the r2p4 revision, where
+	 * the CPRE bit can be enabled, the qcom_smmu500_reset function re-enables
+	 * the CPRE bit for the next-page prefetcher to retain the prefetch value
+	 * during reset and runtime suspend operations.
+	 */
+
+	for (i = 0; i < smmu->num_context_banks; ++i) {
+		val = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
+		val |= CPRE;
+		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, val);
+	}
+
+	return 0;
+}
+
 static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
 {
 	int ret;

-	arm_mmu500_reset(smmu);
+	qcom_smmu500_reset(smmu);

 	/*
 	 * To address performance degradation in non-real time clients,
@@ -427,7 +466,7 @@  static const struct arm_smmu_impl qcom_smmu_500_impl = {
 	.init_context = qcom_smmu_init_context,
 	.cfg_probe = qcom_smmu_cfg_probe,
 	.def_domain_type = qcom_smmu_def_domain_type,
-	.reset = arm_mmu500_reset,
+	.reset = qcom_smmu500_reset,
 	.write_s2cr = qcom_smmu_write_s2cr,
 	.tlb_sync = qcom_smmu_tlb_sync,
 #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
@@ -461,7 +500,7 @@  static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
 static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
 	.init_context = qcom_adreno_smmu_init_context,
 	.def_domain_type = qcom_smmu_def_domain_type,
-	.reset = arm_mmu500_reset,
+	.reset = qcom_smmu500_reset,
 	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
 	.write_sctlr = qcom_adreno_smmu_write_sctlr,
 	.tlb_sync = qcom_smmu_tlb_sync,