diff mbox series

[v13,6/6] iommu/arm-smmu: add support for PRR bit setup

Message ID 20240628140435.1652374-7-quic_bibekkum@quicinc.com
State New
Headers show
Series [v13,1/6] iommu/arm-smmu: re-enable context caching in smmu reset operation | expand

Commit Message

Bibek Kumar Patro June 28, 2024, 2:04 p.m. UTC
Add an adreno-smmu-priv interface for drm/msm to call
into arm-smmu-qcom and initiate the PRR bit setup or reset
sequence as per request.

This will be used by GPU to setup the PRR bit and related
configuration registers through adreno-smmu private
interface instead of directly poking the smmu hardware.

Suggested-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
 include/linux/adreno-smmu-priv.h           |  6 +++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

--
2.34.1

Comments

Bibek Kumar Patro July 1, 2024, 11 a.m. UTC | #1
On 6/28/2024 9:14 PM, Rob Clark wrote:
> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 6/28/2024 7:47 PM, Rob Clark wrote:
>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>> sequence as per request.
>>>>
>>>> This will be used by GPU to setup the PRR bit and related
>>>> configuration registers through adreno-smmu private
>>>> interface instead of directly poking the smmu hardware.
>>>>
>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>    include/linux/adreno-smmu-priv.h           |  6 +++++-
>>>>    3 files changed, 30 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index bd101a161d04..64571a1c47b8 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -28,6 +28,7 @@
>>>>    #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>    #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>    #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>
>>>>    static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>           { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>    }
>>>>
>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
>>>> +{
>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +       u32 reg = 0;
>>>> +
>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>> +
>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>> +
>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>> +       if (set)
>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>> +
>>>
>>> nit, extra line
>>>
>>
>> Ack, will remove this. Thanks for pointing out.
>>
>>> Also, if you passed a `struct page *` instead, then you could drop the
>>> bool param, ie. passing NULL for the page would disable PRR.  But I
>>> can go either way if others have a strong preference for phys_addr_t.
>>>
>>
>> Oh okay, this looks simple to reset the prr bit.
>> But since this page is allocated and is used inside gfx driver
>> before being utilized for prr bit operation, would it be safe for
>> drm/gfx driver to keep a reference to this page in smmu driver?
>>
>> Since we only need the page address for configuring the
>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
> 
> I don't think the smmu driver needs to keep a reference to the page..
> we can just say it is the responsibility of the drm driver to call
> set_prr(NULL) before freeing the page
> 

That makes sense. If we go by this NULL page method to disable the PRR, 
we would have to set the address registers to reset value as well.

The sequence would be like the following as per my understaning:
- Check if it's NULL page
- Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
   these registers
- Reset the PRR bit in actlr register

Similar to this snippet:

#PRR_RESET_ADDR 0x0

--------------
reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
reg &= ~GFX_ACTLR_PRR;
arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);

if (!prr_page) {
	writel_relaxed(PRR_RESET_ADDR,
			smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
	writel_relaxed(PRR_RESET_ADDR),
                         smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
	return;
}


writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
  		smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);

writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
  		smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);

reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
-----------------

If looks good, will implement the same in next version.

Thanks & regards,
Bibek

> BR,
> -R
> 
>>> Otherwise, lgtm
>>>
>>> BR,
>>> -R
>>>
>>
>> Thanks & regards,
>> Bibek
>>
>>>> +}
>>>> +
>>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>
>>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
>>>>
>>>>           actlrvar = qsmmu->data->actlrvar;
>>>>           if (!actlrvar)
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> index d9c2ef8c1653..3076bef49e20 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>
>>>>    #define ARM_SMMU_CB_ACTLR              0x4
>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>
>>>>    #define ARM_SMMU_CB_RESUME             0x8
>>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
>>>> --- a/include/linux/adreno-smmu-priv.h
>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
>>>>     *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>>>     *                 the GPU driver must call resume_translation()
>>>>     * @resume_translation: Resume translation after a fault
>>>> - *
>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
>>>> + *                 ACTLR register bits, currently used to configure
>>>> + *                 Partially-Resident-Region (PRR) feature's
>>>> + *                 setup and reset sequence as requested.
>>>>     *
>>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
>>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>        void (*set_stall)(const void *cookie, bool enabled);
>>>>        void (*resume_translation)(const void *cookie, bool terminate);
>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
>>>>    };
>>>>
>>>>    #endif /* __ADRENO_SMMU_PRIV_H */
>>>> --
>>>> 2.34.1
>>>>
Rob Clark July 1, 2024, 8:31 p.m. UTC | #2
On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 6/28/2024 9:14 PM, Rob Clark wrote:
> > On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 6/28/2024 7:47 PM, Rob Clark wrote:
> >>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
> >>> <quic_bibekkum@quicinc.com> wrote:
> >>>>
> >>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>> sequence as per request.
> >>>>
> >>>> This will be used by GPU to setup the PRR bit and related
> >>>> configuration registers through adreno-smmu private
> >>>> interface instead of directly poking the smmu hardware.
> >>>>
> >>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>> ---
> >>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
> >>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>    include/linux/adreno-smmu-priv.h           |  6 +++++-
> >>>>    3 files changed, 30 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> index bd101a161d04..64571a1c47b8 100644
> >>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> @@ -28,6 +28,7 @@
> >>>>    #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
> >>>>    #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
> >>>>    #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> >>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>
> >>>>    static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> >>>>           { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> >>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>    }
> >>>>
> >>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
> >>>> +{
> >>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>> +       u32 reg = 0;
> >>>> +
> >>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>> +
> >>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>> +
> >>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>> +       if (set)
> >>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>> +
> >>>
> >>> nit, extra line
> >>>
> >>
> >> Ack, will remove this. Thanks for pointing out.
> >>
> >>> Also, if you passed a `struct page *` instead, then you could drop the
> >>> bool param, ie. passing NULL for the page would disable PRR.  But I
> >>> can go either way if others have a strong preference for phys_addr_t.
> >>>
> >>
> >> Oh okay, this looks simple to reset the prr bit.
> >> But since this page is allocated and is used inside gfx driver
> >> before being utilized for prr bit operation, would it be safe for
> >> drm/gfx driver to keep a reference to this page in smmu driver?
> >>
> >> Since we only need the page address for configuring the
> >> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
> >
> > I don't think the smmu driver needs to keep a reference to the page..
> > we can just say it is the responsibility of the drm driver to call
> > set_prr(NULL) before freeing the page
> >
>
> That makes sense. If we go by this NULL page method to disable the PRR,
> we would have to set the address registers to reset value as well.
>
> The sequence would be like the following as per my understaning:
> - Check if it's NULL page
> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
>    these registers
> - Reset the PRR bit in actlr register
>
> Similar to this snippet:
>
> #PRR_RESET_ADDR 0x0
>
> --------------
> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> reg &= ~GFX_ACTLR_PRR;
> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>
> if (!prr_page) {
>         writel_relaxed(PRR_RESET_ADDR,
>                         smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>         writel_relaxed(PRR_RESET_ADDR),
>                          smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>         return;
> }
>
>
> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
>                 smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>
> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
>                 smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>
> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> -----------------
>
> If looks good, will implement the same in next version.

yeah, that looks like it could work..

you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
and probably could avoid double writing ACTLR, but that is getting
into bikeshedding

BR,
-R

>
> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >>> Otherwise, lgtm
> >>>
> >>> BR,
> >>> -R
> >>>
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>>> +}
> >>>> +
> >>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>
> >>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
> >>>>
> >>>>           actlrvar = qsmmu->data->actlrvar;
> >>>>           if (!actlrvar)
> >>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> index d9c2ef8c1653..3076bef49e20 100644
> >>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>
> >>>>    #define ARM_SMMU_CB_ACTLR              0x4
> >>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>
> >>>>    #define ARM_SMMU_CB_RESUME             0x8
> >>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>> index c637e0997f6d..d6e2ca9f8d8c 100644
> >>>> --- a/include/linux/adreno-smmu-priv.h
> >>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
> >>>>     *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
> >>>>     *                 the GPU driver must call resume_translation()
> >>>>     * @resume_translation: Resume translation after a fault
> >>>> - *
> >>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
> >>>> + *                 ACTLR register bits, currently used to configure
> >>>> + *                 Partially-Resident-Region (PRR) feature's
> >>>> + *                 setup and reset sequence as requested.
> >>>>     *
> >>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
> >>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>        void (*set_stall)(const void *cookie, bool enabled);
> >>>>        void (*resume_translation)(const void *cookie, bool terminate);
> >>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
> >>>>    };
> >>>>
> >>>>    #endif /* __ADRENO_SMMU_PRIV_H */
> >>>> --
> >>>> 2.34.1
> >>>>
Bibek Kumar Patro July 3, 2024, 11:38 a.m. UTC | #3
On 7/2/2024 2:01 AM, Rob Clark wrote:
> On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 6/28/2024 9:14 PM, Rob Clark wrote:
>>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
>>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>> sequence as per request.
>>>>>>
>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>> configuration registers through adreno-smmu private
>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>
>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>> ---
>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>     include/linux/adreno-smmu-priv.h           |  6 +++++-
>>>>>>     3 files changed, 30 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> index bd101a161d04..64571a1c47b8 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> @@ -28,6 +28,7 @@
>>>>>>     #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>>>     #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>>>     #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>
>>>>>>     static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>>            { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>     }
>>>>>>
>>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
>>>>>> +{
>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>> +       u32 reg = 0;
>>>>>> +
>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>> +
>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>> +
>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>> +       if (set)
>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>> +
>>>>>
>>>>> nit, extra line
>>>>>
>>>>
>>>> Ack, will remove this. Thanks for pointing out.
>>>>
>>>>> Also, if you passed a `struct page *` instead, then you could drop the
>>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
>>>>> can go either way if others have a strong preference for phys_addr_t.
>>>>>
>>>>
>>>> Oh okay, this looks simple to reset the prr bit.
>>>> But since this page is allocated and is used inside gfx driver
>>>> before being utilized for prr bit operation, would it be safe for
>>>> drm/gfx driver to keep a reference to this page in smmu driver?
>>>>
>>>> Since we only need the page address for configuring the
>>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
>>>
>>> I don't think the smmu driver needs to keep a reference to the page..
>>> we can just say it is the responsibility of the drm driver to call
>>> set_prr(NULL) before freeing the page
>>>
>>
>> That makes sense. If we go by this NULL page method to disable the PRR,
>> we would have to set the address registers to reset value as well.
>>
>> The sequence would be like the following as per my understaning:
>> - Check if it's NULL page
>> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
>>     these registers
>> - Reset the PRR bit in actlr register
>>
>> Similar to this snippet:
>>
>> #PRR_RESET_ADDR 0x0
>>
>> --------------
>> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>> reg &= ~GFX_ACTLR_PRR;
>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>
>> if (!prr_page) {
>>          writel_relaxed(PRR_RESET_ADDR,
>>                          smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>          writel_relaxed(PRR_RESET_ADDR),
>>                           smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>          return;
>> }
>>
>>
>> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
>>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>
>> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
>>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>
>> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>> -----------------
>>
>> If looks good, will implement the same in next version.
> 
> yeah, that looks like it could work..
> 
> you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
> and probably could avoid double writing ACTLR, but that is getting
> into bikeshedding
> 

Actually Rob, since you rightly pointed this out.
I crosschecked again on these registers.
PRR_CFG_*ADDR is a global register in SMMU space but
ACTLR register including PRR bit is a per-domain register.
There might also be a situation where PRR feature need to be
disabled or enabled separately for each domain.
So I think it would be cleaner to have two apis, set_prr_addr(),
set_prr_bit().
set_prr_addr() will be used only to set this PRR_CFG_*ADDR
register by passing a 'struct page *'
set_prr_bit() will be used as a switch for PRR feature,
where required smmu_domain will be passed along with
the bool value to set/reset the PRR bit depending on which this
feature will be enabled/disabled for the selected domain.

Thanks & regards,
Bibek

> BR,
> -R
> 
>>
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>>> Otherwise, lgtm
>>>>>
>>>>> BR,
>>>>> -R
>>>>>
>>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>>> +}
>>>>>> +
>>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>
>>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
>>>>>>
>>>>>>            actlrvar = qsmmu->data->actlrvar;
>>>>>>            if (!actlrvar)
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> index d9c2ef8c1653..3076bef49e20 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>
>>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>
>>>>>>     #define ARM_SMMU_CB_RESUME             0x8
>>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
>>>>>>      *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>>>>>      *                 the GPU driver must call resume_translation()
>>>>>>      * @resume_translation: Resume translation after a fault
>>>>>> - *
>>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
>>>>>> + *                 ACTLR register bits, currently used to configure
>>>>>> + *                 Partially-Resident-Region (PRR) feature's
>>>>>> + *                 setup and reset sequence as requested.
>>>>>>      *
>>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
>>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>         void (*set_stall)(const void *cookie, bool enabled);
>>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
>>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
>>>>>>     };
>>>>>>
>>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>> --
>>>>>> 2.34.1
>>>>>>
Rob Clark July 4, 2024, 2:46 p.m. UTC | #4
On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 7/2/2024 2:01 AM, Rob Clark wrote:
> > On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 6/28/2024 9:14 PM, Rob Clark wrote:
> >>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
> >>> <quic_bibekkum@quicinc.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
> >>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
> >>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>
> >>>>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>>>> sequence as per request.
> >>>>>>
> >>>>>> This will be used by GPU to setup the PRR bit and related
> >>>>>> configuration registers through adreno-smmu private
> >>>>>> interface instead of directly poking the smmu hardware.
> >>>>>>
> >>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>> ---
> >>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
> >>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>>>     include/linux/adreno-smmu-priv.h           |  6 +++++-
> >>>>>>     3 files changed, 30 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> index bd101a161d04..64571a1c47b8 100644
> >>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> @@ -28,6 +28,7 @@
> >>>>>>     #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
> >>>>>>     #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
> >>>>>>     #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> >>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>>>
> >>>>>>     static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> >>>>>>            { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> >>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>>>     }
> >>>>>>
> >>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
> >>>>>> +{
> >>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>> +       u32 reg = 0;
> >>>>>> +
> >>>>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>> +
> >>>>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>> +
> >>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>>>> +       if (set)
> >>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>> +
> >>>>>
> >>>>> nit, extra line
> >>>>>
> >>>>
> >>>> Ack, will remove this. Thanks for pointing out.
> >>>>
> >>>>> Also, if you passed a `struct page *` instead, then you could drop the
> >>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
> >>>>> can go either way if others have a strong preference for phys_addr_t.
> >>>>>
> >>>>
> >>>> Oh okay, this looks simple to reset the prr bit.
> >>>> But since this page is allocated and is used inside gfx driver
> >>>> before being utilized for prr bit operation, would it be safe for
> >>>> drm/gfx driver to keep a reference to this page in smmu driver?
> >>>>
> >>>> Since we only need the page address for configuring the
> >>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
> >>>
> >>> I don't think the smmu driver needs to keep a reference to the page..
> >>> we can just say it is the responsibility of the drm driver to call
> >>> set_prr(NULL) before freeing the page
> >>>
> >>
> >> That makes sense. If we go by this NULL page method to disable the PRR,
> >> we would have to set the address registers to reset value as well.
> >>
> >> The sequence would be like the following as per my understaning:
> >> - Check if it's NULL page
> >> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
> >>     these registers
> >> - Reset the PRR bit in actlr register
> >>
> >> Similar to this snippet:
> >>
> >> #PRR_RESET_ADDR 0x0
> >>
> >> --------------
> >> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >> reg &= ~GFX_ACTLR_PRR;
> >> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>
> >> if (!prr_page) {
> >>          writel_relaxed(PRR_RESET_ADDR,
> >>                          smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>          writel_relaxed(PRR_RESET_ADDR),
> >>                           smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>          return;
> >> }
> >>
> >>
> >> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
> >>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>
> >> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
> >>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>
> >> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >> -----------------
> >>
> >> If looks good, will implement the same in next version.
> >
> > yeah, that looks like it could work..
> >
> > you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
> > and probably could avoid double writing ACTLR, but that is getting
> > into bikeshedding
> >
>
> Actually Rob, since you rightly pointed this out.
> I crosschecked again on these registers.
> PRR_CFG_*ADDR is a global register in SMMU space but
> ACTLR register including PRR bit is a per-domain register.
> There might also be a situation where PRR feature need to be
> disabled or enabled separately for each domain.
> So I think it would be cleaner to have two apis, set_prr_addr(),
> set_prr_bit().
> set_prr_addr() will be used only to set this PRR_CFG_*ADDR
> register by passing a 'struct page *'
> set_prr_bit() will be used as a switch for PRR feature,
> where required smmu_domain will be passed along with
> the bool value to set/reset the PRR bit depending on which this
> feature will be enabled/disabled for the selected domain.

on a related note, adreno has been using arm-smmu for a number of
generations, I guess not all support PRR?  The drm driver will need to
know whether PRR is supported (and expose that to userspace to let the
UMD know whether to expose certain extensions).  How should this work?

BR,
-R

> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>> BR,
> >>> -R
> >>>
> >>>>> Otherwise, lgtm
> >>>>>
> >>>>> BR,
> >>>>> -R
> >>>>>
> >>>>
> >>>> Thanks & regards,
> >>>> Bibek
> >>>>
> >>>>>> +}
> >>>>>> +
> >>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>>>
> >>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
> >>>>>>
> >>>>>>            actlrvar = qsmmu->data->actlrvar;
> >>>>>>            if (!actlrvar)
> >>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>> index d9c2ef8c1653..3076bef49e20 100644
> >>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>>>
> >>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
> >>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>>>
> >>>>>>     #define ARM_SMMU_CB_RESUME             0x8
> >>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
> >>>>>> --- a/include/linux/adreno-smmu-priv.h
> >>>>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
> >>>>>>      *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
> >>>>>>      *                 the GPU driver must call resume_translation()
> >>>>>>      * @resume_translation: Resume translation after a fault
> >>>>>> - *
> >>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
> >>>>>> + *                 ACTLR register bits, currently used to configure
> >>>>>> + *                 Partially-Resident-Region (PRR) feature's
> >>>>>> + *                 setup and reset sequence as requested.
> >>>>>>      *
> >>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
> >>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>>>         void (*set_stall)(const void *cookie, bool enabled);
> >>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
> >>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
> >>>>>>     };
> >>>>>>
> >>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
> >>>>>> --
> >>>>>> 2.34.1
> >>>>>>
Rob Clark July 4, 2024, 3:58 p.m. UTC | #5
On Thu, Jul 4, 2024 at 7:46 AM Rob Clark <robdclark@gmail.com> wrote:
>
> On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
> >
> >
> >
> > On 7/2/2024 2:01 AM, Rob Clark wrote:
> > > On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
> > > <quic_bibekkum@quicinc.com> wrote:
> > >>
> > >>
> > >>
> > >> On 6/28/2024 9:14 PM, Rob Clark wrote:
> > >>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
> > >>> <quic_bibekkum@quicinc.com> wrote:
> > >>>>
> > >>>>
> > >>>>
> > >>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
> > >>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
> > >>>>> <quic_bibekkum@quicinc.com> wrote:
> > >>>>>>
> > >>>>>> Add an adreno-smmu-priv interface for drm/msm to call
> > >>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> > >>>>>> sequence as per request.
> > >>>>>>
> > >>>>>> This will be used by GPU to setup the PRR bit and related
> > >>>>>> configuration registers through adreno-smmu private
> > >>>>>> interface instead of directly poking the smmu hardware.
> > >>>>>>
> > >>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> > >>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > >>>>>> ---
> > >>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
> > >>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> > >>>>>>     include/linux/adreno-smmu-priv.h           |  6 +++++-
> > >>>>>>     3 files changed, 30 insertions(+), 1 deletion(-)
> > >>>>>>
> > >>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > >>>>>> index bd101a161d04..64571a1c47b8 100644
> > >>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > >>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > >>>>>> @@ -28,6 +28,7 @@
> > >>>>>>     #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
> > >>>>>>     #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
> > >>>>>>     #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> > >>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> > >>>>>>
> > >>>>>>     static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> > >>>>>>            { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> > >>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> > >>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> > >>>>>>     }
> > >>>>>>
> > >>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
> > >>>>>> +{
> > >>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> > >>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> > >>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > >>>>>> +       u32 reg = 0;
> > >>>>>> +
> > >>>>>> +       writel_relaxed(lower_32_bits(page_addr),
> > >>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> > >>>>>> +
> > >>>>>> +       writel_relaxed(upper_32_bits(page_addr),
> > >>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> > >>>>>> +
> > >>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> > >>>>>> +       reg &= ~GFX_ACTLR_PRR;
> > >>>>>> +       if (set)
> > >>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> > >>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> > >>>>>> +
> > >>>>>
> > >>>>> nit, extra line
> > >>>>>
> > >>>>
> > >>>> Ack, will remove this. Thanks for pointing out.
> > >>>>
> > >>>>> Also, if you passed a `struct page *` instead, then you could drop the
> > >>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
> > >>>>> can go either way if others have a strong preference for phys_addr_t.
> > >>>>>
> > >>>>
> > >>>> Oh okay, this looks simple to reset the prr bit.
> > >>>> But since this page is allocated and is used inside gfx driver
> > >>>> before being utilized for prr bit operation, would it be safe for
> > >>>> drm/gfx driver to keep a reference to this page in smmu driver?
> > >>>>
> > >>>> Since we only need the page address for configuring the
> > >>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
> > >>>
> > >>> I don't think the smmu driver needs to keep a reference to the page..
> > >>> we can just say it is the responsibility of the drm driver to call
> > >>> set_prr(NULL) before freeing the page
> > >>>
> > >>
> > >> That makes sense. If we go by this NULL page method to disable the PRR,
> > >> we would have to set the address registers to reset value as well.
> > >>
> > >> The sequence would be like the following as per my understaning:
> > >> - Check if it's NULL page
> > >> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
> > >>     these registers
> > >> - Reset the PRR bit in actlr register
> > >>
> > >> Similar to this snippet:
> > >>
> > >> #PRR_RESET_ADDR 0x0
> > >>
> > >> --------------
> > >> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> > >> reg &= ~GFX_ACTLR_PRR;
> > >> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> > >>
> > >> if (!prr_page) {
> > >>          writel_relaxed(PRR_RESET_ADDR,
> > >>                          smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> > >>          writel_relaxed(PRR_RESET_ADDR),
> > >>                           smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> > >>          return;
> > >> }
> > >>
> > >>
> > >> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
> > >>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> > >>
> > >> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
> > >>                  smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> > >>
> > >> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> > >> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> > >> -----------------
> > >>
> > >> If looks good, will implement the same in next version.
> > >
> > > yeah, that looks like it could work..
> > >
> > > you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
> > > and probably could avoid double writing ACTLR, but that is getting
> > > into bikeshedding
> > >
> >
> > Actually Rob, since you rightly pointed this out.
> > I crosschecked again on these registers.
> > PRR_CFG_*ADDR is a global register in SMMU space but
> > ACTLR register including PRR bit is a per-domain register.
> > There might also be a situation where PRR feature need to be
> > disabled or enabled separately for each domain.
> > So I think it would be cleaner to have two apis, set_prr_addr(),
> > set_prr_bit().
> > set_prr_addr() will be used only to set this PRR_CFG_*ADDR
> > register by passing a 'struct page *'
> > set_prr_bit() will be used as a switch for PRR feature,
> > where required smmu_domain will be passed along with
> > the bool value to set/reset the PRR bit depending on which this
> > feature will be enabled/disabled for the selected domain.
>
> on a related note, adreno has been using arm-smmu for a number of
> generations, I guess not all support PRR?  The drm driver will need to
> know whether PRR is supported (and expose that to userspace to let the
> UMD know whether to expose certain extensions).  How should this work?

So, I noticed in the x1e80100.dtsi that there is a gpu_prr_mem
reserved section..  maybe we should be connecting this to the smmu
driver in dt, and using that to detect presence of PRR?  Ie. the smmu
driver would configure PRR_CFG_*ADDR based on the reserved mem, and
the interface to drm would just be to enable/disable PRR, returning an
error code if the reserved mem section isn't there.

This simplifies the interface, and handles the question of how to
detect if PRR is supported.

BR,
-R

> BR,
> -R
>
> > Thanks & regards,
> > Bibek
> >
> > > BR,
> > > -R
> > >
> > >>
> > >> Thanks & regards,
> > >> Bibek
> > >>
> > >>> BR,
> > >>> -R
> > >>>
> > >>>>> Otherwise, lgtm
> > >>>>>
> > >>>>> BR,
> > >>>>> -R
> > >>>>>
> > >>>>
> > >>>> Thanks & regards,
> > >>>> Bibek
> > >>>>
> > >>>>>> +}
> > >>>>>> +
> > >>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
> > >>>>>>
> > >>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> > >>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> > >>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> > >>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
> > >>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
> > >>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
> > >>>>>>
> > >>>>>>            actlrvar = qsmmu->data->actlrvar;
> > >>>>>>            if (!actlrvar)
> > >>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > >>>>>> index d9c2ef8c1653..3076bef49e20 100644
> > >>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > >>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > >>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> > >>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
> > >>>>>>
> > >>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
> > >>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> > >>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> > >>>>>>
> > >>>>>>     #define ARM_SMMU_CB_RESUME             0x8
> > >>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> > >>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> > >>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
> > >>>>>> --- a/include/linux/adreno-smmu-priv.h
> > >>>>>> +++ b/include/linux/adreno-smmu-priv.h
> > >>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
> > >>>>>>      *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
> > >>>>>>      *                 the GPU driver must call resume_translation()
> > >>>>>>      * @resume_translation: Resume translation after a fault
> > >>>>>> - *
> > >>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
> > >>>>>> + *                 ACTLR register bits, currently used to configure
> > >>>>>> + *                 Partially-Resident-Region (PRR) feature's
> > >>>>>> + *                 setup and reset sequence as requested.
> > >>>>>>      *
> > >>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> > >>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> > >>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
> > >>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> > >>>>>>         void (*set_stall)(const void *cookie, bool enabled);
> > >>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
> > >>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
> > >>>>>>     };
> > >>>>>>
> > >>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
> > >>>>>> --
> > >>>>>> 2.34.1
> > >>>>>>
Bibek Kumar Patro July 9, 2024, 7:42 p.m. UTC | #6
On 7/4/2024 9:28 PM, Rob Clark wrote:
> On Thu, Jul 4, 2024 at 7:46 AM Rob Clark <robdclark@gmail.com> wrote:
>>
>> On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
>> <quic_bibekkum@quicinc.com> wrote:
>>>
>>>
>>>
>>> On 7/2/2024 2:01 AM, Rob Clark wrote:
>>>> On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 6/28/2024 9:14 PM, Rob Clark wrote:
>>>>>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
>>>>>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>>>> sequence as per request.
>>>>>>>>>
>>>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>>>> configuration registers through adreno-smmu private
>>>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>>>
>>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>> ---
>>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
>>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>>>      include/linux/adreno-smmu-priv.h           |  6 +++++-
>>>>>>>>>      3 files changed, 30 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> index bd101a161d04..64571a1c47b8 100644
>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> @@ -28,6 +28,7 @@
>>>>>>>>>      #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>>>>>>      #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>>>>>>      #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>>>>
>>>>>>>>>      static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>>>>>             { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>>>             arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
>>>>>>>>> +{
>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>>>> +       u32 reg = 0;
>>>>>>>>> +
>>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>> +
>>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>> +
>>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>>>> +       if (set)
>>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>> +
>>>>>>>>
>>>>>>>> nit, extra line
>>>>>>>>
>>>>>>>
>>>>>>> Ack, will remove this. Thanks for pointing out.
>>>>>>>
>>>>>>>> Also, if you passed a `struct page *` instead, then you could drop the
>>>>>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
>>>>>>>> can go either way if others have a strong preference for phys_addr_t.
>>>>>>>>
>>>>>>>
>>>>>>> Oh okay, this looks simple to reset the prr bit.
>>>>>>> But since this page is allocated and is used inside gfx driver
>>>>>>> before being utilized for prr bit operation, would it be safe for
>>>>>>> drm/gfx driver to keep a reference to this page in smmu driver?
>>>>>>>
>>>>>>> Since we only need the page address for configuring the
>>>>>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
>>>>>>
>>>>>> I don't think the smmu driver needs to keep a reference to the page..
>>>>>> we can just say it is the responsibility of the drm driver to call
>>>>>> set_prr(NULL) before freeing the page
>>>>>>
>>>>>
>>>>> That makes sense. If we go by this NULL page method to disable the PRR,
>>>>> we would have to set the address registers to reset value as well.
>>>>>
>>>>> The sequence would be like the following as per my understaning:
>>>>> - Check if it's NULL page
>>>>> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
>>>>>      these registers
>>>>> - Reset the PRR bit in actlr register
>>>>>
>>>>> Similar to this snippet:
>>>>>
>>>>> #PRR_RESET_ADDR 0x0
>>>>>
>>>>> --------------
>>>>> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>> reg &= ~GFX_ACTLR_PRR;
>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>
>>>>> if (!prr_page) {
>>>>>           writel_relaxed(PRR_RESET_ADDR,
>>>>>                           smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>           writel_relaxed(PRR_RESET_ADDR),
>>>>>                            smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>           return;
>>>>> }
>>>>>
>>>>>
>>>>> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
>>>>>                   smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>
>>>>> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
>>>>>                   smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>
>>>>> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>> -----------------
>>>>>
>>>>> If looks good, will implement the same in next version.
>>>>
>>>> yeah, that looks like it could work..
>>>>
>>>> you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
>>>> and probably could avoid double writing ACTLR, but that is getting
>>>> into bikeshedding
>>>>
>>>
>>> Actually Rob, since you rightly pointed this out.
>>> I crosschecked again on these registers.
>>> PRR_CFG_*ADDR is a global register in SMMU space but
>>> ACTLR register including PRR bit is a per-domain register.
>>> There might also be a situation where PRR feature need to be
>>> disabled or enabled separately for each domain.
>>> So I think it would be cleaner to have two apis, set_prr_addr(),
>>> set_prr_bit().
>>> set_prr_addr() will be used only to set this PRR_CFG_*ADDR
>>> register by passing a 'struct page *'
>>> set_prr_bit() will be used as a switch for PRR feature,
>>> where required smmu_domain will be passed along with
>>> the bool value to set/reset the PRR bit depending on which this
>>> feature will be enabled/disabled for the selected domain.
>>
>> on a related note, adreno has been using arm-smmu for a number of
>> generations, I guess not all support PRR?  The drm driver will need to
>> know whether PRR is supported (and expose that to userspace to let the
>> UMD know whether to expose certain extensions).  How should this work?
> 
> So, I noticed in the x1e80100.dtsi that there is a gpu_prr_mem
> reserved section..  maybe we should be connecting this to the smmu
> driver in dt, and using that to detect presence of PRR?  Ie. the smmu
> driver would configure PRR_CFG_*ADDR based on the reserved mem, and
> the interface to drm would just be to enable/disable PRR, returning an
> error code if the reserved mem section isn't there.
> 
> This simplifies the interface, and handles the question of how to
> detect if PRR is supported.
> 
> BR,
> -R
> 

As I checked gpu_prr_mem reserved mem section is not used for mobile
targets hence not present for other DT only compute targets like
x1e80100.dtsi has the same. PRR looks to be smmu version specific
property.
This feature is present in all the targets using SMMU-500,
I am still checking for targets using versions prior to smmu-500.
Then maybe we can use the smmu compatible string itself (e.g.
qcom,smmu-500 && qcom,adreno-smmu) to connect to smmu driver for
checking the presence of PRR ?
And if the compatible string is different then we can return the
error code signifying PRR feature is not supported on the particular
smmu version.

Thanks & regards,
Bibek

>> BR,
>> -R
>>
>>> Thanks & regards,
>>> Bibek
>>>
>>>> BR,
>>>> -R
>>>>
>>>>>
>>>>> Thanks & regards,
>>>>> Bibek
>>>>>
>>>>>> BR,
>>>>>> -R
>>>>>>
>>>>>>>> Otherwise, lgtm
>>>>>>>>
>>>>>>>> BR,
>>>>>>>> -R
>>>>>>>>
>>>>>>>
>>>>>>> Thanks & regards,
>>>>>>> Bibek
>>>>>>>
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>      #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>>>>
>>>>>>>>>      static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>>>             priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>>>>             priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>>>>             priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
>>>>>>>>>
>>>>>>>>>             actlrvar = qsmmu->data->actlrvar;
>>>>>>>>>             if (!actlrvar)
>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> index d9c2ef8c1653..3076bef49e20 100644
>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>>>>      #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>>>>
>>>>>>>>>      #define ARM_SMMU_CB_ACTLR              0x4
>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>>>>
>>>>>>>>>      #define ARM_SMMU_CB_RESUME             0x8
>>>>>>>>>      #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
>>>>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
>>>>>>>>>       *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>>>>>>>>       *                 the GPU driver must call resume_translation()
>>>>>>>>>       * @resume_translation: Resume translation after a fault
>>>>>>>>> - *
>>>>>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
>>>>>>>>> + *                 ACTLR register bits, currently used to configure
>>>>>>>>> + *                 Partially-Resident-Region (PRR) feature's
>>>>>>>>> + *                 setup and reset sequence as requested.
>>>>>>>>>       *
>>>>>>>>>       * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>>>>       * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
>>>>>>>>>          void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>>>>          void (*set_stall)(const void *cookie, bool enabled);
>>>>>>>>>          void (*resume_translation)(const void *cookie, bool terminate);
>>>>>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
>>>>>>>>>      };
>>>>>>>>>
>>>>>>>>>      #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>>>>> --
>>>>>>>>> 2.34.1
>>>>>>>>>
Bibek Kumar Patro July 15, 2024, 10:59 a.m. UTC | #7
On 7/10/2024 10:31 PM, Rob Clark wrote:
> On Tue, Jul 9, 2024 at 12:43 PM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 7/4/2024 9:28 PM, Rob Clark wrote:
>>> On Thu, Jul 4, 2024 at 7:46 AM Rob Clark <robdclark@gmail.com> wrote:
>>>>
>>>> On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 7/2/2024 2:01 AM, Rob Clark wrote:
>>>>>> On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 6/28/2024 9:14 PM, Rob Clark wrote:
>>>>>>>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
>>>>>>>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
>>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>>>>>> sequence as per request.
>>>>>>>>>>>
>>>>>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>>>>>> configuration registers through adreno-smmu private
>>>>>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>>>>>
>>>>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>>>> ---
>>>>>>>>>>>       drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
>>>>>>>>>>>       drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>>>>>       include/linux/adreno-smmu-priv.h           |  6 +++++-
>>>>>>>>>>>       3 files changed, 30 insertions(+), 1 deletion(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>> index bd101a161d04..64571a1c47b8 100644
>>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>> @@ -28,6 +28,7 @@
>>>>>>>>>>>       #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>>>>>>>>       #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>>>>>>>>       #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>>>>>>
>>>>>>>>>>>       static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>>>>>>>              { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>>>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>>>>>              arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>>>>>       }
>>>>>>>>>>>
>>>>>>>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
>>>>>>>>>>> +{
>>>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>>>>>> +       u32 reg = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>>>> +
>>>>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>>>> +
>>>>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>>>>>> +       if (set)
>>>>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>>>> +
>>>>>>>>>>
>>>>>>>>>> nit, extra line
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Ack, will remove this. Thanks for pointing out.
>>>>>>>>>
>>>>>>>>>> Also, if you passed a `struct page *` instead, then you could drop the
>>>>>>>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
>>>>>>>>>> can go either way if others have a strong preference for phys_addr_t.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Oh okay, this looks simple to reset the prr bit.
>>>>>>>>> But since this page is allocated and is used inside gfx driver
>>>>>>>>> before being utilized for prr bit operation, would it be safe for
>>>>>>>>> drm/gfx driver to keep a reference to this page in smmu driver?
>>>>>>>>>
>>>>>>>>> Since we only need the page address for configuring the
>>>>>>>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
>>>>>>>>
>>>>>>>> I don't think the smmu driver needs to keep a reference to the page..
>>>>>>>> we can just say it is the responsibility of the drm driver to call
>>>>>>>> set_prr(NULL) before freeing the page
>>>>>>>>
>>>>>>>
>>>>>>> That makes sense. If we go by this NULL page method to disable the PRR,
>>>>>>> we would have to set the address registers to reset value as well.
>>>>>>>
>>>>>>> The sequence would be like the following as per my understaning:
>>>>>>> - Check if it's NULL page
>>>>>>> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
>>>>>>>       these registers
>>>>>>> - Reset the PRR bit in actlr register
>>>>>>>
>>>>>>> Similar to this snippet:
>>>>>>>
>>>>>>> #PRR_RESET_ADDR 0x0
>>>>>>>
>>>>>>> --------------
>>>>>>> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>> reg &= ~GFX_ACTLR_PRR;
>>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>
>>>>>>> if (!prr_page) {
>>>>>>>            writel_relaxed(PRR_RESET_ADDR,
>>>>>>>                            smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>            writel_relaxed(PRR_RESET_ADDR),
>>>>>>>                             smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>            return;
>>>>>>> }
>>>>>>>
>>>>>>>
>>>>>>> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
>>>>>>>                    smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>
>>>>>>> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
>>>>>>>                    smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>
>>>>>>> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>> -----------------
>>>>>>>
>>>>>>> If looks good, will implement the same in next version.
>>>>>>
>>>>>> yeah, that looks like it could work..
>>>>>>
>>>>>> you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
>>>>>> and probably could avoid double writing ACTLR, but that is getting
>>>>>> into bikeshedding
>>>>>>
>>>>>
>>>>> Actually Rob, since you rightly pointed this out.
>>>>> I crosschecked again on these registers.
>>>>> PRR_CFG_*ADDR is a global register in SMMU space but
>>>>> ACTLR register including PRR bit is a per-domain register.
>>>>> There might also be a situation where PRR feature need to be
>>>>> disabled or enabled separately for each domain.
>>>>> So I think it would be cleaner to have two apis, set_prr_addr(),
>>>>> set_prr_bit().
>>>>> set_prr_addr() will be used only to set this PRR_CFG_*ADDR
>>>>> register by passing a 'struct page *'
>>>>> set_prr_bit() will be used as a switch for PRR feature,
>>>>> where required smmu_domain will be passed along with
>>>>> the bool value to set/reset the PRR bit depending on which this
>>>>> feature will be enabled/disabled for the selected domain.
>>>>
>>>> on a related note, adreno has been using arm-smmu for a number of
>>>> generations, I guess not all support PRR?  The drm driver will need to
>>>> know whether PRR is supported (and expose that to userspace to let the
>>>> UMD know whether to expose certain extensions).  How should this work?
>>>
>>> So, I noticed in the x1e80100.dtsi that there is a gpu_prr_mem
>>> reserved section..  maybe we should be connecting this to the smmu
>>> driver in dt, and using that to detect presence of PRR?  Ie. the smmu
>>> driver would configure PRR_CFG_*ADDR based on the reserved mem, and
>>> the interface to drm would just be to enable/disable PRR, returning an
>>> error code if the reserved mem section isn't there.
>>>
>>> This simplifies the interface, and handles the question of how to
>>> detect if PRR is supported.
>>>
>>> BR,
>>> -R
>>>
>>
>> As I checked gpu_prr_mem reserved mem section is not used for mobile
>> targets hence not present for other DT only compute targets like
>> x1e80100.dtsi has the same. PRR looks to be smmu version specific
>> property.
> 
> I only see it in gpu_prr_mem in x1e80100.dtsi, but not documented
> anywhere.  I'm only assuming based on the name that it is intended to
> be for PRR (but not sure why it is larger than 0x1000).  Are the
> PRR_CFG_*ADDR regs programmed by the fw (and access blocked in EL1) on
> this device?
> 

As I checked, if the drm/gfx driver allocates the page for drm, then 
this reserved-memory region is not required.

PRR_CFG_*ADDR regs have read and write access in EL1 only for this 
device, behavior is same as other devices as well. These are not
programmed by fw.

> As far as other DT, we haven't enabled PRR anywhere yet.  I think it
> would be perfectly valid to require a new reserved-memory node to
> enable PRR.
> 

As mentioned above reserved-memory region is not required if gfx/drm
allocates the page.

>> This feature is present in all the targets using SMMU-500,
>> I am still checking for targets using versions prior to smmu-500.
>> Then maybe we can use the smmu compatible string itself (e.g.
>> qcom,smmu-500 && qcom,adreno-smmu) to connect to smmu driver for
>> checking the presence of PRR ?
> 
> If there is a clean break, such as all smmu-500 have PRR and all
> smmu-v2 do not, then it would be reasonable to use the compat strings.
> If not, I think we need a different way.
> 

Yes, from SMMU block perspective PRR bit is present for ACTLR register
on targets with MMU-500, so feature support is available. So I think we 
can just use the mmu-500 compatible string.

Thanks & regards,
Bibek

> BR,
> -R
> 
>> And if the compatible string is different then we can return the
>> error code signifying PRR feature is not supported on the particular
>> smmu version.
>>
>> Thanks & regards,
>> Bibek
>>
>>>> BR,
>>>> -R
>>>>
>>>>> Thanks & regards,
>>>>> Bibek
>>>>>
>>>>>> BR,
>>>>>> -R
>>>>>>
>>>>>>>
>>>>>>> Thanks & regards,
>>>>>>> Bibek
>>>>>>>
>>>>>>>> BR,
>>>>>>>> -R
>>>>>>>>
>>>>>>>>>> Otherwise, lgtm
>>>>>>>>>>
>>>>>>>>>> BR,
>>>>>>>>>> -R
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks & regards,
>>>>>>>>> Bibek
>>>>>>>>>
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>       #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>>>>>>
>>>>>>>>>>>       static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>>>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>>>>>              priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>>>>>>              priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>>>>>>              priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>>>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
>>>>>>>>>>>
>>>>>>>>>>>              actlrvar = qsmmu->data->actlrvar;
>>>>>>>>>>>              if (!actlrvar)
>>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>>>> index d9c2ef8c1653..3076bef49e20 100644
>>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>>>>>>       #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>>>>>>
>>>>>>>>>>>       #define ARM_SMMU_CB_ACTLR              0x4
>>>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>>>>>>
>>>>>>>>>>>       #define ARM_SMMU_CB_RESUME             0x8
>>>>>>>>>>>       #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>>>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
>>>>>>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>>>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
>>>>>>>>>>>        *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>>>>>>>>>>        *                 the GPU driver must call resume_translation()
>>>>>>>>>>>        * @resume_translation: Resume translation after a fault
>>>>>>>>>>> - *
>>>>>>>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
>>>>>>>>>>> + *                 ACTLR register bits, currently used to configure
>>>>>>>>>>> + *                 Partially-Resident-Region (PRR) feature's
>>>>>>>>>>> + *                 setup and reset sequence as requested.
>>>>>>>>>>>        *
>>>>>>>>>>>        * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>>>>>>        * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>>>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
>>>>>>>>>>>           void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>>>>>>           void (*set_stall)(const void *cookie, bool enabled);
>>>>>>>>>>>           void (*resume_translation)(const void *cookie, bool terminate);
>>>>>>>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
>>>>>>>>>>>       };
>>>>>>>>>>>
>>>>>>>>>>>       #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>>>>>>> --
>>>>>>>>>>> 2.34.1
>>>>>>>>>>>
Bibek Kumar Patro July 15, 2024, 11:01 a.m. UTC | #8
On 7/11/2024 3:31 AM, Konrad Dybcio wrote:
> On 10.07.2024 7:01 PM, Rob Clark wrote:
>> On Tue, Jul 9, 2024 at 12:43 PM Bibek Kumar Patro
>> <quic_bibekkum@quicinc.com> wrote:
> 
> [...]
> 
>> If there is a clean break, such as all smmu-500 have PRR and all
>> smmu-v2 do not, then it would be reasonable to use the compat strings.
>> If not, I think we need a different way.
> 
> if (a reserved-memory region is passed in dt) {
> 	init_prr()
> }
> 

This would have been a straight forward way to pass as a check for PRR
support, but as I mentioned in the above thread reserved-memory region 
is not required if gfx/drm allocates the page.
So I think compatible string would be more approachable way?

Thanks & regards,
Bibek

> Konrad
Rob Clark July 15, 2024, 8:07 p.m. UTC | #9
On Mon, Jul 15, 2024 at 4:00 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 7/10/2024 10:31 PM, Rob Clark wrote:
> > On Tue, Jul 9, 2024 at 12:43 PM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 7/4/2024 9:28 PM, Rob Clark wrote:
> >>> On Thu, Jul 4, 2024 at 7:46 AM Rob Clark <robdclark@gmail.com> wrote:
> >>>>
> >>>> On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
> >>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 7/2/2024 2:01 AM, Rob Clark wrote:
> >>>>>> On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
> >>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>> On 6/28/2024 9:14 PM, Rob Clark wrote:
> >>>>>>>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
> >>>>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
> >>>>>>>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
> >>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>>>>>
> >>>>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>>>>>>>>> sequence as per request.
> >>>>>>>>>>>
> >>>>>>>>>>> This will be used by GPU to setup the PRR bit and related
> >>>>>>>>>>> configuration registers through adreno-smmu private
> >>>>>>>>>>> interface instead of directly poking the smmu hardware.
> >>>>>>>>>>>
> >>>>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>>>>>>> ---
> >>>>>>>>>>>       drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
> >>>>>>>>>>>       drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>>>>>>>>       include/linux/adreno-smmu-priv.h           |  6 +++++-
> >>>>>>>>>>>       3 files changed, 30 insertions(+), 1 deletion(-)
> >>>>>>>>>>>
> >>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>>> index bd101a161d04..64571a1c47b8 100644
> >>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>>> @@ -28,6 +28,7 @@
> >>>>>>>>>>>       #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
> >>>>>>>>>>>       #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
> >>>>>>>>>>>       #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> >>>>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>>>>>>>>
> >>>>>>>>>>>       static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> >>>>>>>>>>>              { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> >>>>>>>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>>>>>>>>              arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>>>>>>>>       }
> >>>>>>>>>>>
> >>>>>>>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
> >>>>>>>>>>> +{
> >>>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>>>>>>> +       u32 reg = 0;
> >>>>>>>>>>> +
> >>>>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>>>>>>> +
> >>>>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>>>>>>> +
> >>>>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>>>>>>>>> +       if (set)
> >>>>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>>>>>>> +
> >>>>>>>>>>
> >>>>>>>>>> nit, extra line
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Ack, will remove this. Thanks for pointing out.
> >>>>>>>>>
> >>>>>>>>>> Also, if you passed a `struct page *` instead, then you could drop the
> >>>>>>>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
> >>>>>>>>>> can go either way if others have a strong preference for phys_addr_t.
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Oh okay, this looks simple to reset the prr bit.
> >>>>>>>>> But since this page is allocated and is used inside gfx driver
> >>>>>>>>> before being utilized for prr bit operation, would it be safe for
> >>>>>>>>> drm/gfx driver to keep a reference to this page in smmu driver?
> >>>>>>>>>
> >>>>>>>>> Since we only need the page address for configuring the
> >>>>>>>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
> >>>>>>>>
> >>>>>>>> I don't think the smmu driver needs to keep a reference to the page..
> >>>>>>>> we can just say it is the responsibility of the drm driver to call
> >>>>>>>> set_prr(NULL) before freeing the page
> >>>>>>>>
> >>>>>>>
> >>>>>>> That makes sense. If we go by this NULL page method to disable the PRR,
> >>>>>>> we would have to set the address registers to reset value as well.
> >>>>>>>
> >>>>>>> The sequence would be like the following as per my understaning:
> >>>>>>> - Check if it's NULL page
> >>>>>>> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
> >>>>>>>       these registers
> >>>>>>> - Reset the PRR bit in actlr register
> >>>>>>>
> >>>>>>> Similar to this snippet:
> >>>>>>>
> >>>>>>> #PRR_RESET_ADDR 0x0
> >>>>>>>
> >>>>>>> --------------
> >>>>>>> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>>>> reg &= ~GFX_ACTLR_PRR;
> >>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>>>
> >>>>>>> if (!prr_page) {
> >>>>>>>            writel_relaxed(PRR_RESET_ADDR,
> >>>>>>>                            smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>>>            writel_relaxed(PRR_RESET_ADDR),
> >>>>>>>                             smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>>>            return;
> >>>>>>> }
> >>>>>>>
> >>>>>>>
> >>>>>>> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
> >>>>>>>                    smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>>>
> >>>>>>> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
> >>>>>>>                    smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>>>
> >>>>>>> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>>> -----------------
> >>>>>>>
> >>>>>>> If looks good, will implement the same in next version.
> >>>>>>
> >>>>>> yeah, that looks like it could work..
> >>>>>>
> >>>>>> you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
> >>>>>> and probably could avoid double writing ACTLR, but that is getting
> >>>>>> into bikeshedding
> >>>>>>
> >>>>>
> >>>>> Actually Rob, since you rightly pointed this out.
> >>>>> I crosschecked again on these registers.
> >>>>> PRR_CFG_*ADDR is a global register in SMMU space but
> >>>>> ACTLR register including PRR bit is a per-domain register.
> >>>>> There might also be a situation where PRR feature need to be
> >>>>> disabled or enabled separately for each domain.
> >>>>> So I think it would be cleaner to have two apis, set_prr_addr(),
> >>>>> set_prr_bit().
> >>>>> set_prr_addr() will be used only to set this PRR_CFG_*ADDR
> >>>>> register by passing a 'struct page *'
> >>>>> set_prr_bit() will be used as a switch for PRR feature,
> >>>>> where required smmu_domain will be passed along with
> >>>>> the bool value to set/reset the PRR bit depending on which this
> >>>>> feature will be enabled/disabled for the selected domain.
> >>>>
> >>>> on a related note, adreno has been using arm-smmu for a number of
> >>>> generations, I guess not all support PRR?  The drm driver will need to
> >>>> know whether PRR is supported (and expose that to userspace to let the
> >>>> UMD know whether to expose certain extensions).  How should this work?
> >>>
> >>> So, I noticed in the x1e80100.dtsi that there is a gpu_prr_mem
> >>> reserved section..  maybe we should be connecting this to the smmu
> >>> driver in dt, and using that to detect presence of PRR?  Ie. the smmu
> >>> driver would configure PRR_CFG_*ADDR based on the reserved mem, and
> >>> the interface to drm would just be to enable/disable PRR, returning an
> >>> error code if the reserved mem section isn't there.
> >>>
> >>> This simplifies the interface, and handles the question of how to
> >>> detect if PRR is supported.
> >>>
> >>> BR,
> >>> -R
> >>>
> >>
> >> As I checked gpu_prr_mem reserved mem section is not used for mobile
> >> targets hence not present for other DT only compute targets like
> >> x1e80100.dtsi has the same. PRR looks to be smmu version specific
> >> property.
> >
> > I only see it in gpu_prr_mem in x1e80100.dtsi, but not documented
> > anywhere.  I'm only assuming based on the name that it is intended to
> > be for PRR (but not sure why it is larger than 0x1000).  Are the
> > PRR_CFG_*ADDR regs programmed by the fw (and access blocked in EL1) on
> > this device?
> >
>
> As I checked, if the drm/gfx driver allocates the page for drm, then
> this reserved-memory region is not required.
>
> PRR_CFG_*ADDR regs have read and write access in EL1 only for this
> device, behavior is same as other devices as well. These are not
> programmed by fw.

If there is any device which _doesn't_ have EL1 access to these regs,
I think going the reserved memory route seems more future proof?
Otherwise we later on have to deal with two different ways to do
things.  But I'm not sure if there is any such device or risk.

BR,
-R

> > As far as other DT, we haven't enabled PRR anywhere yet.  I think it
> > would be perfectly valid to require a new reserved-memory node to
> > enable PRR.
> >
>
> As mentioned above reserved-memory region is not required if gfx/drm
> allocates the page.
>
> >> This feature is present in all the targets using SMMU-500,
> >> I am still checking for targets using versions prior to smmu-500.
> >> Then maybe we can use the smmu compatible string itself (e.g.
> >> qcom,smmu-500 && qcom,adreno-smmu) to connect to smmu driver for
> >> checking the presence of PRR ?
> >
> > If there is a clean break, such as all smmu-500 have PRR and all
> > smmu-v2 do not, then it would be reasonable to use the compat strings.
> > If not, I think we need a different way.
> >
>
> Yes, from SMMU block perspective PRR bit is present for ACTLR register
> on targets with MMU-500, so feature support is available. So I think we
> can just use the mmu-500 compatible string.
>
> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >> And if the compatible string is different then we can return the
> >> error code signifying PRR feature is not supported on the particular
> >> smmu version.
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>>> BR,
> >>>> -R
> >>>>
> >>>>> Thanks & regards,
> >>>>> Bibek
> >>>>>
> >>>>>> BR,
> >>>>>> -R
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks & regards,
> >>>>>>> Bibek
> >>>>>>>
> >>>>>>>> BR,
> >>>>>>>> -R
> >>>>>>>>
> >>>>>>>>>> Otherwise, lgtm
> >>>>>>>>>>
> >>>>>>>>>> BR,
> >>>>>>>>>> -R
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Thanks & regards,
> >>>>>>>>> Bibek
> >>>>>>>>>
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>>       #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>>>>>>>>
> >>>>>>>>>>>       static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>>>>>>>>> @@ -407,6 +429,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>>>>>>              priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>>>>>>>>              priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>>>>>>>>              priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>>>>>>>>> +       priv->set_prr = qcom_adreno_smmu_set_prr;
> >>>>>>>>>>>
> >>>>>>>>>>>              actlrvar = qsmmu->data->actlrvar;
> >>>>>>>>>>>              if (!actlrvar)
> >>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>>>>>> index d9c2ef8c1653..3076bef49e20 100644
> >>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>>>>>>>>       #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>>>>>>>>
> >>>>>>>>>>>       #define ARM_SMMU_CB_ACTLR              0x4
> >>>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>>>>>>>>
> >>>>>>>>>>>       #define ARM_SMMU_CB_RESUME             0x8
> >>>>>>>>>>>       #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>>>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>>>>>>>>> index c637e0997f6d..d6e2ca9f8d8c 100644
> >>>>>>>>>>> --- a/include/linux/adreno-smmu-priv.h
> >>>>>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>>>>>>>>> @@ -49,7 +49,10 @@ struct adreno_smmu_fault_info {
> >>>>>>>>>>>        *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
> >>>>>>>>>>>        *                 the GPU driver must call resume_translation()
> >>>>>>>>>>>        * @resume_translation: Resume translation after a fault
> >>>>>>>>>>> - *
> >>>>>>>>>>> + * @set_prr:      Extendible interface to be used by GPU to modify the
> >>>>>>>>>>> + *                 ACTLR register bits, currently used to configure
> >>>>>>>>>>> + *                 Partially-Resident-Region (PRR) feature's
> >>>>>>>>>>> + *                 setup and reset sequence as requested.
> >>>>>>>>>>>        *
> >>>>>>>>>>>        * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>>>>>>>>        * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>>>>>>>>> @@ -67,6 +70,7 @@ struct adreno_smmu_priv {
> >>>>>>>>>>>           void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>>>>>>>>           void (*set_stall)(const void *cookie, bool enabled);
> >>>>>>>>>>>           void (*resume_translation)(const void *cookie, bool terminate);
> >>>>>>>>>>> +    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
> >>>>>>>>>>>       };
> >>>>>>>>>>>
> >>>>>>>>>>>       #endif /* __ADRENO_SMMU_PRIV_H */
> >>>>>>>>>>> --
> >>>>>>>>>>> 2.34.1
> >>>>>>>>>>>
Bibek Kumar Patro July 19, 2024, 12:38 p.m. UTC | #10
On 7/17/2024 8:30 PM, Rob Clark wrote:
> On Wed, Jul 17, 2024 at 3:27 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 7/16/2024 1:37 AM, Rob Clark wrote:
>>> On Mon, Jul 15, 2024 at 4:00 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 7/10/2024 10:31 PM, Rob Clark wrote:
>>>>> On Tue, Jul 9, 2024 at 12:43 PM Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 7/4/2024 9:28 PM, Rob Clark wrote:
>>>>>>> On Thu, Jul 4, 2024 at 7:46 AM Rob Clark <robdclark@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Wed, Jul 3, 2024 at 4:38 AM Bibek Kumar Patro
>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 7/2/2024 2:01 AM, Rob Clark wrote:
>>>>>>>>>> On Mon, Jul 1, 2024 at 4:01 AM Bibek Kumar Patro
>>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On 6/28/2024 9:14 PM, Rob Clark wrote:
>>>>>>>>>>>> On Fri, Jun 28, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 6/28/2024 7:47 PM, Rob Clark wrote:
>>>>>>>>>>>>>> On Fri, Jun 28, 2024 at 7:05 AM Bibek Kumar Patro
>>>>>>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>>>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>>>>>>>>>> sequence as per request.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>>>>>>>>>> configuration registers through adreno-smmu private
>>>>>>>>>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>>>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>         drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 23 ++++++++++++++++++++++
>>>>>>>>>>>>>>>         drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>>>>>>>>>         include/linux/adreno-smmu-priv.h           |  6 +++++-
>>>>>>>>>>>>>>>         3 files changed, 30 insertions(+), 1 deletion(-)
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>>>>> index bd101a161d04..64571a1c47b8 100644
>>>>>>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>>>>> @@ -28,6 +28,7 @@
>>>>>>>>>>>>>>>         #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>>>>>>>>>>>>         #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>>>>>>>>>>>>         #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>>>>>>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>         static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>>>>>>>>>>>                { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>>>>>>>>>>> @@ -235,6 +236,27 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>>>>>>>>>                arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>>>>>>>>>         }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> +static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>>>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>>>>>>>>>> +       u32 reg = 0;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>>>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>>>>>>>>>> +       if (set)
>>>>>>>>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>>>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> nit, extra line
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Ack, will remove this. Thanks for pointing out.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> Also, if you passed a `struct page *` instead, then you could drop the
>>>>>>>>>>>>>> bool param, ie. passing NULL for the page would disable PRR.  But I
>>>>>>>>>>>>>> can go either way if others have a strong preference for phys_addr_t.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Oh okay, this looks simple to reset the prr bit.
>>>>>>>>>>>>> But since this page is allocated and is used inside gfx driver
>>>>>>>>>>>>> before being utilized for prr bit operation, would it be safe for
>>>>>>>>>>>>> drm/gfx driver to keep a reference to this page in smmu driver?
>>>>>>>>>>>>>
>>>>>>>>>>>>> Since we only need the page address for configuring the
>>>>>>>>>>>>> CFG_UADDR/CFG_LADDR registers so passed the phys_addr_t.
>>>>>>>>>>>>
>>>>>>>>>>>> I don't think the smmu driver needs to keep a reference to the page..
>>>>>>>>>>>> we can just say it is the responsibility of the drm driver to call
>>>>>>>>>>>> set_prr(NULL) before freeing the page
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> That makes sense. If we go by this NULL page method to disable the PRR,
>>>>>>>>>>> we would have to set the address registers to reset value as well.
>>>>>>>>>>>
>>>>>>>>>>> The sequence would be like the following as per my understaning:
>>>>>>>>>>> - Check if it's NULL page
>>>>>>>>>>> - Set the PRR_CFG_UADDR/PRR_CFG_LADDR to reset values i.e - 0x0 for
>>>>>>>>>>>         these registers
>>>>>>>>>>> - Reset the PRR bit in actlr register
>>>>>>>>>>>
>>>>>>>>>>> Similar to this snippet:
>>>>>>>>>>>
>>>>>>>>>>> #PRR_RESET_ADDR 0x0
>>>>>>>>>>>
>>>>>>>>>>> --------------
>>>>>>>>>>> reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>>>>>> reg &= ~GFX_ACTLR_PRR;
>>>>>>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>>>>
>>>>>>>>>>> if (!prr_page) {
>>>>>>>>>>>              writel_relaxed(PRR_RESET_ADDR,
>>>>>>>>>>>                              smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>>>>              writel_relaxed(PRR_RESET_ADDR),
>>>>>>>>>>>                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>>>>              return;
>>>>>>>>>>> }
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> writel_relaxed(lower_32_bits(page_to_phys(prr_page)),
>>>>>>>>>>>                      smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>>>>
>>>>>>>>>>> writel_relaxed(upper_32_bits(page_to_phys(prr_page)),
>>>>>>>>>>>                      smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>>>>
>>>>>>>>>>> reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>>>>>> arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>>>> -----------------
>>>>>>>>>>>
>>>>>>>>>>> If looks good, will implement the same in next version.
>>>>>>>>>>
>>>>>>>>>> yeah, that looks like it could work..
>>>>>>>>>>
>>>>>>>>>> you probably don't need to zero out the PRR_CFG_*ADDR when disabling,
>>>>>>>>>> and probably could avoid double writing ACTLR, but that is getting
>>>>>>>>>> into bikeshedding
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Actually Rob, since you rightly pointed this out.
>>>>>>>>> I crosschecked again on these registers.
>>>>>>>>> PRR_CFG_*ADDR is a global register in SMMU space but
>>>>>>>>> ACTLR register including PRR bit is a per-domain register.
>>>>>>>>> There might also be a situation where PRR feature need to be
>>>>>>>>> disabled or enabled separately for each domain.
>>>>>>>>> So I think it would be cleaner to have two apis, set_prr_addr(),
>>>>>>>>> set_prr_bit().
>>>>>>>>> set_prr_addr() will be used only to set this PRR_CFG_*ADDR
>>>>>>>>> register by passing a 'struct page *'
>>>>>>>>> set_prr_bit() will be used as a switch for PRR feature,
>>>>>>>>> where required smmu_domain will be passed along with
>>>>>>>>> the bool value to set/reset the PRR bit depending on which this
>>>>>>>>> feature will be enabled/disabled for the selected domain.
>>>>>>>>
>>>>>>>> on a related note, adreno has been using arm-smmu for a number of
>>>>>>>> generations, I guess not all support PRR?  The drm driver will need to
>>>>>>>> know whether PRR is supported (and expose that to userspace to let the
>>>>>>>> UMD know whether to expose certain extensions).  How should this work?
>>>>>>>
>>>>>>> So, I noticed in the x1e80100.dtsi that there is a gpu_prr_mem
>>>>>>> reserved section..  maybe we should be connecting this to the smmu
>>>>>>> driver in dt, and using that to detect presence of PRR?  Ie. the smmu
>>>>>>> driver would configure PRR_CFG_*ADDR based on the reserved mem, and
>>>>>>> the interface to drm would just be to enable/disable PRR, returning an
>>>>>>> error code if the reserved mem section isn't there.
>>>>>>>
>>>>>>> This simplifies the interface, and handles the question of how to
>>>>>>> detect if PRR is supported.
>>>>>>>
>>>>>>> BR,
>>>>>>> -R
>>>>>>>
>>>>>>
>>>>>> As I checked gpu_prr_mem reserved mem section is not used for mobile
>>>>>> targets hence not present for other DT only compute targets like
>>>>>> x1e80100.dtsi has the same. PRR looks to be smmu version specific
>>>>>> property.
>>>>>
>>>>> I only see it in gpu_prr_mem in x1e80100.dtsi, but not documented
>>>>> anywhere.  I'm only assuming based on the name that it is intended to
>>>>> be for PRR (but not sure why it is larger than 0x1000).  Are the
>>>>> PRR_CFG_*ADDR regs programmed by the fw (and access blocked in EL1) on
>>>>> this device?
>>>>>
>>>>
>>>> As I checked, if the drm/gfx driver allocates the page for drm, then
>>>> this reserved-memory region is not required.
>>>>
>>>> PRR_CFG_*ADDR regs have read and write access in EL1 only for this
>>>> device, behavior is same as other devices as well. These are not
>>>> programmed by fw.
>>>
>>> If there is any device which _doesn't_ have EL1 access to these regs,
>>> I think going the reserved memory route seems more future proof > Otherwise we later on have to deal with two different ways to do
>>> things.  But I'm not sure if there is any such device or risk.
>>>
>>
>> PRR is a bit in ACTLR register which is in SMMU space,
>> so is the PRR_CFG_*ADDR registers - with EL1 having access
>> to both the registers in all targets released till now with MMU-500.
>> It's unlikely that this design would change in future
>> for MMU-500 based targets, so I feel this risk is somewhat negligible.
> 
> I wasn't worried about the ACTLR register, but the PRR_CFG_*ADDR regs ;-)
> 
> IIRC those were in the SMMU global space, why hyp tends to like to own.
> 

Yes correct, those are in SMMU global space which hyp likes to own, and
read/write access are avaialble by design for EL1.

>> Also would the reserved memory route look a bit hackish?
>> Because, since this reserved-memory node is not used when page is
>> allocated through drm - so it might turn out to be redundant.
>> If we are aiming for a device-tree handle/node for reference then I
>> think a better way would be to create a bool parameter inside smmu-node
>> indicating presence of PRR ?
> 
> tbh, I don't think there is anything better or worse about having the
> reserved-memory node vs dynamically allocating it.  (If we dynamically
> allocate, we should remove the reserved memory node from
> x1e80100.dtsi)
> 

Right, if the page is dynamically allocated then this reserved memory 
node should be removed as it will stay as an unused region in DDR acting 
as a deadweight. I checked the history on this region for x1e80100 and 
it went as a part of platform upstreaming - we can remove the same.

> The thing I was more concerned about was whether there was any chance
> that some existing or future SoC+fw combo _relied_ on a reserved
> memory node and the fw programming PRR_CFG_*ADDR. If there was any > chance of that, and we went the dynamic allocation route, then we'd
> have some devices with a reserved memory node, and some without.  That
> seems a bit ugly to me.

Agree on the concern, that would surely look a bit ugly - with different 
targets having these allocation differently and having this 
reserved-memory node

But as far as I checked the targets present with MMU-500, none of them 
is relying on reserved-memory node yet.
And the chances are negligible in future targets as well.
Since FW is not setting up these registers - so this reserved region
should not be considered as per me.
Also since in our api we are only taking the address of the prr page to 
setup these registers, so even if client chooses any page allocation 
method <not surely from firmware as per design> then the same api 
smmu-driver is exposing would still work indentical.


> If there is no chance of this, then we can go either route.
> 
>> Personally,I feel since the PRR enablement mechanism is same for all
>> MMU-500 targets - compat string would be a robust approach.
> 
> I guess if it is all mmu-500, then we can just pick based on compat
> string.  If it turns out some subset of smmu-v2 have PRR, we can just
> have a list of compat strings in arm-smmu-qcom.c.. there would only be
> a finite # of them ;-)
> 

Yes right, atleast one thing which we can conclude is - in
MMU-500 version the PRR enablement steps are same from SMMU block and
driver perspective <i.e configure the PRR_CFG_*ADDR regs and set PRR bit
in ACTLR>. So all targets on MMU-500 - we would do the same steps.
Hence under MMU-500 compat string we can keep this.
Agree on the smmu-v2 part as well.


Thanks & regards,
Bibek

> BR,
> -R
Bibek Kumar Patro July 19, 2024, 12:59 p.m. UTC | #11
On 7/16/2024 5:44 PM, Konrad Dybcio wrote:
> On 15.07.2024 10:07 PM, Rob Clark wrote:
>> On Mon, Jul 15, 2024 at 4:00 AM Bibek Kumar Patro
>> <quic_bibekkum@quicinc.com> wrote:
> 
> [...]
> 
>>>>> As I checked gpu_prr_mem reserved mem section is not used for mobile
>>>>> targets hence not present for other DT only compute targets like
>>>>> x1e80100.dtsi has the same. PRR looks to be smmu version specific
>>>>> property.
>>>>
>>>> I only see it in gpu_prr_mem in x1e80100.dtsi, but not documented
>>>> anywhere.  I'm only assuming based on the name that it is intended to
>>>> be for PRR (but not sure why it is larger than 0x1000).  Are the
>>>> PRR_CFG_*ADDR regs programmed by the fw (and access blocked in EL1) on
>>>> this device?
>>>>
>>>
>>> As I checked, if the drm/gfx driver allocates the page for drm, then
>>> this reserved-memory region is not required.
>>>
>>> PRR_CFG_*ADDR regs have read and write access in EL1 only for this
>>> device, behavior is same as other devices as well. These are not
>>> programmed by fw.
>>
>> If there is any device which _doesn't_ have EL1 access to these regs,
>> I think going the reserved memory route seems more future proof?
>> Otherwise we later on have to deal with two different ways to do
>> things.  But I'm not sure if there is any such device or risk.
> 
> We can have our cake and eat it too, if we keep the check for a
> reserved memory node handle, but make it a dynamic allocation (see
> [1] for example), this way there's a way to opt into using this from
> the DT and there's no need for adding more properties
> 

Coming to allocation methodology, here the client/user(drm/gpu
driver in this case) would be allocating the page and pass the address 
to our SMMU interface which would just configure it in the PRR_CFG_*ADDR 
register.
So I believe the choice of allocation method should be better left to 
the client, rather than restricting them to a specific strategy. (?)
If dynamically allocated without a reserved pool, this region might only 
be present in some targets and not others, which could make it look a 
bit ugly [1].

Thanks & regards,
Bibek

[1]:https://lore.kernel.org/all/CAF6AEGvZWdN+CC9O3tq7kjYPq424U6__KgAnFNCV0bCqE8wPuQ@mail.gmail.com/#:~:text=That%0Aseems%20a%20bit%20ugly%20to%20me.

> Konrad
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/boot/dts/qcom/msm8916.dtsi?h=v6.10#n109
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index bd101a161d04..64571a1c47b8 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -28,6 +28,7 @@ 
 #define PREFETCH_SHALLOW	(1 << PREFETCH_SHIFT)
 #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
 #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
+#define GFX_ACTLR_PRR		(1 << 5)

 static const struct actlr_config sc7280_apps_actlr_cfg[] = {
 	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
@@ -235,6 +236,27 @@  static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
 	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
 }

+static void qcom_adreno_smmu_set_prr(const void *cookie, phys_addr_t page_addr, bool set)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	u32 reg = 0;
+
+	writel_relaxed(lower_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+
+	writel_relaxed(upper_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+
+	reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+	reg &= ~GFX_ACTLR_PRR;
+	if (set)
+		reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0

 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -407,6 +429,7 @@  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 	priv->set_stall = qcom_adreno_smmu_set_stall;
 	priv->resume_translation = qcom_adreno_smmu_resume_translation;
+	priv->set_prr = qcom_adreno_smmu_set_prr;

 	actlrvar = qsmmu->data->actlrvar;
 	if (!actlrvar)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index d9c2ef8c1653..3076bef49e20 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@  enum arm_smmu_cbar_type {
 #define ARM_SMMU_SCTLR_M		BIT(0)

 #define ARM_SMMU_CB_ACTLR		0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C

 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..d6e2ca9f8d8c 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -49,7 +49,10 @@  struct adreno_smmu_fault_info {
  *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
  *                 the GPU driver must call resume_translation()
  * @resume_translation: Resume translation after a fault
- *
+ * @set_prr:	   Extendible interface to be used by GPU to modify the
+ *		    ACTLR register bits, currently used to configure
+ *		    Partially-Resident-Region (PRR) feature's
+ *		    setup and reset sequence as requested.
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -67,6 +70,7 @@  struct adreno_smmu_priv {
     void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
     void (*set_stall)(const void *cookie, bool enabled);
     void (*resume_translation)(const void *cookie, bool terminate);
+    void (*set_prr)(const void *cookie, phys_addr_t page_addr, bool set);
 };

 #endif /* __ADRENO_SMMU_PRIV_H */