diff mbox series

[v1,1/3] PCI: qcom: Enable cache coherency for SA8775P RC

Message ID 1698767186-5046-2-git-send-email-quic_msarkar@quicinc.com
State Superseded
Headers show
Series arm64: qcom: sa8775p: add cache coherency support for SA8775P | expand

Commit Message

Mrinmay Sarkar Oct. 31, 2023, 3:46 p.m. UTC
This change will enable cache snooping logic to support
cache coherency for SA8755P RC platform.

Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Mrinmay Sarkar Nov. 2, 2023, 10:16 a.m. UTC | #1
On 10/31/2023 10:20 PM, Konrad Dybcio wrote:
> On 31.10.2023 16:46, Mrinmay Sarkar wrote:
>> This change will enable cache snooping logic to support
>> cache coherency for SA8755P RC platform.
> 8775
>
>> Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
>>   1 file changed, 11 insertions(+)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index 6902e97..6f240fc 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -51,6 +51,7 @@
>>   #define PARF_SID_OFFSET				0x234
>>   #define PARF_BDF_TRANSLATE_CFG			0x24c
>>   #define PARF_SLV_ADDR_SPACE_SIZE		0x358
>> +#define PCIE_PARF_NO_SNOOP_OVERIDE		0x3d4
>>   #define PARF_DEVICE_TYPE			0x1000
>>   #define PARF_BDF_TO_SID_TABLE_N			0x2000
>>   
>> @@ -117,6 +118,9 @@
>>   /* PARF_LTSSM register fields */
>>   #define LTSSM_EN				BIT(8)
>>   
>> +/* PARF_NO_SNOOP_OVERIDE register value */
> override
>> +#define NO_SNOOP_OVERIDE_EN			0xa
> is this actually some magic value and not BIT(1) | BIT(3)?
we need to set 1st and 3rd bit. yes, we can use BIT(1) | BIT(3).
>
>>   /* PARF_DEVICE_TYPE register fields */
>>   #define DEVICE_TYPE_RC				0x4
>>   
>> @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
>>   
>>   static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
>>   {
>> +	struct dw_pcie *pci = pcie->pci;
>> +	struct device *dev = pci->dev;
>> +
>> +	/* Enable cache snooping for SA8775P */
>> +	if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
>> +		writel(NO_SNOOP_OVERIDE_EN, pcie->parf + PCIE_PARF_NO_SNOOP_OVERIDE);
> Why only for 8775 and not for other v2.7, or perhaps all other
> revisions?
yes this is only required for 8775 due to hw requirement we need to enable
cache snooping from the register level for 8775.
> Konrad
Thanks,
Mrinmay
Dmitry Baryshkov Nov. 2, 2023, 3:34 p.m. UTC | #2
On Tue, 31 Oct 2023 at 17:46, Mrinmay Sarkar <quic_msarkar@quicinc.com> wrote:
>
> This change will enable cache snooping logic to support
> cache coherency for SA8755P RC platform.
>
> Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
>
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index 6902e97..6f240fc 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -51,6 +51,7 @@
>  #define PARF_SID_OFFSET                                0x234
>  #define PARF_BDF_TRANSLATE_CFG                 0x24c
>  #define PARF_SLV_ADDR_SPACE_SIZE               0x358
> +#define PCIE_PARF_NO_SNOOP_OVERIDE             0x3d4
>  #define PARF_DEVICE_TYPE                       0x1000
>  #define PARF_BDF_TO_SID_TABLE_N                        0x2000
>
> @@ -117,6 +118,9 @@
>  /* PARF_LTSSM register fields */
>  #define LTSSM_EN                               BIT(8)
>
> +/* PARF_NO_SNOOP_OVERIDE register value */
> +#define NO_SNOOP_OVERIDE_EN                    0xa
> +
>  /* PARF_DEVICE_TYPE register fields */
>  #define DEVICE_TYPE_RC                         0x4
>
> @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
>
>  static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
>  {
> +       struct dw_pcie *pci = pcie->pci;
> +       struct device *dev = pci->dev;
> +
> +       /* Enable cache snooping for SA8775P */
> +       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))

Obviously: please populate a flag in the data structures instead of
doing of_device_is_compatible(). Same applies to the patch 2.

> +               writel(NO_SNOOP_OVERIDE_EN, pcie->parf + PCIE_PARF_NO_SNOOP_OVERIDE);
> +
>         qcom_pcie_clear_hpc(pcie->pci);
>
>         return 0;
Manivannan Sadhasivam Nov. 2, 2023, 4:36 p.m. UTC | #3
On Thu, Nov 02, 2023 at 05:34:24PM +0200, Dmitry Baryshkov wrote:
> On Tue, 31 Oct 2023 at 17:46, Mrinmay Sarkar <quic_msarkar@quicinc.com> wrote:
> >
> > This change will enable cache snooping logic to support
> > cache coherency for SA8755P RC platform.
> >
> > Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
> > ---
> >  drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > index 6902e97..6f240fc 100644
> > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> > @@ -51,6 +51,7 @@
> >  #define PARF_SID_OFFSET                                0x234
> >  #define PARF_BDF_TRANSLATE_CFG                 0x24c
> >  #define PARF_SLV_ADDR_SPACE_SIZE               0x358
> > +#define PCIE_PARF_NO_SNOOP_OVERIDE             0x3d4
> >  #define PARF_DEVICE_TYPE                       0x1000
> >  #define PARF_BDF_TO_SID_TABLE_N                        0x2000
> >
> > @@ -117,6 +118,9 @@
> >  /* PARF_LTSSM register fields */
> >  #define LTSSM_EN                               BIT(8)
> >
> > +/* PARF_NO_SNOOP_OVERIDE register value */
> > +#define NO_SNOOP_OVERIDE_EN                    0xa
> > +
> >  /* PARF_DEVICE_TYPE register fields */
> >  #define DEVICE_TYPE_RC                         0x4
> >
> > @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
> >
> >  static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
> >  {
> > +       struct dw_pcie *pci = pcie->pci;
> > +       struct device *dev = pci->dev;
> > +
> > +       /* Enable cache snooping for SA8775P */
> > +       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
> 
> Obviously: please populate a flag in the data structures instead of
> doing of_device_is_compatible(). Same applies to the patch 2.
> 

Not necessary at this point. For some unknown reasons, the HW team ended up
disabling cache snooping on this specific platform. Whereas on other platforms,
it is enabled by default. So I have low expectations that we would need this
setting on other platforms in the future.

My concern with the usage of flag is that it warrants a new "qcom_pcie_cfg"
instance just for this quirk and it looks overkill to me.

So if we endup seeing this behavior on other platforms as well (unlikely) then
we can switch to the flag approach.

- Mani

> > +               writel(NO_SNOOP_OVERIDE_EN, pcie->parf + PCIE_PARF_NO_SNOOP_OVERIDE);
> > +
> >         qcom_pcie_clear_hpc(pcie->pci);
> >
> >         return 0;
> 
> 
> 
> -- 
> With best wishes
> Dmitry
Konrad Dybcio Nov. 2, 2023, 10:25 p.m. UTC | #4
On 02/11/2023 17:36, Manivannan Sadhasivam wrote:
> On Thu, Nov 02, 2023 at 05:34:24PM +0200, Dmitry Baryshkov wrote:
>> On Tue, 31 Oct 2023 at 17:46, Mrinmay Sarkar <quic_msarkar@quicinc.com> wrote:
>>>
>>> This change will enable cache snooping logic to support
>>> cache coherency for SA8755P RC platform.
>>>
>>> Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
>>> ---
>>>   drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
>>>   1 file changed, 11 insertions(+)
>>>
>>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>>> index 6902e97..6f240fc 100644
>>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>>> @@ -51,6 +51,7 @@
>>>   #define PARF_SID_OFFSET                                0x234
>>>   #define PARF_BDF_TRANSLATE_CFG                 0x24c
>>>   #define PARF_SLV_ADDR_SPACE_SIZE               0x358
>>> +#define PCIE_PARF_NO_SNOOP_OVERIDE             0x3d4
>>>   #define PARF_DEVICE_TYPE                       0x1000
>>>   #define PARF_BDF_TO_SID_TABLE_N                        0x2000
>>>
>>> @@ -117,6 +118,9 @@
>>>   /* PARF_LTSSM register fields */
>>>   #define LTSSM_EN                               BIT(8)
>>>
>>> +/* PARF_NO_SNOOP_OVERIDE register value */
>>> +#define NO_SNOOP_OVERIDE_EN                    0xa
>>> +
>>>   /* PARF_DEVICE_TYPE register fields */
>>>   #define DEVICE_TYPE_RC                         0x4
>>>
>>> @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
>>>
>>>   static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
>>>   {
>>> +       struct dw_pcie *pci = pcie->pci;
>>> +       struct device *dev = pci->dev;
>>> +
>>> +       /* Enable cache snooping for SA8775P */
>>> +       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
>>
>> Obviously: please populate a flag in the data structures instead of
>> doing of_device_is_compatible(). Same applies to the patch 2.
>>
> 
> Not necessary at this point. For some unknown reasons, the HW team ended up
> disabling cache snooping on this specific platform. Whereas on other platforms,
> it is enabled by default. So I have low expectations that we would need this
> setting on other platforms in the future.
> 
> My concern with the usage of flag is that it warrants a new "qcom_pcie_cfg"
> instance just for this quirk and it looks overkill to me.
> 
> So if we endup seeing this behavior on other platforms as well (unlikely) then
> we can switch to the flag approach.
This register reads zeroes on 8250, can we confirm it works as
expected there? I guess some benchmarks with and without
'dma-coherent'?

Konrad
Konrad Dybcio Nov. 2, 2023, 10:27 p.m. UTC | #5
On 02/11/2023 11:16, Mrinmay Sarkar wrote:
> 
> On 10/31/2023 10:20 PM, Konrad Dybcio wrote:
>> On 31.10.2023 16:46, Mrinmay Sarkar wrote:
>>> This change will enable cache snooping logic to support
>>> cache coherency for SA8755P RC platform.
>> 8775
>>
>>> Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
>>> ---
>>>   drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
>>>   1 file changed, 11 insertions(+)
>>>
>>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c 
>>> b/drivers/pci/controller/dwc/pcie-qcom.c
>>> index 6902e97..6f240fc 100644
>>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>>> @@ -51,6 +51,7 @@
>>>   #define PARF_SID_OFFSET                0x234
>>>   #define PARF_BDF_TRANSLATE_CFG            0x24c
>>>   #define PARF_SLV_ADDR_SPACE_SIZE        0x358
>>> +#define PCIE_PARF_NO_SNOOP_OVERIDE        0x3d4
>>>   #define PARF_DEVICE_TYPE            0x1000
>>>   #define PARF_BDF_TO_SID_TABLE_N            0x2000
>>> @@ -117,6 +118,9 @@
>>>   /* PARF_LTSSM register fields */
>>>   #define LTSSM_EN                BIT(8)
>>> +/* PARF_NO_SNOOP_OVERIDE register value */
>> override
>>> +#define NO_SNOOP_OVERIDE_EN            0xa
>> is this actually some magic value and not BIT(1) | BIT(3)?
> we need to set 1st and 3rd bit. yes, we can use BIT(1) | BIT(3).
It would be great if you could explain what each of these bits means
separately, #defining them instead and ORing at usage time.

Konrad
Manivannan Sadhasivam Nov. 3, 2023, 7:58 a.m. UTC | #6
On Thu, Nov 02, 2023 at 11:25:36PM +0100, Konrad Dybcio wrote:
> 
> 
> On 02/11/2023 17:36, Manivannan Sadhasivam wrote:
> > On Thu, Nov 02, 2023 at 05:34:24PM +0200, Dmitry Baryshkov wrote:
> > > On Tue, 31 Oct 2023 at 17:46, Mrinmay Sarkar <quic_msarkar@quicinc.com> wrote:
> > > > 
> > > > This change will enable cache snooping logic to support
> > > > cache coherency for SA8755P RC platform.
> > > > 
> > > > Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
> > > > ---
> > > >   drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
> > > >   1 file changed, 11 insertions(+)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > index 6902e97..6f240fc 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > @@ -51,6 +51,7 @@
> > > >   #define PARF_SID_OFFSET                                0x234
> > > >   #define PARF_BDF_TRANSLATE_CFG                 0x24c
> > > >   #define PARF_SLV_ADDR_SPACE_SIZE               0x358
> > > > +#define PCIE_PARF_NO_SNOOP_OVERIDE             0x3d4
> > > >   #define PARF_DEVICE_TYPE                       0x1000
> > > >   #define PARF_BDF_TO_SID_TABLE_N                        0x2000
> > > > 
> > > > @@ -117,6 +118,9 @@
> > > >   /* PARF_LTSSM register fields */
> > > >   #define LTSSM_EN                               BIT(8)
> > > > 
> > > > +/* PARF_NO_SNOOP_OVERIDE register value */
> > > > +#define NO_SNOOP_OVERIDE_EN                    0xa
> > > > +
> > > >   /* PARF_DEVICE_TYPE register fields */
> > > >   #define DEVICE_TYPE_RC                         0x4
> > > > 
> > > > @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
> > > > 
> > > >   static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
> > > >   {
> > > > +       struct dw_pcie *pci = pcie->pci;
> > > > +       struct device *dev = pci->dev;
> > > > +
> > > > +       /* Enable cache snooping for SA8775P */
> > > > +       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
> > > 
> > > Obviously: please populate a flag in the data structures instead of
> > > doing of_device_is_compatible(). Same applies to the patch 2.
> > > 
> > 
> > Not necessary at this point. For some unknown reasons, the HW team ended up
> > disabling cache snooping on this specific platform. Whereas on other platforms,
> > it is enabled by default. So I have low expectations that we would need this
> > setting on other platforms in the future.
> > 
> > My concern with the usage of flag is that it warrants a new "qcom_pcie_cfg"
> > instance just for this quirk and it looks overkill to me.
> > 
> > So if we endup seeing this behavior on other platforms as well (unlikely) then
> > we can switch to the flag approach.
> This register reads zeroes on 8250, can we confirm it works as
> expected there?

I don't know if this register is even implemented in 8250. Mrinmay, can you
check?

> I guess some benchmarks with and without
> 'dma-coherent'?
> 

The performance benefit can be measured by saturating the link. But it is
obvious that snooping the cache will give better performance (plus it avoids
cache flush in kernel).

- Mani
Mrinmay Sarkar Nov. 6, 2023, 7:19 a.m. UTC | #7
On 11/3/2023 1:28 PM, Manivannan Sadhasivam wrote:
> On Thu, Nov 02, 2023 at 11:25:36PM +0100, Konrad Dybcio wrote:
>>
>> On 02/11/2023 17:36, Manivannan Sadhasivam wrote:
>>> On Thu, Nov 02, 2023 at 05:34:24PM +0200, Dmitry Baryshkov wrote:
>>>> On Tue, 31 Oct 2023 at 17:46, Mrinmay Sarkar <quic_msarkar@quicinc.com> wrote:
>>>>> This change will enable cache snooping logic to support
>>>>> cache coherency for SA8755P RC platform.
>>>>>
>>>>> Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
>>>>> ---
>>>>>    drivers/pci/controller/dwc/pcie-qcom.c | 11 +++++++++++
>>>>>    1 file changed, 11 insertions(+)
>>>>>
>>>>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>>>>> index 6902e97..6f240fc 100644
>>>>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>>>>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>>>>> @@ -51,6 +51,7 @@
>>>>>    #define PARF_SID_OFFSET                                0x234
>>>>>    #define PARF_BDF_TRANSLATE_CFG                 0x24c
>>>>>    #define PARF_SLV_ADDR_SPACE_SIZE               0x358
>>>>> +#define PCIE_PARF_NO_SNOOP_OVERIDE             0x3d4
>>>>>    #define PARF_DEVICE_TYPE                       0x1000
>>>>>    #define PARF_BDF_TO_SID_TABLE_N                        0x2000
>>>>>
>>>>> @@ -117,6 +118,9 @@
>>>>>    /* PARF_LTSSM register fields */
>>>>>    #define LTSSM_EN                               BIT(8)
>>>>>
>>>>> +/* PARF_NO_SNOOP_OVERIDE register value */
>>>>> +#define NO_SNOOP_OVERIDE_EN                    0xa
>>>>> +
>>>>>    /* PARF_DEVICE_TYPE register fields */
>>>>>    #define DEVICE_TYPE_RC                         0x4
>>>>>
>>>>> @@ -961,6 +965,13 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
>>>>>
>>>>>    static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
>>>>>    {
>>>>> +       struct dw_pcie *pci = pcie->pci;
>>>>> +       struct device *dev = pci->dev;
>>>>> +
>>>>> +       /* Enable cache snooping for SA8775P */
>>>>> +       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
>>>> Obviously: please populate a flag in the data structures instead of
>>>> doing of_device_is_compatible(). Same applies to the patch 2.
>>>>
>>> Not necessary at this point. For some unknown reasons, the HW team ended up
>>> disabling cache snooping on this specific platform. Whereas on other platforms,
>>> it is enabled by default. So I have low expectations that we would need this
>>> setting on other platforms in the future.
>>>
>>> My concern with the usage of flag is that it warrants a new "qcom_pcie_cfg"
>>> instance just for this quirk and it looks overkill to me.
>>>
>>> So if we endup seeing this behavior on other platforms as well (unlikely) then
>>> we can switch to the flag approach.
>> This register reads zeroes on 8250, can we confirm it works as
>> expected there?
> I don't know if this register is even implemented in 8250. Mrinmay, can you
> check?
Yes we have this register in 8250 platform as well
and I can see the default value is 0x0.

--Mrinmay
>> I guess some benchmarks with and without
>> 'dma-coherent'?
>>
> The performance benefit can be measured by saturating the link. But it is
> obvious that snooping the cache will give better performance (plus it avoids
> cache flush in kernel).
>
> - Mani
>
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 6902e97..6f240fc 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -51,6 +51,7 @@ 
 #define PARF_SID_OFFSET				0x234
 #define PARF_BDF_TRANSLATE_CFG			0x24c
 #define PARF_SLV_ADDR_SPACE_SIZE		0x358
+#define PCIE_PARF_NO_SNOOP_OVERIDE		0x3d4
 #define PARF_DEVICE_TYPE			0x1000
 #define PARF_BDF_TO_SID_TABLE_N			0x2000
 
@@ -117,6 +118,9 @@ 
 /* PARF_LTSSM register fields */
 #define LTSSM_EN				BIT(8)
 
+/* PARF_NO_SNOOP_OVERIDE register value */
+#define NO_SNOOP_OVERIDE_EN			0xa
+
 /* PARF_DEVICE_TYPE register fields */
 #define DEVICE_TYPE_RC				0x4
 
@@ -961,6 +965,13 @@  static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
 
 static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
 {
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+
+	/* Enable cache snooping for SA8775P */
+	if (of_device_is_compatible(dev->of_node, "qcom,pcie-sa8775p"))
+		writel(NO_SNOOP_OVERIDE_EN, pcie->parf + PCIE_PARF_NO_SNOOP_OVERIDE);
+
 	qcom_pcie_clear_hpc(pcie->pci);
 
 	return 0;