Message ID | 20201026185428.101443-1-jcrouse@codeaurora.org |
---|---|
State | Accepted |
Commit | 3d247123b5a16f5f43ddc0c86dba05b417b6cadc |
Headers | show |
Series | drm/msm/a6xx: Add support for using system cache on MMU500 based targets | expand |
On 2020-10-27 00:24, Jordan Crouse wrote: > This is an extension to the series [1] to enable the System Cache (LLC) > for > Adreno a6xx targets. > > GPU targets with an MMU-500 attached have a slightly different process > for > enabling system cache. Use the compatible string on the IOMMU phandle > to see if an MMU-500 is attached and modify the programming sequence > accordingly. > > [1] https://patchwork.freedesktop.org/series/83037/ > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > --- > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 +++++++++++++++++++++------ > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + > 2 files changed, 37 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index 95c98c642876..b7737732fbb6 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -1042,6 +1042,8 @@ static void a6xx_llc_deactivate(struct a6xx_gpu > *a6xx_gpu) > > static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) > { > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > + struct msm_gpu *gpu = &adreno_gpu->base; > u32 cntl1_regval = 0; > > if (IS_ERR(a6xx_gpu->llc_mmio)) > @@ -1055,11 +1057,17 @@ static void a6xx_llc_activate(struct a6xx_gpu > *a6xx_gpu) > (gpu_scid << 15) | (gpu_scid << 20); > } > > + /* > + * For targets with a MMU500, activate the slice but don't program > the > + * register. The XBL will take care of that. 
> + */ > if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { > - u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); > + if (!a6xx_gpu->have_mmu500) { > + u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); > > - gpuhtw_scid &= 0x1f; > - cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); > + gpuhtw_scid &= 0x1f; > + cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); > + } > } > > if (cntl1_regval) { > @@ -1067,13 +1075,20 @@ static void a6xx_llc_activate(struct a6xx_gpu > *a6xx_gpu) > * Program the slice IDs for the various GPU blocks and GPU MMU > * pagetables > */ > - a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, > cntl1_regval); > - > - /* > - * Program cacheability overrides to not allocate cache lines on > - * a write miss > - */ > - a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, > 0x03); > + if (a6xx_gpu->have_mmu500) > + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), > + cntl1_regval); > + else { > + a6xx_llc_write(a6xx_gpu, > + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); > + > + /* > + * Program cacheability overrides to not allocate cache > + * lines on a write miss > + */ > + a6xx_llc_rmw(a6xx_gpu, > + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); > + } > } > } > > @@ -1086,10 +1101,21 @@ static void a6xx_llc_slices_destroy(struct > a6xx_gpu *a6xx_gpu) > static void a6xx_llc_slices_init(struct platform_device *pdev, > struct a6xx_gpu *a6xx_gpu) > { > + struct device_node *phandle; > + > a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); > if (IS_ERR(a6xx_gpu->llc_mmio)) > return; > > + /* > + * There is a different programming path for targets with an mmu500 > + * attached, so detect if that is the case > + */ > + phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); > + a6xx_gpu->have_mmu500 = (phandle && > + of_device_is_compatible(phandle, "arm,mmu500")); > + of_node_put(phandle); > + > a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); > 
a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > index 9e6079af679c..e793d329e77b 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > @@ -32,6 +32,7 @@ struct a6xx_gpu { > void __iomem *llc_mmio; > void *llc_slice; > void *htw_llc_slice; > + bool have_mmu500; > }; > > #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) Thanks Jordan for the patch.
On Tue, Oct 27, 2020 at 12:38:02PM +0530, Sai Prakash Ranjan wrote: > On 2020-10-27 00:24, Jordan Crouse wrote: > >This is an extension to the series [1] to enable the System Cache (LLC) > >for > >Adreno a6xx targets. > > > >GPU targets with an MMU-500 attached have a slightly different process for > >enabling system cache. Use the compatible string on the IOMMU phandle > >to see if an MMU-500 is attached and modify the programming sequence > >accordingly. > > > >[1] https://patchwork.freedesktop.org/series/83037/ > > > >Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > >--- > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 +++++++++++++++++++++------ > > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + > > 2 files changed, 37 insertions(+), 10 deletions(-) > > > >diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > >b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > >index 95c98c642876..b7737732fbb6 100644 > >--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > >+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > >@@ -1042,6 +1042,8 @@ static void a6xx_llc_deactivate(struct a6xx_gpu > >*a6xx_gpu) > > > > static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) > > { > >+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > >+ struct msm_gpu *gpu = &adreno_gpu->base; > > u32 cntl1_regval = 0; > > > > if (IS_ERR(a6xx_gpu->llc_mmio)) > >@@ -1055,11 +1057,17 @@ static void a6xx_llc_activate(struct a6xx_gpu > >*a6xx_gpu) > > (gpu_scid << 15) | (gpu_scid << 20); > > } > > > >+ /* > >+ * For targets with a MMU500, activate the slice but don't program the > >+ * register. The XBL will take care of that. 
> >+ */ > > if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { > >- u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); > >+ if (!a6xx_gpu->have_mmu500) { > >+ u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); > > > >- gpuhtw_scid &= 0x1f; > >- cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); > >+ gpuhtw_scid &= 0x1f; > >+ cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); > >+ } > > } > > > > if (cntl1_regval) { > >@@ -1067,13 +1075,20 @@ static void a6xx_llc_activate(struct a6xx_gpu > >*a6xx_gpu) > > * Program the slice IDs for the various GPU blocks and GPU MMU > > * pagetables > > */ > >- a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, > >cntl1_regval); > >- > >- /* > >- * Program cacheability overrides to not allocate cache lines on > >- * a write miss > >- */ > >- a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, > >0x03); > >+ if (a6xx_gpu->have_mmu500) > >+ gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), > >+ cntl1_regval); > >+ else { > >+ a6xx_llc_write(a6xx_gpu, > >+ REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); > >+ > >+ /* > >+ * Program cacheability overrides to not allocate cache > >+ * lines on a write miss > >+ */ > >+ a6xx_llc_rmw(a6xx_gpu, > >+ REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); > >+ } > > } > > } > > > >@@ -1086,10 +1101,21 @@ static void a6xx_llc_slices_destroy(struct > >a6xx_gpu *a6xx_gpu) > > static void a6xx_llc_slices_init(struct platform_device *pdev, > > struct a6xx_gpu *a6xx_gpu) > > { > >+ struct device_node *phandle; > >+ > > a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); > > if (IS_ERR(a6xx_gpu->llc_mmio)) > > return; > > > >+ /* > >+ * There is a different programming path for targets with an mmu500 > >+ * attached, so detect if that is the case > >+ */ > >+ phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); > >+ a6xx_gpu->have_mmu500 = (phandle && > >+ of_device_is_compatible(phandle, "arm,mmu500")); > >+ 
of_node_put(phandle); > >+ > > a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); > > a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); > > > >diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > >b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > >index 9e6079af679c..e793d329e77b 100644 > >--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > >+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > >@@ -32,6 +32,7 @@ struct a6xx_gpu { > > void __iomem *llc_mmio; > > void *llc_slice; > > void *htw_llc_slice; > >+ bool have_mmu500; > > }; > > > > #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) > > Thanks Jordan for the patch. If it makes your life or Rob's life easier, please feel free to squash them. Jordan
On 2020-10-27 20:09, Jordan Crouse wrote: > On Tue, Oct 27, 2020 at 12:38:02PM +0530, Sai Prakash Ranjan wrote: >> On 2020-10-27 00:24, Jordan Crouse wrote: >> >This is an extension to the series [1] to enable the System Cache (LLC) >> >for >> >Adreno a6xx targets. >> > >> >GPU targets with an MMU-500 attached have a slightly different process for >> >enabling system cache. Use the compatible string on the IOMMU phandle >> >to see if an MMU-500 is attached and modify the programming sequence >> >accordingly. >> > >> >[1] https://patchwork.freedesktop.org/series/83037/ >> > >> >Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> >> >--- >> > >> > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 +++++++++++++++++++++------ >> > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + >> > 2 files changed, 37 insertions(+), 10 deletions(-) >> > >> >diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> >b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> >index 95c98c642876..b7737732fbb6 100644 >> >--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> >+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> >@@ -1042,6 +1042,8 @@ static void a6xx_llc_deactivate(struct a6xx_gpu >> >*a6xx_gpu) >> > >> > static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) >> > { >> >+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; >> >+ struct msm_gpu *gpu = &adreno_gpu->base; >> > u32 cntl1_regval = 0; >> > >> > if (IS_ERR(a6xx_gpu->llc_mmio)) >> >@@ -1055,11 +1057,17 @@ static void a6xx_llc_activate(struct a6xx_gpu >> >*a6xx_gpu) >> > (gpu_scid << 15) | (gpu_scid << 20); >> > } >> > >> >+ /* >> >+ * For targets with a MMU500, activate the slice but don't program the >> >+ * register. The XBL will take care of that. 
>> >+ */ >> > if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { >> >- u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); >> >+ if (!a6xx_gpu->have_mmu500) { >> >+ u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); >> > >> >- gpuhtw_scid &= 0x1f; >> >- cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); >> >+ gpuhtw_scid &= 0x1f; >> >+ cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); >> >+ } >> > } >> > >> > if (cntl1_regval) { >> >@@ -1067,13 +1075,20 @@ static void a6xx_llc_activate(struct a6xx_gpu >> >*a6xx_gpu) >> > * Program the slice IDs for the various GPU blocks and GPU MMU >> > * pagetables >> > */ >> >- a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, >> >cntl1_regval); >> >- >> >- /* >> >- * Program cacheability overrides to not allocate cache lines on >> >- * a write miss >> >- */ >> >- a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, >> >0x03); >> >+ if (a6xx_gpu->have_mmu500) >> >+ gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), >> >+ cntl1_regval); >> >+ else { >> >+ a6xx_llc_write(a6xx_gpu, >> >+ REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); >> >+ >> >+ /* >> >+ * Program cacheability overrides to not allocate cache >> >+ * lines on a write miss >> >+ */ >> >+ a6xx_llc_rmw(a6xx_gpu, >> >+ REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); >> >+ } >> > } >> > } >> > >> >@@ -1086,10 +1101,21 @@ static void a6xx_llc_slices_destroy(struct >> >a6xx_gpu *a6xx_gpu) >> > static void a6xx_llc_slices_init(struct platform_device *pdev, >> > struct a6xx_gpu *a6xx_gpu) >> > { >> >+ struct device_node *phandle; >> >+ >> > a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); >> > if (IS_ERR(a6xx_gpu->llc_mmio)) >> > return; >> > >> >+ /* >> >+ * There is a different programming path for targets with an mmu500 >> >+ * attached, so detect if that is the case >> >+ */ >> >+ phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); >> >+ a6xx_gpu->have_mmu500 = (phandle && >> 
>+ of_device_is_compatible(phandle, "arm,mmu500")); >> >+ of_node_put(phandle); >> >+ >> > a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); >> > a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); >> > >> >diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h >> >b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h >> >index 9e6079af679c..e793d329e77b 100644 >> >--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h >> >+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h >> >@@ -32,6 +32,7 @@ struct a6xx_gpu { >> > void __iomem *llc_mmio; >> > void *llc_slice; >> > void *htw_llc_slice; >> >+ bool have_mmu500; >> > }; >> > >> > #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) >> >> Thanks Jordan for the patch. > > If it makes your life or Rob's life easier, please feel free to squash > them. > Sure, I will squash them in the next version with a change which Robin pointed out. Thanks, Sai
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 95c98c642876..b7737732fbb6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1042,6 +1042,8 @@ static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu) static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) { + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; u32 cntl1_regval = 0; if (IS_ERR(a6xx_gpu->llc_mmio)) @@ -1055,11 +1057,17 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) (gpu_scid << 15) | (gpu_scid << 20); } + /* + * For targets with a MMU500, activate the slice but don't program the + * register. The XBL will take care of that. + */ if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { - u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); + if (!a6xx_gpu->have_mmu500) { + u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); - gpuhtw_scid &= 0x1f; - cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); + gpuhtw_scid &= 0x1f; + cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); + } } if (cntl1_regval) { @@ -1067,13 +1075,20 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) * Program the slice IDs for the various GPU blocks and GPU MMU * pagetables */ - a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); - - /* - * Program cacheability overrides to not allocate cache lines on - * a write miss - */ - a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); + if (a6xx_gpu->have_mmu500) + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), + cntl1_regval); + else { + a6xx_llc_write(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); + + /* + * Program cacheability overrides to not allocate cache + * lines on a write miss + */ + a6xx_llc_rmw(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); + } } } @@ -1086,10 +1101,21 @@ static void 
a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) static void a6xx_llc_slices_init(struct platform_device *pdev, struct a6xx_gpu *a6xx_gpu) { + struct device_node *phandle; + a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); if (IS_ERR(a6xx_gpu->llc_mmio)) return; + /* + * There is a different programming path for targets with an mmu500 + * attached, so detect if that is the case + */ + phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); + a6xx_gpu->have_mmu500 = (phandle && + of_device_is_compatible(phandle, "arm,mmu500")); + of_node_put(phandle); + a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 9e6079af679c..e793d329e77b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -32,6 +32,7 @@ struct a6xx_gpu { void __iomem *llc_mmio; void *llc_slice; void *htw_llc_slice; + bool have_mmu500; }; #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
This is an extension to the series [1] to enable the System Cache (LLC) for
Adreno a6xx targets.

GPU targets with an MMU-500 attached have a slightly different process for
enabling system cache. Use the compatible string on the IOMMU phandle
to see if an MMU-500 is attached and modify the programming sequence
accordingly.

[1] https://patchwork.freedesktop.org/series/83037/

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---

 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 +++++++++++++++++++++------
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  1 +
 2 files changed, 37 insertions(+), 10 deletions(-)