Message ID | 20240425134354.1233862-5-cwabbott0@gmail.com |
---|---|
State | New |
Headers | show |
Series | drm/msm: Support a750 "software fuse" for raytracing | expand |
On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > initialize cx_mem. Copy this from downstream (minus BCL which we > currently don't support). On a750, this includes a new "fuse" register > which can be used by qcom_scm to fuse off certain features like > raytracing in software. The fuse is default off, and is initialized by > calling the method. Afterwards we have to read it to find out which > features were enabled. > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > --- > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > 2 files changed, 90 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index cf0b1de1c071..fb2722574ae5 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -10,6 +10,7 @@ > > #include <linux/bitfield.h> > #include <linux/devfreq.h> > +#include <linux/firmware/qcom/qcom_scm.h> > #include <linux/pm_domain.h> > #include <linux/soc/qcom/llcc-qcom.h> > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > A6XX_CP_APRIV_CNTL_RBFETCH | \ > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > kthread_queue_work(gpu->worker, &gpu->recover_work); > } > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > +{ > + u32 status; > + > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > + > + dev_err_ratelimited(&gpu->pdev->dev, "SW 
fuse violation status=%8.8x\n", status); > + > + /* Ignore FASTBLEND violations, because the HW will silently fall back > + * to legacy blending. > + */ > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > + del_timer(&gpu->hangcheck_timer); > + > + kthread_queue_work(gpu->worker, &gpu->recover_work); > + } > +} > + > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > { > struct msm_drm_private *priv = gpu->dev->dev_private; > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > + a7xx_sw_fuse_violation_irq(gpu); > + > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > msm_gpu_retire(gpu); > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > } > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > +{ > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > + struct msm_gpu *gpu = &adreno_gpu->base; > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > + u32 fuse_val; > + int ret; > + > + if (adreno_is_a740(adreno_gpu)) { > + /* Raytracing is always enabled on a740 */ > + adreno_gpu->has_ray_tracing = true; > + } > + > + if (!qcom_scm_is_available()) { > + /* Assume that if qcom scm isn't available, that whatever > + * replacement allows writing the fuse register ourselves. > + * Users of alternative firmware need to make sure this > + * register is writeable or indicate that it's not somehow. > + * Print a warning because if you mess this up you're about to > + * crash horribly. 
> + */ > + if (adreno_is_a750(adreno_gpu)) { > + dev_warn_once(gpu->dev->dev, > + "SCM is not available, poking fuse register\n"); > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > + adreno_gpu->has_ray_tracing = true; > + } > + > + return 0; > + } > + > + if (adreno_is_a750(adreno_gpu)) Most of the function is under the if (adreno_is_a750) conditions. Can we invert the logic and add a single block of if(adreno_is_a750) and then place all the code underneath? > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > + > + ret = qcom_scm_gpu_init_regs(gpu_req); > + if (ret) > + return ret; > + > + /* On a750 raytracing may be disabled by the firmware, find out whether > + * that's the case. The scm call above sets the fuse register. > + */ > + if (adreno_is_a750(adreno_gpu)) { > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); This register isn't accessible with the current sm8650.dtsi. Since DT and driver are going through different trees, please add safety guards here, so that the driver doesn't crash if used with older dtsi (not to mention that dts is considered to be an ABI and newer kernels are supposed not to break with older DT files). 
> + adreno_gpu->has_ray_tracing = > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > + } > + > + return 0; > +} > + > + > #define GBIF_CLIENT_HALT_MASK BIT(0) > #define GBIF_ARB_HALT_MASK BIT(1) > #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) > @@ -3094,6 +3173,14 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) > return ERR_PTR(ret); > } > > + if (adreno_is_a7xx(adreno_gpu)) { > + ret = a7xx_cx_mem_init(a6xx_gpu); > + if (ret) { > + a6xx_destroy(&(a6xx_gpu->base.base)); > + return ERR_PTR(ret); > + } > + } > + > if (gpu->aspace) > msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, > a6xx_fault_handler); > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > index 77526892eb8c..4180f3149dd8 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > @@ -182,6 +182,8 @@ struct adreno_gpu { > */ > const unsigned int *reg_offsets; > bool gmu_is_wrapper; > + > + bool has_ray_tracing; > }; > #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) > > -- > 2.31.1 >
On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > initialize cx_mem. Copy this from downstream (minus BCL which we > > currently don't support). On a750, this includes a new "fuse" register > > which can be used by qcom_scm to fuse off certain features like > > raytracing in software. The fuse is default off, and is initialized by > > calling the method. Afterwards we have to read it to find out which > > features were enabled. > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > --- > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > index cf0b1de1c071..fb2722574ae5 100644 > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > @@ -10,6 +10,7 @@ > > > > #include <linux/bitfield.h> > > #include <linux/devfreq.h> > > +#include <linux/firmware/qcom/qcom_scm.h> > > #include <linux/pm_domain.h> > > #include <linux/soc/qcom/llcc-qcom.h> > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > } > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > +{ > > + 
u32 status; > > + > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > + > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > + > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > + * to legacy blending. > > + */ > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > + del_timer(&gpu->hangcheck_timer); > > + > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > + } > > +} > > + > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > { > > struct msm_drm_private *priv = gpu->dev->dev_private; > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > + a7xx_sw_fuse_violation_irq(gpu); > > + > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > msm_gpu_retire(gpu); > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > } > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > +{ > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > + struct msm_gpu *gpu = &adreno_gpu->base; > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > + u32 fuse_val; > > + int ret; > > + > > + if (adreno_is_a740(adreno_gpu)) { > > + /* Raytracing is always enabled on a740 */ > > + adreno_gpu->has_ray_tracing = true; > > + } > > + > > + if (!qcom_scm_is_available()) { > > + /* Assume that if qcom scm isn't available, that whatever > > + * replacement allows writing the fuse register ourselves. > > + * Users of alternative firmware need to make sure this > > + * register is writeable or indicate that it's not somehow. 
> > + * Print a warning because if you mess this up you're about to > > + * crash horribly. > > + */ > > + if (adreno_is_a750(adreno_gpu)) { > > + dev_warn_once(gpu->dev->dev, > > + "SCM is not available, poking fuse register\n"); > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > + adreno_gpu->has_ray_tracing = true; > > + } > > + > > + return 0; > > + } > > + > > + if (adreno_is_a750(adreno_gpu)) > > Most of the function is under the if (adreno_is_a750) conditions. Can > we invert the logic and add a single block of if(adreno_is_a750) and > then place all the code underneath? You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > + > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > + if (ret) > > + return ret; > > + > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > + * that's the case. The scm call above sets the fuse register. > > + */ > > + if (adreno_is_a750(adreno_gpu)) { > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > This register isn't accessible with the current sm8650.dtsi. Since DT > and driver are going through different trees, please add safety guards > here, so that the driver doesn't crash if used with older dtsi I don't see how this is an issue. msm-next is currently based on 6.9, which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and 2 will have to go through the linux-arm-msm tree, which will have to be merged into msm-next before this patch lands there, so there will never be any breakage. > (not to mention that dts is considered to be an ABI and newer kernels > are supposed not to break with older DT files). That policy only applies to released kernels, so that's irrelevant here. 
> > > + adreno_gpu->has_ray_tracing = > > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > > + } > > + > > + return 0; > > +} > > + > > + > > #define GBIF_CLIENT_HALT_MASK BIT(0) > > #define GBIF_ARB_HALT_MASK BIT(1) > > #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) > > @@ -3094,6 +3173,14 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) > > return ERR_PTR(ret); > > } > > > > + if (adreno_is_a7xx(adreno_gpu)) { > > + ret = a7xx_cx_mem_init(a6xx_gpu); > > + if (ret) { > > + a6xx_destroy(&(a6xx_gpu->base.base)); > > + return ERR_PTR(ret); > > + } > > + } > > + > > if (gpu->aspace) > > msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, > > a6xx_fault_handler); > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > index 77526892eb8c..4180f3149dd8 100644 > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > @@ -182,6 +182,8 @@ struct adreno_gpu { > > */ > > const unsigned int *reg_offsets; > > bool gmu_is_wrapper; > > + > > + bool has_ray_tracing; > > }; > > #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) > > > > -- > > 2.31.1 > > > > > -- > With best wishes > Dmitry
On Thu, Apr 25, 2024 at 4:02 PM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > initialize cx_mem. Copy this from downstream (minus BCL which we > > currently don't support). On a750, this includes a new "fuse" register > > which can be used by qcom_scm to fuse off certain features like > > raytracing in software. The fuse is default off, and is initialized by > > calling the method. Afterwards we have to read it to find out which > > features were enabled. > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > --- > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > index cf0b1de1c071..fb2722574ae5 100644 > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > @@ -10,6 +10,7 @@ > > > > #include <linux/bitfield.h> > > #include <linux/devfreq.h> > > +#include <linux/firmware/qcom/qcom_scm.h> > > #include <linux/pm_domain.h> > > #include <linux/soc/qcom/llcc-qcom.h> > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > } > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > +{ > > + u32 
status; > > + > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > + > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > + > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > + * to legacy blending. > > + */ > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > + del_timer(&gpu->hangcheck_timer); > > + > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > + } > > +} > > + > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > { > > struct msm_drm_private *priv = gpu->dev->dev_private; > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > + a7xx_sw_fuse_violation_irq(gpu); > > + > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > msm_gpu_retire(gpu); > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > } > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > +{ > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > + struct msm_gpu *gpu = &adreno_gpu->base; > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > + u32 fuse_val; > > + int ret; > > + > > + if (adreno_is_a740(adreno_gpu)) { > > + /* Raytracing is always enabled on a740 */ > > + adreno_gpu->has_ray_tracing = true; > > + } > > + > > + if (!qcom_scm_is_available()) { > > + /* Assume that if qcom scm isn't available, that whatever > > + * replacement allows writing the fuse register ourselves. > > + * Users of alternative firmware need to make sure this > > + * register is writeable or indicate that it's not somehow. 
> > + * Print a warning because if you mess this up you're about to > > + * crash horribly. > > + */ > > + if (adreno_is_a750(adreno_gpu)) { > > + dev_warn_once(gpu->dev->dev, > > + "SCM is not available, poking fuse register\n"); > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > + adreno_gpu->has_ray_tracing = true; > > + } > > + > > + return 0; > > + } > > + > > + if (adreno_is_a750(adreno_gpu)) > > Most of the function is under the if (adreno_is_a750) conditions. Can > we invert the logic and add a single block of if(adreno_is_a750) and > then place all the code underneath? > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > + > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > + if (ret) > > + return ret; > > + > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > + * that's the case. The scm call above sets the fuse register. > > + */ > > + if (adreno_is_a750(adreno_gpu)) { > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > This register isn't accessible with the current sm8650.dtsi. Since DT > and driver are going through different trees, please add safety guards > here, so that the driver doesn't crash if used with older dtsi > (not to mention that dts is considered to be an ABI and newer kernels > are supposed not to break with older DT files). I'd be happy if older kernels consistently worked with newer dtb, the other direction is too much to ask. If necessary we can ask for ack to land the dts fix thru msm-next somehow, but since the gpu is newly enabled device landing in the same merge window I think that is not necessary. 
BR, -R > > + adreno_gpu->has_ray_tracing = > > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > > + } > > + > > + return 0; > > +} > > + > > + > > #define GBIF_CLIENT_HALT_MASK BIT(0) > > #define GBIF_ARB_HALT_MASK BIT(1) > > #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) > > @@ -3094,6 +3173,14 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) > > return ERR_PTR(ret); > > } > > > > + if (adreno_is_a7xx(adreno_gpu)) { > > + ret = a7xx_cx_mem_init(a6xx_gpu); > > + if (ret) { > > + a6xx_destroy(&(a6xx_gpu->base.base)); > > + return ERR_PTR(ret); > > + } > > + } > > + > > if (gpu->aspace) > > msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, > > a6xx_fault_handler); > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > index 77526892eb8c..4180f3149dd8 100644 > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > > @@ -182,6 +182,8 @@ struct adreno_gpu { > > */ > > const unsigned int *reg_offsets; > > bool gmu_is_wrapper; > > + > > + bool has_ray_tracing; > > }; > > #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) > > > > -- > > 2.31.1 > > > > > -- > With best wishes > Dmitry
On Fri, Apr 26, 2024 at 1:35 PM Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > currently don't support). On a750, this includes a new "fuse" register > > > which can be used by qcom_scm to fuse off certain features like > > > raytracing in software. The fuse is default off, and is initialized by > > > calling the method. Afterwards we have to read it to find out which > > > features were enabled. > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > --- > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > index cf0b1de1c071..fb2722574ae5 100644 > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > @@ -10,6 +10,7 @@ > > > > > > #include <linux/bitfield.h> > > > #include <linux/devfreq.h> > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > #include <linux/pm_domain.h> > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct 
msm_gpu *gpu) > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > } > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > +{ > > > + u32 status; > > > + > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > + > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > + > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > + * to legacy blending. > > > + */ > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > + del_timer(&gpu->hangcheck_timer); > > > + > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > + } > > > +} > > > + > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > { > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > + a7xx_sw_fuse_violation_irq(gpu); > > > + > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > msm_gpu_retire(gpu); > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > } > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > +{ > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > + u32 fuse_val; > > > + int ret; > > > + > > > + if (adreno_is_a740(adreno_gpu)) { > > > + /* Raytracing is always enabled on a740 */ > > > + adreno_gpu->has_ray_tracing = true; > > > + } > > > + > > > + if (!qcom_scm_is_available()) { > > > + /* Assume that if qcom scm isn't available, 
that whatever > > > + * replacement allows writing the fuse register ourselves. > > > + * Users of alternative firmware need to make sure this > > > + * register is writeable or indicate that it's not somehow. > > > + * Print a warning because if you mess this up you're about to > > > + * crash horribly. > > > + */ > > > + if (adreno_is_a750(adreno_gpu)) { > > > + dev_warn_once(gpu->dev->dev, > > > + "SCM is not available, poking fuse register\n"); > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > + adreno_gpu->has_ray_tracing = true; > > > + } > > > + > > > + return 0; > > > + } > > > + > > > + if (adreno_is_a750(adreno_gpu)) > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > we invert the logic and add a single block of if(adreno_is_a750) and > > then place all the code underneath? > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > Sorry, didn't finish this thought. I meant to ask whether you wanted to duplicate the qcom_scm_is_available check and the qcom_scm_gpu_init_regs call between a750+ and everything else. Connor
On Fri, 26 Apr 2024 at 15:46, Rob Clark <robdclark@gmail.com> wrote: > > On Thu, Apr 25, 2024 at 4:02 PM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > currently don't support). On a750, this includes a new "fuse" register > > > which can be used by qcom_scm to fuse off certain features like > > > raytracing in software. The fuse is default off, and is initialized by > > > calling the method. Afterwards we have to read it to find out which > > > features were enabled. > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > --- > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > [...] > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > + > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > + if (ret) > > > + return ret; > > > + > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > + * that's the case. The scm call above sets the fuse register. > > > + */ > > > + if (adreno_is_a750(adreno_gpu)) { > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > and driver are going through different trees, please add safety guards > > here, so that the driver doesn't crash if used with older dtsi > > (not to mention that dts is considered to be an ABI and newer kernels > > are supposed not to break with older DT files). > > I'd be happy if older kernels consistently worked with newer dtb, the > other direction is too much to ask. Well, we guarantee that newer kernels work with older dts. 
> If necessary we can ask for ack > to land the dts fix thru msm-next somehow, but since the gpu is newly > enabled device landing in the same merge window I think that is not > necessary. This might work too.
On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > currently don't support). On a750, this includes a new "fuse" register > > > which can be used by qcom_scm to fuse off certain features like > > > raytracing in software. The fuse is default off, and is initialized by > > > calling the method. Afterwards we have to read it to find out which > > > features were enabled. > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > --- > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > index cf0b1de1c071..fb2722574ae5 100644 > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > @@ -10,6 +10,7 @@ > > > > > > #include <linux/bitfield.h> > > > #include <linux/devfreq.h> > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > #include <linux/pm_domain.h> > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct 
msm_gpu *gpu) > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > } > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > +{ > > > + u32 status; > > > + > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > + > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > + > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > + * to legacy blending. > > > + */ > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > + del_timer(&gpu->hangcheck_timer); > > > + > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > + } > > > +} > > > + > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > { > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > + a7xx_sw_fuse_violation_irq(gpu); > > > + > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > msm_gpu_retire(gpu); > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > } > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > +{ > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > + u32 fuse_val; > > > + int ret; > > > + > > > + if (adreno_is_a740(adreno_gpu)) { > > > + /* Raytracing is always enabled on a740 */ > > > + adreno_gpu->has_ray_tracing = true; > > > + } > > > + > > > + if (!qcom_scm_is_available()) { > > > + /* Assume that if qcom scm isn't available, 
that whatever > > > + * replacement allows writing the fuse register ourselves. > > > + * Users of alternative firmware need to make sure this > > > + * register is writeable or indicate that it's not somehow. > > > + * Print a warning because if you mess this up you're about to > > > + * crash horribly. > > > + */ > > > + if (adreno_is_a750(adreno_gpu)) { > > > + dev_warn_once(gpu->dev->dev, > > > + "SCM is not available, poking fuse register\n"); > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > + adreno_gpu->has_ray_tracing = true; > > > + } > > > + > > > + return 0; > > > + } > > > + > > > + if (adreno_is_a750(adreno_gpu)) > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > we invert the logic and add a single block of if(adreno_is_a750) and > > then place all the code underneath? > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > + > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > + if (ret) > > > + return ret; > > > + > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > + * that's the case. The scm call above sets the fuse register. > > > + */ > > > + if (adreno_is_a750(adreno_gpu)) { > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > and driver are going through different trees, please add safety guards > > here, so that the driver doesn't crash if used with older dtsi > > I don't see how this is an issue. msm-next is currently based on 6.9, > which doesn't have the GPU defined in sm8650.dtsi. 
AFAIK patches 1 and > 2 will have to go through the linux-arm-msm tree, which will have to > be merged into msm-next before this patch lands there, so there will > never be any breakage. linux-arm-msm isn't going to be merged into msm-next. If we do not ask for ack for the fix to go through msm-next, they will get these patches in parallel. Another option is to get the dtsi fix into 6.9 and delay the raytracing until 6.10-rc (which doesn't make a lot of sense from my POV). > > > (not to mention that dts is considered to be an ABI and newer kernels > > are supposed not to break with older DT files). > > That policy only applies to released kernels, so that's irrelevant here. It applies to all kernels, the reason being pretty simple: git-bisect should not be broken.
On Fri, 26 Apr 2024 at 15:54, Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 1:35 PM Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > currently don't support). On a750, this includes a new "fuse" register > > > > which can be used by qcom_scm to fuse off certain features like > > > > raytracing in software. The fuse is default off, and is initialized by > > > > calling the method. Afterwards we have to read it to find out which > > > > features were enabled. > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > --- > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > @@ -10,6 +10,7 @@ > > > > > > > > #include <linux/bitfield.h> > > > > #include <linux/devfreq.h> > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > #include <linux/pm_domain.h> > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > 
#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > } > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > +{ > > > > + u32 status; > > > > + > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > + > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > + > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > + * to legacy blending. > > > > + */ > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > + del_timer(&gpu->hangcheck_timer); > > > > + > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > + } > > > > +} > > > > + > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > { > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > + > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > msm_gpu_retire(gpu); > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > } > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > +{ > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > + u32 fuse_val; > > > > + int 
ret; > > > > + > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > + /* Raytracing is always enabled on a740 */ > > > > + adreno_gpu->has_ray_tracing = true; > > > > + } > > > > + > > > > + if (!qcom_scm_is_available()) { > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > + * replacement allows writing the fuse register ourselves. > > > > + * Users of alternative firmware need to make sure this > > > > + * register is writeable or indicate that it's not somehow. > > > > + * Print a warning because if you mess this up you're about to > > > > + * crash horribly. > > > > + */ > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > + dev_warn_once(gpu->dev->dev, > > > > + "SCM is not available, poking fuse register\n"); > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > + adreno_gpu->has_ray_tracing = true; > > > > + } > > > > + > > > > + return 0; > > > > + } > > > > + > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > then place all the code underneath? > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > Sorry, didn't finish this thought. I meant to ask if you wanted to > duplicate qcom_scm_is_available check and qcom_scm_gpu_init_regs > between a750+ and everything else. I don't see !qcom_scm_is_available() being useful anywhere else, at least for now. So it becomes: if (adreno_is_a740(adreno_gpu)) { /* Raytracing is always enabled on a740 */ adreno_gpu->has_ray_tracing = true; // FIXME: Do we need this at all on a740? 
qcom_scm_gpu_init_regs(gpu_req); } else if (adreno_is_a750(adreno_gpu)) { if (!qcom_scm_is_available()) { dev_warn_once(); adreno_gpu->has_ray_tracing = true; return 0; } gpu_req |= ...; qcom_scm_gpu_init_regs(gpu_req); fuse_val ....; }
On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > currently don't support). On a750, this includes a new "fuse" register > > > > which can be used by qcom_scm to fuse off certain features like > > > > raytracing in software. The fuse is default off, and is initialized by > > > > calling the method. Afterwards we have to read it to find out which > > > > features were enabled. > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > --- > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > @@ -10,6 +10,7 @@ > > > > > > > > #include <linux/bitfield.h> > > > > #include <linux/devfreq.h> > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > #include <linux/pm_domain.h> > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > 
> > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > } > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > +{ > > > > + u32 status; > > > > + > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > + > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > + > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > + * to legacy blending. > > > > + */ > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > + del_timer(&gpu->hangcheck_timer); > > > > + > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > + } > > > > +} > > > > + > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > { > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > + > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > msm_gpu_retire(gpu); > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > } > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > +{ > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > + u32 fuse_val; > > > > 
+ int ret; > > > > + > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > + /* Raytracing is always enabled on a740 */ > > > > + adreno_gpu->has_ray_tracing = true; > > > > + } > > > > + > > > > + if (!qcom_scm_is_available()) { > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > + * replacement allows writing the fuse register ourselves. > > > > + * Users of alternative firmware need to make sure this > > > > + * register is writeable or indicate that it's not somehow. > > > > + * Print a warning because if you mess this up you're about to > > > > + * crash horribly. > > > > + */ > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > + dev_warn_once(gpu->dev->dev, > > > > + "SCM is not available, poking fuse register\n"); > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > + adreno_gpu->has_ray_tracing = true; > > > > + } > > > > + > > > > + return 0; > > > > + } > > > > + > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > then place all the code underneath? > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > + > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > + * that's the case. The scm call above sets the fuse register. > > > > + */ > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > This register isn't accessible with the current sm8650.dtsi. 
Since DT > > > and driver are going through different trees, please add safety guards > > > here, so that the driver doesn't crash if used with older dtsi > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > 2 will have to go through the linux-arm-msm tree, which will have to > > be merged into msm-next before this patch lands there, so there will > > never be any breakage. > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > for ack for the fix to go through msm-next, they will get these > patches in parallel. I'm not familiar with how complicated cross-tree changes like this get merged, but why would we merge these in parallel given that this patch depends on the previous patch that introduces qcom_scm_gpu_init_regs(), and that would (I assume?) normally go through the same tree as patch 1? Even if patch 1 gets merged in parallel in linux-arm-msm, in what scenario would we have a broken boot? You won't have a devicetree with a working sm8650 GPU and drm/msm with raytracing until linux-arm-msm is merged into msm-next at which point patch 1 will have landed somehow. > > Another option is to get dtsi fix into 6.9 and delay the raytracing > until 6.10-rc which doesn't make a lot of sense from my POV). > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > are supposed not to break with older DT files). > > > > That policy only applies to released kernels, so that's irrelevant here. > > It applies to all kernels, the reason being pretty simple: git-bisect > should not be broken. As I wrote above, this is not an issue. The point I was making is that mixing and matching dtb's from one unmerged subsystem tree and a kernel from another isn't supported AFAIK, and that's the only scenario where this could break. Connor
On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > calling the method. Afterwards we have to read it to find out which > > > > > features were enabled. > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > --- > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > #include <linux/bitfield.h> > > > > > #include <linux/devfreq.h> > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > #include <linux/pm_domain.h> > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 
| \ > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > } > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > +{ > > > > > + u32 status; > > > > > + > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > + > > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > > + > > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > > + * to legacy blending. > > > > > + */ > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > + > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > + } > > > > > +} > > > > > + > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > { > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > + > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > msm_gpu_retire(gpu); > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > } > > > > > > > > > > +static 
int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > +{ > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > + u32 fuse_val; > > > > > + int ret; > > > > > + > > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > > + /* Raytracing is always enabled on a740 */ > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > + } > > > > > + > > > > > + if (!qcom_scm_is_available()) { > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > + * replacement allows writing the fuse register ourselves. > > > > > + * Users of alternative firmware need to make sure this > > > > > + * register is writeable or indicate that it's not somehow. > > > > > + * Print a warning because if you mess this up you're about to > > > > > + * crash horribly. > > > > > + */ > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > + dev_warn_once(gpu->dev->dev, > > > > > + "SCM is not available, poking fuse register\n"); > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > + } > > > > > + > > > > > + return 0; > > > > > + } > > > > > + > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > then place all the code underneath? 
> > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > + > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > + if (ret) > > > > > + return ret; > > > > > + > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > + * that's the case. The scm call above sets the fuse register. > > > > > + */ > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > > > and driver are going through different trees, please add safety guards > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > be merged into msm-next before this patch lands there, so there will > > > never be any breakage. > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > for ack for the fix to go through msm-next, they will get these > > patches in parallel. > > I'm not familiar with how complicated cross-tree changes like this get > merged, but why would we merge these in parallel given that this patch > depends on the previous patch that introduces > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > through the same tree as patch 1? Even if patch 1 gets merged in > parallel in linux-arm-msm, in what scenario would we have a broken > boot? You won't have a devicetree with a working sm8650 GPU and > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > which point patch 1 will have landed somehow. arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. 
So yes, this needs a lot of coordination. > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > until 6.10-rc which doesn't make a lot of sense from my POV). > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > are supposed not to break with older DT files). > > > > > > That policy only applies to released kernels, so that's irrelevant here. > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > should not be broken. > > As I wrote above, this is not an issue. The point I was making is that > mixing and matching dtb's from one unmerged subsystem tree and a > kernel from another isn't supported AFAIK, and that's the only > scenario where this could break. And it can happen if somebody running a bisect ends up in the branch with these patches in, but with the dtsi bits not being picked up.
On Fri, Apr 26, 2024 at 3:53 PM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > > calling the method. Afterwards we have to read it to find out which > > > > > > features were enabled. 
> > > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > > --- > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > > > #include <linux/bitfield.h> > > > > > > #include <linux/devfreq.h> > > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > > #include <linux/pm_domain.h> > > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > } > > > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > > +{ > > > > > > + u32 status; > > > > > > + > > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > > + > > > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > > > + > > > > > > + /* Ignore FASTBLEND violations, because the 
HW will silently fall back > > > > > > + * to legacy blending. > > > > > > + */ > > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > > + > > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > + } > > > > > > +} > > > > > > + > > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > { > > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > > + > > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > > msm_gpu_retire(gpu); > > > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > > } > > > > > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > > +{ > > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > > + u32 fuse_val; > > > > > > + int ret; > > > > > > + > > > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > > > + /* Raytracing is always enabled on a740 */ > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > + } > > > > > > + > > > > > > + if (!qcom_scm_is_available()) { > > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > > + * replacement allows writing the fuse register ourselves. 
> > > > > > + * Users of alternative firmware need to make sure this > > > > > > + * register is writeable or indicate that it's not somehow. > > > > > > + * Print a warning because if you mess this up you're about to > > > > > > + * crash horribly. > > > > > > + */ > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > + dev_warn_once(gpu->dev->dev, > > > > > > + "SCM is not available, poking fuse register\n"); > > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > + } > > > > > > + > > > > > > + return 0; > > > > > > + } > > > > > > + > > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > > then place all the code underneath? > > > > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > > + > > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > > + if (ret) > > > > > > + return ret; > > > > > > + > > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > > + * that's the case. The scm call above sets the fuse register. > > > > > > + */ > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > > > > and driver are going through different trees, please add safety guards > > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > > > I don't see how this is an issue. 
msm-next is currently based on 6.9, > > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > > be merged into msm-next before this patch lands there, so there will > > > > never be any breakage. > > > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > > for ack for the fix to go through msm-next, they will get these > > > patches in parallel. > > > > I'm not familiar with how complicated cross-tree changes like this get > > merged, but why would we merge these in parallel given that this patch > > depends on the previous patch that introduces > > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > > through the same tree as patch 1? Even if patch 1 gets merged in > > parallel in linux-arm-msm, in what scenario would we have a broken > > boot? You won't have a devicetree with a working sm8650 GPU and > > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > > which point patch 1 will have landed somehow. > > arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. > So yes, this needs a lot of coordination. > > > > > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > > until 6.10-rc which doesn't make a lot of sense from my POV). > > > > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > > are supposed not to break with older DT files). > > > > > > > > That policy only applies to released kernels, so that's irrelevant here. > > > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > > should not be broken. > > > > As I wrote above, this is not an issue. The point I was making is that > > mixing and matching dtb's from one unmerged subsystem tree and a > > kernel from another isn't supported AFAIK, and that's the only > > scenario where this could break. 
> > And it can happen if somebody running a bisect ends up in the branch > with these patches in, but with the dtsi bits not being picked up. That wouldn't be possible unless we merged the "bad" commit introducing the GPU node to sm8650.dtsi into msm-next but not the fix. So yeah, it's going to require a lot of careful cooperation but it should be possible to avoid that happening. Connor
On Fri, 26 Apr 2024 at 18:08, Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 3:53 PM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > > > calling the method. Afterwards we have to read it to find out which > > > > > > > features were enabled. 
> > > > > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > > > --- > > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > > > > > #include <linux/bitfield.h> > > > > > > > #include <linux/devfreq.h> > > > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > > > #include <linux/pm_domain.h> > > > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > } > > > > > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > > > +{ > > > > > > > + u32 status; > > > > > > > + > > > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > > > + > > > > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation 
status=%8.8x\n", status); > > > > > > > + > > > > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > > > > + * to legacy blending. > > > > > > > + */ > > > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > > > + > > > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > + } > > > > > > > +} > > > > > > > + > > > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > { > > > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > > > + > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > > > msm_gpu_retire(gpu); > > > > > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > > > } > > > > > > > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > > > +{ > > > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > > > + u32 fuse_val; > > > > > > > + int ret; > > > > > > > + > > > > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > > > > + /* Raytracing is always enabled on a740 */ > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > + } > > > > > > > + > > > > > > > + if (!qcom_scm_is_available()) { > > > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > > > + * 
replacement allows writing the fuse register ourselves. > > > > > > > + * Users of alternative firmware need to make sure this > > > > > > > + * register is writeable or indicate that it's not somehow. > > > > > > > + * Print a warning because if you mess this up you're about to > > > > > > > + * crash horribly. > > > > > > > + */ > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > + dev_warn_once(gpu->dev->dev, > > > > > > > + "SCM is not available, poking fuse register\n"); > > > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > + } > > > > > > > + > > > > > > > + return 0; > > > > > > > + } > > > > > > > + > > > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > > > then place all the code underneath? > > > > > > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > > > + > > > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > > > + if (ret) > > > > > > > + return ret; > > > > > > > + > > > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > > > + * that's the case. The scm call above sets the fuse register. > > > > > > > + */ > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > > > > > This register isn't accessible with the current sm8650.dtsi. 
Since DT > > > > > > and driver are going through different trees, please add safety guards > > > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > > > be merged into msm-next before this patch lands there, so there will > > > > > never be any breakage. > > > > > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > > > for ack for the fix to go through msm-next, they will get these > > > > patches in parallel. > > > > > > I'm not familiar with how complicated cross-tree changes like this get > > > merged, but why would we merge these in parallel given that this patch > > > depends on the previous patch that introduces > > > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > > > through the same tree as patch 1? Even if patch 1 gets merged in > > > parallel in linux-arm-msm, in what scenario would we have a broken > > > boot? You won't have a devicetree with a working sm8650 GPU and > > > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > > > which point patch 1 will have landed somehow. > > > > arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. > > So yes, this needs a lot of coordination. > > > > > > > > > > > > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > > > until 6.10-rc which doesn't make a lot of sense from my POV). > > > > > > > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > > > are supposed not to break with older DT files). > > > > > > > > > > That policy only applies to released kernels, so that's irrelevant here. 
> > > > > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > > > should not be broken. > > > > > > As I wrote above, this is not an issue. The point I was making is that > > > mixing and matching dtb's from one unmerged subsystem tree and a > > > kernel from another isn't supported AFAIK, and that's the only > > > scenario where this could break. > > > > And it can happen if somebody running a bisect ends up in the branch > > with these patches in, but with the dtsi bits not being picked up. > > That wouldn't be possible unless we merged the "bad" commit > introducing the GPU node to sm8650.dtsi into msm-next but not the fix. > So yeah, it's going to require a lot of careful cooperation but it > should be possible to avoid that happening. Well, the GPU node is already there in the linux-next. Anyway. Please. Don't break compat with old DTS. That is a rule of thumb.
On Fri, Apr 26, 2024 at 4:24 PM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Fri, 26 Apr 2024 at 18:08, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On Fri, Apr 26, 2024 at 3:53 PM Dmitry Baryshkov > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > > > > calling the method. Afterwards we have to read it to find out which > > > > > > > > features were enabled. 
> > > > > > > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > > > > --- > > > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > > > > > > > #include <linux/bitfield.h> > > > > > > > > #include <linux/devfreq.h> > > > > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > > > > #include <linux/pm_domain.h> > > > > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > } > > > > > > > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > > > > +{ > > > > > > > > + u32 status; > > > > > > > > + > > > > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > > > > + > > > > 
> > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > > > > > + > > > > > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > > > > > + * to legacy blending. > > > > > > > > + */ > > > > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > > > > + > > > > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > + } > > > > > > > > +} > > > > > > > > + > > > > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > { > > > > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > > > > + > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > > > > msm_gpu_retire(gpu); > > > > > > > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > > > > } > > > > > > > > > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > > > > +{ > > > > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > > > > + u32 fuse_val; > > > > > > > > + int ret; > > > > > > > > + > > > > > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > > > > > + /* Raytracing is always enabled on a740 */ > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > + } > > > > > > > 
> + > > > > > > > > + if (!qcom_scm_is_available()) { > > > > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > > > > + * replacement allows writing the fuse register ourselves. > > > > > > > > + * Users of alternative firmware need to make sure this > > > > > > > > + * register is writeable or indicate that it's not somehow. > > > > > > > > + * Print a warning because if you mess this up you're about to > > > > > > > > + * crash horribly. > > > > > > > > + */ > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > + dev_warn_once(gpu->dev->dev, > > > > > > > > + "SCM is not available, poking fuse register\n"); > > > > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > + } > > > > > > > > + > > > > > > > > + return 0; > > > > > > > > + } > > > > > > > > + > > > > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > > > > then place all the code underneath? > > > > > > > > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > > > > + > > > > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > > > > + if (ret) > > > > > > > > + return ret; > > > > > > > > + > > > > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > > > > + * that's the case. The scm call above sets the fuse register. 
> > > > > > > > + */ > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > > > > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > > > > > > and driver are going through different trees, please add safety guards > > > > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > > > > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > > > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > > > > be merged into msm-next before this patch lands there, so there will > > > > > > never be any breakage. > > > > > > > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > > > > for ack for the fix to go through msm-next, they will get these > > > > > patches in parallel. > > > > > > > > I'm not familiar with how complicated cross-tree changes like this get > > > > merged, but why would we merge these in parallel given that this patch > > > > depends on the previous patch that introduces > > > > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > > > > through the same tree as patch 1? Even if patch 1 gets merged in > > > > parallel in linux-arm-msm, in what scenario would we have a broken > > > > boot? You won't have a devicetree with a working sm8650 GPU and > > > > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > > > > which point patch 1 will have landed somehow. > > > > > > arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. > > > So yes, this needs a lot of coordination. > > > > > > > > > > > > > > > > > > > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > > > > until 6.10-rc which doesn't make a lot of sense from my POV). 
> > > > > > > > > > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > > > > are supposed not to break with older DT files). > > > > > > > > > > > > That policy only applies to released kernels, so that's irrelevant here. > > > > > > > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > > > > should not be broken. > > > > > > > > As I wrote above, this is not an issue. The point I was making is that > > > > mixing and matching dtb's from one unmerged subsystem tree and a > > > > kernel from another isn't supported AFAIK, and that's the only > > > > scenario where this could break. > > > > > > And it can happen if somebody running a bisect ends up in the branch > > > with these patches in, but with the dtsi bits not being picked up. > > > > That wouldn't be possible unless we merged the "bad" commit > > introducing the GPU node to sm8650.dtsi into msm-next but not the fix. > > So yeah, it's going to require a lot of careful cooperation but it > > should be possible to avoid that happening. > > Well, the GPU node is already there in the linux-next. And? As long as the devicetree fix lands first, linux-next will never be broken. > Anyway. Please. Don't break compat with old DTS. That is a rule of thumb. It's exactly that, a rule of thumb. This is obviously a bit of an exceptional case, and you haven't articulated any reason why we should follow it in this case when there's an obvious reason not to. Connor
On Fri, 26 Apr 2024 at 18:36, Connor Abbott <cwabbott0@gmail.com> wrote: > > On Fri, Apr 26, 2024 at 4:24 PM Dmitry Baryshkov > <dmitry.baryshkov@linaro.org> wrote: > > > > On Fri, 26 Apr 2024 at 18:08, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > On Fri, Apr 26, 2024 at 3:53 PM Dmitry Baryshkov > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > > > > > calling the method. Afterwards we have to read it to find out which > > > > > > > > > features were enabled. 
> > > > > > > > > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > > > > > --- > > > > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > > > > > > > > > #include <linux/bitfield.h> > > > > > > > > > #include <linux/devfreq.h> > > > > > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > > > > > #include <linux/pm_domain.h> > > > > > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > > } > > > > > > > > > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > > > > > +{ > > > > > > > > > + u32 status; > > > > > > > > > + > > > > > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > > > > > 
+ gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > > > > > + > > > > > > > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > > > > > > + > > > > > > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > > > > > > + * to legacy blending. > > > > > > > > > + */ > > > > > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > > > > > + > > > > > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > > + } > > > > > > > > > +} > > > > > > > > > + > > > > > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > > { > > > > > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > > > > > + > > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > > > > > msm_gpu_retire(gpu); > > > > > > > > > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > > > > > } > > > > > > > > > > > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > > > > > +{ > > > > > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > > > > > + u32 fuse_val; > > > > > > > > > + int ret; > > > > > > > > > + > > > > > > > > > + if 
(adreno_is_a740(adreno_gpu)) { > > > > > > > > > + /* Raytracing is always enabled on a740 */ > > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > > + } > > > > > > > > > + > > > > > > > > > + if (!qcom_scm_is_available()) { > > > > > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > > > > > + * replacement allows writing the fuse register ourselves. > > > > > > > > > + * Users of alternative firmware need to make sure this > > > > > > > > > + * register is writeable or indicate that it's not somehow. > > > > > > > > > + * Print a warning because if you mess this up you're about to > > > > > > > > > + * crash horribly. > > > > > > > > > + */ > > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > > + dev_warn_once(gpu->dev->dev, > > > > > > > > > + "SCM is not available, poking fuse register\n"); > > > > > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > > + } > > > > > > > > > + > > > > > > > > > + return 0; > > > > > > > > > + } > > > > > > > > > + > > > > > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > > > > > then place all the code underneath? 
> > > > > > > > > > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > > > > > + > > > > > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > > > > > + if (ret) > > > > > > > > > + return ret; > > > > > > > > > + > > > > > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > > > > > + * that's the case. The scm call above sets the fuse register. > > > > > > > > > + */ > > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > > > > > > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > > > > > > > and driver are going through different trees, please add safety guards > > > > > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > > > > > > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > > > > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > > > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > > > > > be merged into msm-next before this patch lands there, so there will > > > > > > > never be any breakage. > > > > > > > > > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > > > > > for ack for the fix to go through msm-next, they will get these > > > > > > patches in parallel. > > > > > > > > > > I'm not familiar with how complicated cross-tree changes like this get > > > > > merged, but why would we merge these in parallel given that this patch > > > > > depends on the previous patch that introduces > > > > > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > > > > > through the same tree as patch 1? 
Even if patch 1 gets merged in > > > > > parallel in linux-arm-msm, in what scenario would we have a broken > > > > > boot? You won't have a devicetree with a working sm8650 GPU and > > > > > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > > > > > which point patch 1 will have landed somehow. > > > > > > > > arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. > > > > So yes, this needs a lot of coordination. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > > > > > until 6.10-rc which doesn't make a lot of sense from my POV). > > > > > > > > > > > > > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > > > > > are supposed not to break with older DT files). > > > > > > > > > > > > > > That policy only applies to released kernels, so that's irrelevant here. > > > > > > > > > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > > > > > should not be broken. > > > > > > > > > > As I wrote above, this is not an issue. The point I was making is that > > > > > mixing and matching dtb's from one unmerged subsystem tree and a > > > > > kernel from another isn't supported AFAIK, and that's the only > > > > > scenario where this could break. > > > > > > > > And it can happen if somebody running a bisect ends up in the branch > > > > with these patches in, but with the dtsi bits not being picked up. > > > > > > That wouldn't be possible unless we merged the "bad" commit > > > introducing the GPU node to sm8650.dtsi into msm-next but not the fix. > > > So yeah, it's going to require a lot of careful cooperation but it > > > should be possible to avoid that happening. > > > > Well, the GPU node is already there in the linux-next. > > And? As long as the devicetree fix lands first, linux-next will never be broken. So we need to land dtsi for 6.10 and delay the drm/msm changes for 6.11. 
If that's fine with you and Bjorn, I'm ok with that. > > > Anyway. Please. Don't break compat with old DTS. That is a rule of thumb. > > It's exactly that, a rule of thumb. This is obviously a bit of an > exceptional case, and you haven't articulated any reason why we should > follow it in this case when there's an obvious reason not to.
On Fri, Apr 26, 2024 at 8:24 AM Dmitry Baryshkov <dmitry.baryshkov@linaro.org> wrote: > > On Fri, 26 Apr 2024 at 18:08, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > On Fri, Apr 26, 2024 at 3:53 PM Dmitry Baryshkov > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > On Fri, 26 Apr 2024 at 17:05, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > On Fri, Apr 26, 2024 at 2:31 PM Dmitry Baryshkov > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > On Fri, 26 Apr 2024 at 15:35, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > On Fri, Apr 26, 2024 at 12:02 AM Dmitry Baryshkov > > > > > > <dmitry.baryshkov@linaro.org> wrote: > > > > > > > > > > > > > > On Thu, 25 Apr 2024 at 16:44, Connor Abbott <cwabbott0@gmail.com> wrote: > > > > > > > > > > > > > > > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > > > > > > > initialize cx_mem. Copy this from downstream (minus BCL which we > > > > > > > > currently don't support). On a750, this includes a new "fuse" register > > > > > > > > which can be used by qcom_scm to fuse off certain features like > > > > > > > > raytracing in software. The fuse is default off, and is initialized by > > > > > > > > calling the method. Afterwards we have to read it to find out which > > > > > > > > features were enabled. 
> > > > > > > > > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > > > > > --- > > > > > > > > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 ++++++++++++++++++++++++- > > > > > > > > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + > > > > > > > > 2 files changed, 90 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > index cf0b1de1c071..fb2722574ae5 100644 > > > > > > > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > > > > > > > > @@ -10,6 +10,7 @@ > > > > > > > > > > > > > > > > #include <linux/bitfield.h> > > > > > > > > #include <linux/devfreq.h> > > > > > > > > +#include <linux/firmware/qcom/qcom_scm.h> > > > > > > > > #include <linux/pm_domain.h> > > > > > > > > #include <linux/soc/qcom/llcc-qcom.h> > > > > > > > > > > > > > > > > @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) > > > > > > > > A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ > > > > > > > > A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ > > > > > > > > - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) > > > > > > > > + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ > > > > > > > > + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > > > > > > > > > #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ > > > > > > > > A6XX_CP_APRIV_CNTL_RBFETCH | \ > > > > > > > > @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) > > > > > > > > kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > } > > > > > > > > > > > > > > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > > > > > > > +{ > > > > > > > > + u32 status; > > > > > > > > + > > > > > > > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > > > > > > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > > > > > > > + > > > > 
> > > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > > > > > > > + > > > > > > > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > > > > > > > + * to legacy blending. > > > > > > > > + */ > > > > > > > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > > > > > > > + del_timer(&gpu->hangcheck_timer); > > > > > > > > + > > > > > > > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > > > > > > > + } > > > > > > > > +} > > > > > > > > + > > > > > > > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > { > > > > > > > > struct msm_drm_private *priv = gpu->dev->dev_private; > > > > > > > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > > > > > > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > > > > > > > > > > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > > > > > > > + a7xx_sw_fuse_violation_irq(gpu); > > > > > > > > + > > > > > > > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > > > > > > > msm_gpu_retire(gpu); > > > > > > > > > > > > > > > > @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > > > > > > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > > > > > > > } > > > > > > > > > > > > > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > > > > > > > +{ > > > > > > > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > > > > > > > + struct msm_gpu *gpu = &adreno_gpu->base; > > > > > > > > + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; > > > > > > > > + u32 fuse_val; > > > > > > > > + int ret; > > > > > > > > + > > > > > > > > + if (adreno_is_a740(adreno_gpu)) { > > > > > > > > + /* Raytracing is always enabled on a740 */ > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > + } > > > > > > > 
> + > > > > > > > > + if (!qcom_scm_is_available()) { > > > > > > > > + /* Assume that if qcom scm isn't available, that whatever > > > > > > > > + * replacement allows writing the fuse register ourselves. > > > > > > > > + * Users of alternative firmware need to make sure this > > > > > > > > + * register is writeable or indicate that it's not somehow. > > > > > > > > + * Print a warning because if you mess this up you're about to > > > > > > > > + * crash horribly. > > > > > > > > + */ > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > + dev_warn_once(gpu->dev->dev, > > > > > > > > + "SCM is not available, poking fuse register\n"); > > > > > > > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > > > > > > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > > > > > > > + adreno_gpu->has_ray_tracing = true; > > > > > > > > + } > > > > > > > > + > > > > > > > > + return 0; > > > > > > > > + } > > > > > > > > + > > > > > > > > + if (adreno_is_a750(adreno_gpu)) > > > > > > > > > > > > > > Most of the function is under the if (adreno_is_a750) conditions. Can > > > > > > > we invert the logic and add a single block of if(adreno_is_a750) and > > > > > > > then place all the code underneath? > > > > > > > > > > > > You mean to duplicate the qcom_scm_is_available check and qcom_scm_ > > > > > > > > > > > > > > > > > > > > > + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; > > > > > > > > + > > > > > > > > + ret = qcom_scm_gpu_init_regs(gpu_req); > > > > > > > > + if (ret) > > > > > > > > + return ret; > > > > > > > > + > > > > > > > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > > > > > > > + * that's the case. The scm call above sets the fuse register. 
> > > > > > > > + */ > > > > > > > > + if (adreno_is_a750(adreno_gpu)) { > > > > > > > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > > > > > > > > > > > > > This register isn't accessible with the current sm8650.dtsi. Since DT > > > > > > > and driver are going through different trees, please add safety guards > > > > > > > here, so that the driver doesn't crash if used with older dtsi > > > > > > > > > > > > I don't see how this is an issue. msm-next is currently based on 6.9, > > > > > > which doesn't have the GPU defined in sm8650.dtsi. AFAIK patches 1 and > > > > > > 2 will have to go through the linux-arm-msm tree, which will have to > > > > > > be merged into msm-next before this patch lands there, so there will > > > > > > never be any breakage. > > > > > > > > > > linux-arm-msm isn't going to be merged into msm-next. If we do not ask > > > > > for ack for the fix to go through msm-next, they will get these > > > > > patches in parallel. > > > > > > > > I'm not familiar with how complicated cross-tree changes like this get > > > > merged, but why would we merge these in parallel given that this patch > > > > depends on the previous patch that introduces > > > > qcom_scm_gpu_init_regs(), and that would (I assume?) normally go > > > > through the same tree as patch 1? Even if patch 1 gets merged in > > > > parallel in linux-arm-msm, in what scenario would we have a broken > > > > boot? You won't have a devicetree with a working sm8650 GPU and > > > > drm/msm with raytracing until linux-arm-msm is merged into msm-next at > > > > which point patch 1 will have landed somehow. > > > > > > arch/arm64/qcom/dts and drivers/firmware/qcom are two separate trees. > > > So yes, this needs a lot of coordination. > > > > > > > > > > > > > > > > > > > > > > > Another option is to get dtsi fix into 6.9 and delay the raytracing > > > > > until 6.10-rc which doesn't make a lot of sense from my POV). 
> > > > > > > > > > > > > > > > > > (not to mention that dts is considered to be an ABI and newer kernels > > > > > > > are supposed not to break with older DT files). > > > > > > > > > > > > That policy only applies to released kernels, so that's irrelevant here. > > > > > > > > > > It applies to all kernels, the reason being pretty simple: git-bisect > > > > > should not be broken. > > > > > > > > As I wrote above, this is not an issue. The point I was making is that > > > > mixing and matching dtb's from one unmerged subsystem tree and a > > > > kernel from another isn't supported AFAIK, and that's the only > > > > scenario where this could break. > > > > > > And it can happen if somebody running a bisect ends up in the branch > > > with these patches in, but with the dtsi bits not being picked up. > > > > That wouldn't be possible unless we merged the "bad" commit > > introducing the GPU node to sm8650.dtsi into msm-next but not the fix. > > So yeah, it's going to require a lot of careful cooperation but it > > should be possible to avoid that happening. > > Well, the GPU node is already there in the linux-next. > > Anyway. Please. Don't break compat with old DTS. That is a rule of thumb. > +Bjorn, since that is who we need to coordinate with, on two points 1) fix for sm8650.dtsi gpu node.. the gpu node is in linux-next, but not yet (AFAICT) in any pull req. So we just ask Bjorn to land the gpu node fix from this series before sending his DT pull req. Problem solved. Either drm-next gets pulled first, in which case the dt node doesn't even exist yet, or the dt is pulled with the fix before drm-next is. 2) the scm dependency.. 
looks like there are these in-flight scm patches:

[1/4] firmware: qcom: scm: Remove log reporting memory allocation failure
      commit: 3de990f7895906a7a18d2dff63e3e525acaa4ecc
[2/4] firmware: scm: Remove redundant scm argument from qcom_scm_waitq_wakeup()
      commit: 000636d91d605f6209a635a29d0487af5b12b237
[3/4] firmware: qcom: scm: Rework dload mode availability check
      commit: 398a4c58f3f29ac3ff4d777dc91fe40a07bbca8c
[4/4] firmware: qcom: scm: Fix __scm and waitq completion variable initialization
      commit: 2e4955167ec5c04534cebea9e8273a907e7a75e1

[1/1] firmware: qcom: scm: Modify only the download bits in TCSR register
      commit: b9718298e028f9edbe0fcdf48c02a1c355409410

Those don't look like they should conflict with [2/6] firmware: qcom_scm: Add gpu_init_regs call... so maybe we could get an a-b for landing that patch via msm-next.

BR,
-R
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index cf0b1de1c071..fb2722574ae5 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/devfreq.h> +#include <linux/firmware/qcom/qcom_scm.h> #include <linux/pm_domain.h> #include <linux/soc/qcom/llcc-qcom.h> @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ A6XX_CP_APRIV_CNTL_RBFETCH | \ @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) kthread_queue_work(gpu->worker, &gpu->recover_work); } +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) +{ + u32 status; + + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); + + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); + + /* Ignore FASTBLEND violations, because the HW will silently fall back + * to legacy blending. 
+ */ + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { + del_timer(&gpu->hangcheck_timer); + + kthread_queue_work(gpu->worker, &gpu->recover_work); + } +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) + a7xx_sw_fuse_violation_irq(gpu); + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) msm_gpu_retire(gpu); @@ -2525,6 +2550,60 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); } +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + u32 gpu_req = QCOM_SCM_GPU_ALWAYS_EN_REQ; + u32 fuse_val; + int ret; + + if (adreno_is_a740(adreno_gpu)) { + /* Raytracing is always enabled on a740 */ + adreno_gpu->has_ray_tracing = true; + } + + if (!qcom_scm_is_available()) { + /* Assume that if qcom scm isn't available, that whatever + * replacement allows writing the fuse register ourselves. + * Users of alternative firmware need to make sure this + * register is writeable or indicate that it's not somehow. + * Print a warning because if you mess this up you're about to + * crash horribly. 
+ */ + if (adreno_is_a750(adreno_gpu)) { + dev_warn_once(gpu->dev->dev, + "SCM is not available, poking fuse register\n"); + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); + adreno_gpu->has_ray_tracing = true; + } + + return 0; + } + + if (adreno_is_a750(adreno_gpu)) + gpu_req |= QCOM_SCM_GPU_TSENSE_EN_REQ; + + ret = qcom_scm_gpu_init_regs(gpu_req); + if (ret) + return ret; + + /* On a750 raytracing may be disabled by the firmware, find out whether + * that's the case. The scm call above sets the fuse register. + */ + if (adreno_is_a750(adreno_gpu)) { + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); + adreno_gpu->has_ray_tracing = + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); + } + + return 0; +} + + #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASK BIT(1) #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) @@ -3094,6 +3173,14 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + if (adreno_is_a7xx(adreno_gpu)) { + ret = a7xx_cx_mem_init(a6xx_gpu); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + } + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 77526892eb8c..4180f3149dd8 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -182,6 +182,8 @@ struct adreno_gpu { */ const unsigned int *reg_offsets; bool gmu_is_wrapper; + + bool has_ray_tracing; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to initialize cx_mem. Copy this from downstream (minus BCL which we currently don't support). On a750, this includes a new "fuse" register which can be used by qcom_scm to fuse off certain features like raytracing in software. The fuse is default off, and is initialized by calling the method. Afterwards we have to read it to find out which features were enabled.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 89 ++++++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  2 +
 2 files changed, 90 insertions(+), 1 deletion(-)