Message ID | 20230412115652.403949-1-benjamin.gaignard@collabora.com |
---|---|
Headers | show |
Series | AV1 stateless decoder for RK3588 | expand |
Le mercredi 12 avril 2023 à 14:43 +0200, Hans Verkuil a écrit : > Hi Benjamin, > > On 12/04/2023 13:56, Benjamin Gaignard wrote: > > Add rk3588 AV1 decoder to Hantro variant. > > The hardware support image from 64x64 up to 7680x4320 > > by steps of 16 pixels. > > > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> > > Nicolas reviewed this patch in v5. Is there a reason that tag was dropped, > or did you just forget? If it is the latter, then I can add it back. > > I see that NV12_10LE40_4L4 is now NV15_4L4 as agreed, and don't see any other changes. Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> > > Just checking. > > This series now passes my tests. > > Regards, > > Hans > > > --- > > .../media/platform/verisilicon/hantro_drv.c | 1 + > > .../media/platform/verisilicon/hantro_hw.h | 6 + > > .../platform/verisilicon/rockchip_vpu_hw.c | 134 ++++++++++++++++++ > > 3 files changed, 141 insertions(+) > > > > diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c > > index 71bd68e63859..aef1de20fc5e 100644 > > --- a/drivers/media/platform/verisilicon/hantro_drv.c > > +++ b/drivers/media/platform/verisilicon/hantro_drv.c > > @@ -713,6 +713,7 @@ static const struct of_device_id of_hantro_match[] = { > > { .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, }, > > { .compatible = "rockchip,rk3568-vepu", .data = &rk3568_vepu_variant, }, > > { .compatible = "rockchip,rk3568-vpu", .data = &rk3568_vpu_variant, }, > > + { .compatible = "rockchip,rk3588-av1-vpu", .data = &rk3588_vpu981_variant, }, > > #endif > > #ifdef CONFIG_VIDEO_HANTRO_IMX8M > > { .compatible = "nxp,imx8mm-vpu-g1", .data = &imx8mm_vpu_g1_variant, }, > > diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h > > index e3d303cea7f6..7f33f7b07ce4 100644 > > --- a/drivers/media/platform/verisilicon/hantro_hw.h > > +++ 
b/drivers/media/platform/verisilicon/hantro_hw.h > > @@ -403,11 +403,13 @@ extern const struct hantro_variant rk3328_vpu_variant; > > extern const struct hantro_variant rk3399_vpu_variant; > > extern const struct hantro_variant rk3568_vepu_variant; > > extern const struct hantro_variant rk3568_vpu_variant; > > +extern const struct hantro_variant rk3588_vpu981_variant; > > extern const struct hantro_variant sama5d4_vdec_variant; > > extern const struct hantro_variant sunxi_vpu_variant; > > > > extern const struct hantro_postproc_ops hantro_g1_postproc_ops; > > extern const struct hantro_postproc_ops hantro_g2_postproc_ops; > > +extern const struct hantro_postproc_ops rockchip_vpu981_postproc_ops; > > > > extern const u32 hantro_vp8_dec_mc_filter[8][6]; > > > > @@ -444,6 +446,10 @@ void hantro_hevc_ref_init(struct hantro_ctx *ctx); > > dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc); > > int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr); > > > > +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx); > > +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx); > > +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx); > > +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx); > > > > static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension) > > { > > diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c > > index 8de6fd2e8eef..816ffa905a4b 100644 > > --- a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c > > +++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c > > @@ -13,9 +13,13 @@ > > #include "hantro_g1_regs.h" > > #include "hantro_h1_regs.h" > > #include "rockchip_vpu2_regs.h" > > +#include "rockchip_vpu981_regs.h" > > > > #define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000) > > #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000) > > +#define RK3588_ACLK_MAX_FREQ (300 * 1000 * 1000) > > + > > +#define 
ROCKCHIP_VPU981_MIN_SIZE 64 > > > > /* > > * Supported formats. > > @@ -74,6 +78,37 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = { > > }, > > }; > > > > +static const struct hantro_fmt rockchip_vpu981_postproc_fmts[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_NV12, > > + .codec_mode = HANTRO_MODE_NONE, > > + .match_depth = true, > > + .postprocessed = true, > > + .frmsize = { > > + .min_width = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_width = FMT_UHD_WIDTH, > > + .step_width = MB_DIM, > > + .min_height = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_height = FMT_UHD_HEIGHT, > > + .step_height = MB_DIM, > > + }, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_P010, > > + .codec_mode = HANTRO_MODE_NONE, > > + .match_depth = true, > > + .postprocessed = true, > > + .frmsize = { > > + .min_width = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_width = FMT_UHD_WIDTH, > > + .step_width = MB_DIM, > > + .min_height = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_height = FMT_UHD_HEIGHT, > > + .step_height = MB_DIM, > > + }, > > + }, > > +}; > > + > > static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { > > { > > .fourcc = V4L2_PIX_FMT_NV12, > > @@ -277,6 +312,48 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { > > }, > > }; > > > > +static const struct hantro_fmt rockchip_vpu981_dec_fmts[] = { > > + { > > + .fourcc = V4L2_PIX_FMT_NV12_4L4, > > + .codec_mode = HANTRO_MODE_NONE, > > + .match_depth = true, > > + .frmsize = { > > + .min_width = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_width = FMT_UHD_WIDTH, > > + .step_width = MB_DIM, > > + .min_height = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_height = FMT_UHD_HEIGHT, > > + .step_height = MB_DIM, > > + }, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_NV15_4L4, > > + .codec_mode = HANTRO_MODE_NONE, > > + .match_depth = true, > > + .frmsize = { > > + .min_width = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_width = FMT_UHD_WIDTH, > > + .step_width = MB_DIM, > > + .min_height = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_height = FMT_UHD_HEIGHT, > > + 
.step_height = MB_DIM, > > + }, > > + }, > > + { > > + .fourcc = V4L2_PIX_FMT_AV1_FRAME, > > + .codec_mode = HANTRO_MODE_AV1_DEC, > > + .max_depth = 2, > > + .frmsize = { > > + .min_width = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_width = FMT_UHD_WIDTH, > > + .step_width = MB_DIM, > > + .min_height = ROCKCHIP_VPU981_MIN_SIZE, > > + .max_height = FMT_UHD_HEIGHT, > > + .step_height = MB_DIM, > > + }, > > + }, > > +}; > > + > > static irqreturn_t rockchip_vpu1_vepu_irq(int irq, void *dev_id) > > { > > struct hantro_dev *vpu = dev_id; > > @@ -331,6 +408,24 @@ static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id) > > return IRQ_HANDLED; > > } > > > > +static irqreturn_t rk3588_vpu981_irq(int irq, void *dev_id) > > +{ > > + struct hantro_dev *vpu = dev_id; > > + enum vb2_buffer_state state; > > + u32 status; > > + > > + status = vdpu_read(vpu, AV1_REG_INTERRUPT); > > + state = (status & AV1_REG_INTERRUPT_DEC_RDY_INT) ? > > + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; > > + > > + vdpu_write(vpu, 0, AV1_REG_INTERRUPT); > > + vdpu_write(vpu, AV1_REG_CONFIG_DEC_CLK_GATE_E, AV1_REG_CONFIG); > > + > > + hantro_irq_done(vpu, state); > > + > > + return IRQ_HANDLED; > > +} > > + > > static int rk3036_vpu_hw_init(struct hantro_dev *vpu) > > { > > /* Bump ACLK to max. possible freq. to improve performance. */ > > @@ -346,6 +441,13 @@ static int rk3066_vpu_hw_init(struct hantro_dev *vpu) > > return 0; > > } > > > > +static int rk3588_vpu981_hw_init(struct hantro_dev *vpu) > > +{ > > + /* Bump ACLKs to max. possible freq. to improve performance. */ > > + clk_set_rate(vpu->clocks[0].clk, RK3588_ACLK_MAX_FREQ); > > + return 0; > > +} > > + > > static int rockchip_vpu_hw_init(struct hantro_dev *vpu) > > { > > /* Bump ACLK to max. possible freq. to improve performance. 
*/ > > @@ -498,6 +600,14 @@ static const struct hantro_codec_ops rk3568_vepu_codec_ops[] = { > > }, > > }; > > > > +static const struct hantro_codec_ops rk3588_vpu981_codec_ops[] = { > > + [HANTRO_MODE_AV1_DEC] = { > > + .run = rockchip_vpu981_av1_dec_run, > > + .init = rockchip_vpu981_av1_dec_init, > > + .exit = rockchip_vpu981_av1_dec_exit, > > + .done = rockchip_vpu981_av1_dec_done, > > + }, > > +}; > > /* > > * VPU variant. > > */ > > @@ -529,10 +639,18 @@ static const char * const rk3066_vpu_clk_names[] = { > > "aclk_vepu", "hclk_vepu" > > }; > > > > +static const struct hantro_irq rk3588_vpu981_irqs[] = { > > + { "vdpu", rk3588_vpu981_irq }, > > +}; > > + > > static const char * const rockchip_vpu_clk_names[] = { > > "aclk", "hclk" > > }; > > > > +static const char * const rk3588_vpu981_vpu_clk_names[] = { > > + "aclk", "hclk", "aclk_vdpu_root", "hclk_vdpu_root" > > +}; > > + > > /* VDPU1/VEPU1 */ > > > > const struct hantro_variant rk3036_vpu_variant = { > > @@ -678,3 +796,19 @@ const struct hantro_variant px30_vpu_variant = { > > .clk_names = rockchip_vpu_clk_names, > > .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) > > }; > > + > > +const struct hantro_variant rk3588_vpu981_variant = { > > + .dec_offset = 0x0, > > + .dec_fmts = rockchip_vpu981_dec_fmts, > > + .num_dec_fmts = ARRAY_SIZE(rockchip_vpu981_dec_fmts), > > + .postproc_fmts = rockchip_vpu981_postproc_fmts, > > + .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu981_postproc_fmts), > > + .postproc_ops = &rockchip_vpu981_postproc_ops, > > + .codec = HANTRO_AV1_DECODER, > > + .codec_ops = rk3588_vpu981_codec_ops, > > + .irqs = rk3588_vpu981_irqs, > > + .num_irqs = ARRAY_SIZE(rk3588_vpu981_irqs), > > + .init = rk3588_vpu981_hw_init, > > + .clk_names = rk3588_vpu981_vpu_clk_names, > > + .num_clocks = ARRAY_SIZE(rk3588_vpu981_vpu_clk_names) > > +}; > >
Le 12/04/2023 à 17:50, Nicolas Dufresne a écrit : > Le mercredi 12 avril 2023 à 14:43 +0200, Hans Verkuil a écrit : >> Hi Benjamin, >> >> On 12/04/2023 13:56, Benjamin Gaignard wrote: >>> Add rk3588 AV1 decoder to Hantro variant. >>> The hardware support image from 64x64 up to 7680x4320 >>> by steps of 16 pixels. >>> >>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> >> Nicolas reviewed this patch in v5. Is there a reason that tag was dropped, >> or did you just forget? If it is the latter, then I can add it back. >> >> > I see that NV12_10LE40_4L4 is now NV15_4L4 as agreed, and don't see any other > changes. I have forgot the previous tag from Nicolas, sorry. The only change in NV12_10LE40_4L4 -> NV15_4L4. Regards, Benjamin > > Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> > >> Just checking. >> >> This series now passes my tests. >> >> Regards, >> >> Hans >> >>> --- >>> .../media/platform/verisilicon/hantro_drv.c | 1 + >>> .../media/platform/verisilicon/hantro_hw.h | 6 + >>> .../platform/verisilicon/rockchip_vpu_hw.c | 134 ++++++++++++++++++ >>> 3 files changed, 141 insertions(+) >>> >>> diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c >>> index 71bd68e63859..aef1de20fc5e 100644 >>> --- a/drivers/media/platform/verisilicon/hantro_drv.c >>> +++ b/drivers/media/platform/verisilicon/hantro_drv.c >>> @@ -713,6 +713,7 @@ static const struct of_device_id of_hantro_match[] = { >>> { .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, }, >>> { .compatible = "rockchip,rk3568-vepu", .data = &rk3568_vepu_variant, }, >>> { .compatible = "rockchip,rk3568-vpu", .data = &rk3568_vpu_variant, }, >>> + { .compatible = "rockchip,rk3588-av1-vpu", .data = &rk3588_vpu981_variant, }, >>> #endif >>> #ifdef CONFIG_VIDEO_HANTRO_IMX8M >>> { .compatible = "nxp,imx8mm-vpu-g1", .data = &imx8mm_vpu_g1_variant, }, >>> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h 
b/drivers/media/platform/verisilicon/hantro_hw.h >>> index e3d303cea7f6..7f33f7b07ce4 100644 >>> --- a/drivers/media/platform/verisilicon/hantro_hw.h >>> +++ b/drivers/media/platform/verisilicon/hantro_hw.h >>> @@ -403,11 +403,13 @@ extern const struct hantro_variant rk3328_vpu_variant; >>> extern const struct hantro_variant rk3399_vpu_variant; >>> extern const struct hantro_variant rk3568_vepu_variant; >>> extern const struct hantro_variant rk3568_vpu_variant; >>> +extern const struct hantro_variant rk3588_vpu981_variant; >>> extern const struct hantro_variant sama5d4_vdec_variant; >>> extern const struct hantro_variant sunxi_vpu_variant; >>> >>> extern const struct hantro_postproc_ops hantro_g1_postproc_ops; >>> extern const struct hantro_postproc_ops hantro_g2_postproc_ops; >>> +extern const struct hantro_postproc_ops rockchip_vpu981_postproc_ops; >>> >>> extern const u32 hantro_vp8_dec_mc_filter[8][6]; >>> >>> @@ -444,6 +446,10 @@ void hantro_hevc_ref_init(struct hantro_ctx *ctx); >>> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc); >>> int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr); >>> >>> +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx); >>> +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx); >>> +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx); >>> +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx); >>> >>> static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension) >>> { >>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c >>> index 8de6fd2e8eef..816ffa905a4b 100644 >>> --- a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c >>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c >>> @@ -13,9 +13,13 @@ >>> #include "hantro_g1_regs.h" >>> #include "hantro_h1_regs.h" >>> #include "rockchip_vpu2_regs.h" >>> +#include "rockchip_vpu981_regs.h" >>> >>> #define RK3066_ACLK_MAX_FREQ (300 
* 1000 * 1000) >>> #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000) >>> +#define RK3588_ACLK_MAX_FREQ (300 * 1000 * 1000) >>> + >>> +#define ROCKCHIP_VPU981_MIN_SIZE 64 >>> >>> /* >>> * Supported formats. >>> @@ -74,6 +78,37 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = { >>> }, >>> }; >>> >>> +static const struct hantro_fmt rockchip_vpu981_postproc_fmts[] = { >>> + { >>> + .fourcc = V4L2_PIX_FMT_NV12, >>> + .codec_mode = HANTRO_MODE_NONE, >>> + .match_depth = true, >>> + .postprocessed = true, >>> + .frmsize = { >>> + .min_width = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_width = FMT_UHD_WIDTH, >>> + .step_width = MB_DIM, >>> + .min_height = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_height = FMT_UHD_HEIGHT, >>> + .step_height = MB_DIM, >>> + }, >>> + }, >>> + { >>> + .fourcc = V4L2_PIX_FMT_P010, >>> + .codec_mode = HANTRO_MODE_NONE, >>> + .match_depth = true, >>> + .postprocessed = true, >>> + .frmsize = { >>> + .min_width = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_width = FMT_UHD_WIDTH, >>> + .step_width = MB_DIM, >>> + .min_height = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_height = FMT_UHD_HEIGHT, >>> + .step_height = MB_DIM, >>> + }, >>> + }, >>> +}; >>> + >>> static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { >>> { >>> .fourcc = V4L2_PIX_FMT_NV12, >>> @@ -277,6 +312,48 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { >>> }, >>> }; >>> >>> +static const struct hantro_fmt rockchip_vpu981_dec_fmts[] = { >>> + { >>> + .fourcc = V4L2_PIX_FMT_NV12_4L4, >>> + .codec_mode = HANTRO_MODE_NONE, >>> + .match_depth = true, >>> + .frmsize = { >>> + .min_width = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_width = FMT_UHD_WIDTH, >>> + .step_width = MB_DIM, >>> + .min_height = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_height = FMT_UHD_HEIGHT, >>> + .step_height = MB_DIM, >>> + }, >>> + }, >>> + { >>> + .fourcc = V4L2_PIX_FMT_NV15_4L4, >>> + .codec_mode = HANTRO_MODE_NONE, >>> + .match_depth = true, >>> + .frmsize = { >>> + .min_width = ROCKCHIP_VPU981_MIN_SIZE, >>> + 
.max_width = FMT_UHD_WIDTH, >>> + .step_width = MB_DIM, >>> + .min_height = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_height = FMT_UHD_HEIGHT, >>> + .step_height = MB_DIM, >>> + }, >>> + }, >>> + { >>> + .fourcc = V4L2_PIX_FMT_AV1_FRAME, >>> + .codec_mode = HANTRO_MODE_AV1_DEC, >>> + .max_depth = 2, >>> + .frmsize = { >>> + .min_width = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_width = FMT_UHD_WIDTH, >>> + .step_width = MB_DIM, >>> + .min_height = ROCKCHIP_VPU981_MIN_SIZE, >>> + .max_height = FMT_UHD_HEIGHT, >>> + .step_height = MB_DIM, >>> + }, >>> + }, >>> +}; >>> + >>> static irqreturn_t rockchip_vpu1_vepu_irq(int irq, void *dev_id) >>> { >>> struct hantro_dev *vpu = dev_id; >>> @@ -331,6 +408,24 @@ static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id) >>> return IRQ_HANDLED; >>> } >>> >>> +static irqreturn_t rk3588_vpu981_irq(int irq, void *dev_id) >>> +{ >>> + struct hantro_dev *vpu = dev_id; >>> + enum vb2_buffer_state state; >>> + u32 status; >>> + >>> + status = vdpu_read(vpu, AV1_REG_INTERRUPT); >>> + state = (status & AV1_REG_INTERRUPT_DEC_RDY_INT) ? >>> + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; >>> + >>> + vdpu_write(vpu, 0, AV1_REG_INTERRUPT); >>> + vdpu_write(vpu, AV1_REG_CONFIG_DEC_CLK_GATE_E, AV1_REG_CONFIG); >>> + >>> + hantro_irq_done(vpu, state); >>> + >>> + return IRQ_HANDLED; >>> +} >>> + >>> static int rk3036_vpu_hw_init(struct hantro_dev *vpu) >>> { >>> /* Bump ACLK to max. possible freq. to improve performance. */ >>> @@ -346,6 +441,13 @@ static int rk3066_vpu_hw_init(struct hantro_dev *vpu) >>> return 0; >>> } >>> >>> +static int rk3588_vpu981_hw_init(struct hantro_dev *vpu) >>> +{ >>> + /* Bump ACLKs to max. possible freq. to improve performance. */ >>> + clk_set_rate(vpu->clocks[0].clk, RK3588_ACLK_MAX_FREQ); >>> + return 0; >>> +} >>> + >>> static int rockchip_vpu_hw_init(struct hantro_dev *vpu) >>> { >>> /* Bump ACLK to max. possible freq. to improve performance. 
*/ >>> @@ -498,6 +600,14 @@ static const struct hantro_codec_ops rk3568_vepu_codec_ops[] = { >>> }, >>> }; >>> >>> +static const struct hantro_codec_ops rk3588_vpu981_codec_ops[] = { >>> + [HANTRO_MODE_AV1_DEC] = { >>> + .run = rockchip_vpu981_av1_dec_run, >>> + .init = rockchip_vpu981_av1_dec_init, >>> + .exit = rockchip_vpu981_av1_dec_exit, >>> + .done = rockchip_vpu981_av1_dec_done, >>> + }, >>> +}; >>> /* >>> * VPU variant. >>> */ >>> @@ -529,10 +639,18 @@ static const char * const rk3066_vpu_clk_names[] = { >>> "aclk_vepu", "hclk_vepu" >>> }; >>> >>> +static const struct hantro_irq rk3588_vpu981_irqs[] = { >>> + { "vdpu", rk3588_vpu981_irq }, >>> +}; >>> + >>> static const char * const rockchip_vpu_clk_names[] = { >>> "aclk", "hclk" >>> }; >>> >>> +static const char * const rk3588_vpu981_vpu_clk_names[] = { >>> + "aclk", "hclk", "aclk_vdpu_root", "hclk_vdpu_root" >>> +}; >>> + >>> /* VDPU1/VEPU1 */ >>> >>> const struct hantro_variant rk3036_vpu_variant = { >>> @@ -678,3 +796,19 @@ const struct hantro_variant px30_vpu_variant = { >>> .clk_names = rockchip_vpu_clk_names, >>> .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) >>> }; >>> + >>> +const struct hantro_variant rk3588_vpu981_variant = { >>> + .dec_offset = 0x0, >>> + .dec_fmts = rockchip_vpu981_dec_fmts, >>> + .num_dec_fmts = ARRAY_SIZE(rockchip_vpu981_dec_fmts), >>> + .postproc_fmts = rockchip_vpu981_postproc_fmts, >>> + .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu981_postproc_fmts), >>> + .postproc_ops = &rockchip_vpu981_postproc_ops, >>> + .codec = HANTRO_AV1_DECODER, >>> + .codec_ops = rk3588_vpu981_codec_ops, >>> + .irqs = rk3588_vpu981_irqs, >>> + .num_irqs = ARRAY_SIZE(rk3588_vpu981_irqs), >>> + .init = rk3588_vpu981_hw_init, >>> + .clk_names = rk3588_vpu981_vpu_clk_names, >>> + .num_clocks = ARRAY_SIZE(rk3588_vpu981_vpu_clk_names) >>> +}; >>
Il 12/04/23 13:56, Benjamin Gaignard ha scritto: > Compute the additional space required to store motion vectors at > the end of the frame buffers. > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> > Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Il 12/04/23 13:56, Benjamin Gaignard ha scritto: > Add AV1 decoder as new decoder mode to Hantro driver. > Register needed AV1 controls for the decoder. > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> > Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Il 12/04/23 13:56, Benjamin Gaignard ha scritto: > Let the driver know that V4L2_PIX_FMT_NV15_4L4 is a 10-bit > pixel format. > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> > Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Il 12/04/23 13:56, Benjamin Gaignard ha scritto: > Implement AV1 stateless decoder for rockchip VPU981. > It decode 8 and 10 bits AV1 bitstreams. > AV1 scaling feature is done by the postprocessor. > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com> > Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> > --- > Changes in v6: > - Fix frame-larger-than warning. > > drivers/media/platform/verisilicon/Makefile | 1 + > .../media/platform/verisilicon/hantro_hw.h | 64 +- > .../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2024 +++++++++++++++++ > .../verisilicon/rockchip_vpu981_regs.h | 477 ++++ > 4 files changed, 2564 insertions(+), 2 deletions(-) > create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c > create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h > ..snip.. > diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h > index d7641eced32e..25456fb960c8 100644 > --- a/drivers/media/platform/verisilicon/hantro_hw.h > +++ b/drivers/media/platform/verisilicon/hantro_hw.h > @@ -37,6 +37,8 @@ > > #define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1) > > +#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1) > + > struct hantro_dev; > struct hantro_ctx; > struct hantro_buf; > @@ -250,23 +252,81 @@ struct hantro_vp9_dec_hw_ctx { > }; > > /** > - * hantro_av1_dec_hw_ctx > + * struct hantro_av1_dec_ctrls > + * @sequence: AV1 Sequence > + * @tile_group_entry: AV1 Tile Group entry > + * @frame: AV1 Frame Header OBU > + * @film_grain: AV1 Film Grain > + */ > +struct hantro_av1_dec_ctrls { > + const struct v4l2_ctrl_av1_sequence *sequence; > + const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry; > + const struct v4l2_ctrl_av1_frame *frame; > + const struct v4l2_ctrl_av1_film_grain *film_grain; > +}; > + > +struct hantro_av1_frame_ref { > + int width; > + int height; > + int mi_cols; > + int mi_rows; I wonder if we can 
save some memory here, if w/h/mi_c/mi_r can never be negative, it would be possible to change them to a unsigned type, in which case, a u16 would probably be fine at least for width and height as I would never expect a resolution of more than 65535x65535. Even a s16 would probably be fine, anyway - but that's your call. P.S.: If this is a structure coming from the HW instead, all members shall use fixed size type! ..snip.. > diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c > new file mode 100644 > index 000000000000..b9fc70f606a3 > --- /dev/null > +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c > @@ -0,0 +1,2024 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (c) 2021, Collabora Shouldn't this be 2023, Collabora Ltd. ? :-) > + * > + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com> > + */ > + > +#include <media/v4l2-mem2mem.h> > +#include "hantro.h" > +#include "hantro_v4l2.h" > +#include "rockchip_vpu981_regs.h" > + ..snip.. > + > +static int rockchip_vpu981_av1_tile_log2(int target) > +{ > + int k; > + > + /* > + * returns the smallest value for k such that 1 << k is greater > + * than or equal to target Spaces -> tabs here, please. > + */ > + for (k = 0; (1 << k) < target; k++); > + > + return k; > +} > + ..snip.. 
> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx) > +{ > + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec; > + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls; > + const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info; > + const struct v4l2_ctrl_av1_tile_group_entry *group_entry = > + ctrls->tile_group_entry; > + int context_update_y = > + tile_info->context_update_tile_id / tile_info->tile_cols; > + int context_update_x = > + tile_info->context_update_tile_id % tile_info->tile_cols; > + int context_update_tile_id = > + context_update_x * tile_info->tile_rows + context_update_y; > + u8 *dst = av1_dec->tile_info.cpu; > + struct hantro_dev *vpu = ctx->dev; > + int tile0, tile1; > + > + memset(dst, 0, av1_dec->tile_info.size); > + > + for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) { > + for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) { > + int tile_id = tile1 * tile_info->tile_cols + tile0; > + u32 start, end; > + u32 y0 = > + tile_info->height_in_sbs_minus_1[tile1] + 1; > + u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1; > + > + // tile size in SB units (width,height) Please be consistent with comments style. You used C-style before, so please do the same here. 
> + *dst++ = x0; > + *dst++ = 0; > + *dst++ = 0; > + *dst++ = 0; > + *dst++ = y0; > + *dst++ = 0; > + *dst++ = 0; > + *dst++ = 0; > + > + // tile start position > + start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset; > + *dst++ = start & 255; > + *dst++ = (start >> 8) & 255; > + *dst++ = (start >> 16) & 255; > + *dst++ = (start >> 24) & 255; > + > + // # of bytes in tile data s/#/number/g > + end = start + group_entry[tile_id].tile_size; > + *dst++ = end & 255; > + *dst++ = (end >> 8) & 255; > + *dst++ = (end >> 16) & 255; > + *dst++ = (end >> 24) & 255; > + } > + } > + > + hantro_reg_write(vpu, &av1_multicore_expect_context_update, > + !!(context_update_x == 0)); fits in one line of 96 columns > + hantro_reg_write(vpu, &av1_tile_enable, > + !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1))); > + hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols); > + hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows); > + hantro_reg_write(vpu, &av1_context_update_tile_id, > + context_update_tile_id); ditto > + hantro_reg_write(vpu, &av1_tile_transpose, 1); > + if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) || > + rockchip_vpu981_av1_tile_log2(tile_info->tile_rows)) > + hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1); > + else > + hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3); > + > + hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma); > +} > + ..snip.. 
> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx) > +{ > + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec; > + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls; > + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame; > + bool frame_is_intra = IS_INTRA(frame->frame_type); > + struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu; > + int i; > + > + if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF) > + return; > + > + for (i = 0; i < NUM_REF_FRAMES; i++) { > + if (frame->refresh_frame_flags & (1 << i)) { You can use the BIT() macro here: if (frame->refresh_frame_flags & BIT(i)) { > + struct mvcdfs stored_mv_cdf; > + > + rockchip_av1_get_cdfs(ctx, i); > + stored_mv_cdf = av1_dec->cdfs->mv_cdf; > + *av1_dec->cdfs = *out_cdfs; > + if (frame_is_intra) { > + av1_dec->cdfs->mv_cdf = stored_mv_cdf; > + *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf; > + } > + rockchip_av1_store_cdfs(ctx, > + frame->refresh_frame_flags); > + break; > + } > + } > +} ..snip.. 
> + > +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx) > +{ > + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec; > + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls; > + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame; > + const struct v4l2_av1_cdef *cdef = &frame->cdef; > + struct hantro_dev *vpu = ctx->dev; > + u32 luma_pri_strength = 0; > + u16 luma_sec_strength = 0; > + u32 chroma_pri_strength = 0; > + u16 chroma_sec_strength = 0; > + int i; > + > + hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits); > + hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3); > + > + for (i = 0; i < (1 << cdef->bits); i++) { for (i = 0; i < BIT(cdef->bits); i++) { > + luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4); > + if (cdef->y_sec_strength[i] == 4) > + luma_sec_strength |= 3 << (i * 2); > + else > + luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2); > + > + chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4); > + if (cdef->uv_sec_strength[i] == 4) > + chroma_sec_strength |= 3 << (i * 2); > + else > + chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2); > + } > + > + hantro_reg_write(vpu, &av1_cdef_luma_primary_strength, > + luma_pri_strength); > + hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength, > + luma_sec_strength); > + hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength, > + chroma_pri_strength); > + hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength, > + chroma_sec_strength); > + > + hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma); > +} > + ..snip.. 
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h > new file mode 100644 > index 000000000000..182e6c830ff6 > --- /dev/null > +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h > @@ -0,0 +1,477 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (c) 2022, Collabora > + * > + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com> > + */ > + > +#ifndef _ROCKCHIP_VPU981_REGS_H_ > +#define _ROCKCHIP_VPU981_REGS_H_ > + > +#include "hantro.h" > + > +#define AV1_SWREG(nr) ((nr) * 4) > + > +#define AV1_DEC_REG(b, s, m) \ > + ((const struct hantro_reg) { \ > + .base = AV1_SWREG(b), \ > + .shift = s, \ > + .mask = m, \ > + }) > + > +#define AV1_REG_INTERRUPT AV1_SWREG(1) > +#define AV1_REG_INTERRUPT_DEC_RDY_INT BIT(12) > + > +#define AV1_REG_CONFIG AV1_SWREG(2) > +#define AV1_REG_CONFIG_DEC_CLK_GATE_E BIT(10) > + > +#define av1_dec_e AV1_DEC_REG(1, 0, 0x1) I personally dislike definitions in lowercase, but it's like that across the entire driver and we must keep consistency, so it's fine. Regards, Angelo