mbox series

[v3,0/4] Add AIE Driver

Message ID 20241225090113.17027-1-bo.kong@mediatek.com
Headers show
Series Add AIE Driver | expand

Message

bo.kong Dec. 25, 2024, 9 a.m. UTC
From: Bo Kong <Bo.Kong@mediatek.com>

AIE(AI Engine) is one of the units in mt8188 ISP which provides
hardware accelerated face detection function.

Bo Kong (4):
  arm64: dts: mt8188: add aie node
  media: dt-bindings: add MT8188 AIE
  media: mediatek: add MT8188 AIE driver
  uapi: linux: add MT8188 AIE

 .../bindings/media/mediatek,mt8188-aie.yaml   |   97 +
 arch/arm64/boot/dts/mediatek/mt8188.dtsi      |   33 +
 drivers/media/platform/mediatek/Kconfig       |    1 +
 drivers/media/platform/mediatek/Makefile      |    1 +
 drivers/media/platform/mediatek/aie/Kconfig   |   41 +
 drivers/media/platform/mediatek/aie/Makefile  |    8 +
 drivers/media/platform/mediatek/aie/mtk_aie.h |  950 +++++
 .../media/platform/mediatek/aie/mtk_aie_53.c  | 1398 +++++++
 .../media/platform/mediatek/aie/mtk_aie_drv.c | 3545 +++++++++++++++++
 include/uapi/linux/mtk_aie_v4l2_controls.h    |  132 +
 include/uapi/linux/videodev2.h                |    6 +
 11 files changed, 6212 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/mediatek,mt8188-aie.yaml
 create mode 100644 drivers/media/platform/mediatek/aie/Kconfig
 create mode 100644 drivers/media/platform/mediatek/aie/Makefile
 create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie.h
 create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie_53.c
 create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie_drv.c
 create mode 100644 include/uapi/linux/mtk_aie_v4l2_controls.h

Comments

CK Hu (胡俊光) Dec. 26, 2024, 3:53 a.m. UTC | #1
On Wed, 2024-12-25 at 17:00 +0800, bo.kong wrote:
> From: Bo Kong <Bo.Kong@mediatek.com>
> 
> Add a V4L2 sub-device driver for MT8188 AIE.
> 
> Signed-off-by: Bo Kong <Bo.Kong@mediatek.com>
> ---

[snip]

> +#define FLD_PL_IN_BASE_ADDR_0_0		0x550
> +#define FLD_PL_IN_BASE_ADDR_0_1		0x554
> +#define FLD_PL_IN_BASE_ADDR_0_2		0x558
> +#define FLD_PL_IN_BASE_ADDR_0_3		0x55C
> +#define FLD_PL_IN_BASE_ADDR_0_4		0x560
> +#define FLD_PL_IN_BASE_ADDR_0_5		0x564
> +#define FLD_PL_IN_BASE_ADDR_0_6		0x568
> +#define FLD_PL_IN_BASE_ADDR_0_7		0x56C
> +#define FLD_PL_IN_BASE_ADDR_0_8		0x570
> +#define FLD_PL_IN_BASE_ADDR_0_9		0x574
> +#define FLD_PL_IN_BASE_ADDR_0_10	0x578
> +#define FLD_PL_IN_BASE_ADDR_0_11	0x57C
> +#define FLD_PL_IN_BASE_ADDR_0_12	0x580
> +#define FLD_PL_IN_BASE_ADDR_0_13	0x584
> +#define FLD_PL_IN_BASE_ADDR_0_14	0x588
> +#define FLD_PL_IN_BASE_ADDR_0_15	0x58C
> +#define FLD_PL_IN_BASE_ADDR_0_16	0x590
> +#define FLD_PL_IN_BASE_ADDR_0_17	0x594
> +#define FLD_PL_IN_BASE_ADDR_0_18	0x598
> +#define FLD_PL_IN_BASE_ADDR_0_19	0x59C
> +#define FLD_PL_IN_BASE_ADDR_0_20	0x5A0
> +#define FLD_PL_IN_BASE_ADDR_0_21	0x5A4
> +#define FLD_PL_IN_BASE_ADDR_0_22	0x5A8
> +#define FLD_PL_IN_BASE_ADDR_0_23	0x5AC
> +#define FLD_PL_IN_BASE_ADDR_0_24	0x5B0
> +#define FLD_PL_IN_BASE_ADDR_0_25	0x5B4
> +#define FLD_PL_IN_BASE_ADDR_0_26	0x5B8
> +#define FLD_PL_IN_BASE_ADDR_0_27	0x5BC
> +#define FLD_PL_IN_BASE_ADDR_0_28	0x5C0
> +#define FLD_PL_IN_BASE_ADDR_0_29	0x5C4
> +

[snip]

> +void aie_execute(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	unsigned int loop_num = 0;
> +	unsigned int loop_reg_val = 0;
> +	unsigned int i = 0;
> +
> +	if (aie_cfg->sel_mode == FDMODE) {
> +		writel(0x0, fd->fd_base + AIE_START_REG);
> +		writel(0x00000111, fd->fd_base + AIE_ENABLE_REG);
> +		loop_num = FD_LOOP_NUM / 3 * (aie_cfg->number_of_pyramid);
> +		loop_reg_val = (loop_num << 8) |
> +			       (aie_cfg->number_of_pyramid - 1);
> +		writel(loop_reg_val, fd->fd_base + AIE_LOOP_REG);
> +		writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +		writel(fd->reg_cfg.rs_adr,
> +		       fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
> +		writel(fd->reg_cfg.fd_adr,
> +		       fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
> +		writel(fd->reg_cfg.yuv2rgb_adr,
> +		       fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
> +
> +		if (fd->variant->hw_version == 31) {
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_RS_CON_BASE_ADR_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_FD_CON_BASE_ADR_MSB);
> +		}
> +
> +		writel(0x1, fd->fd_base + AIE_START_REG);
> +	} else if (aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		writel(0x0, fd->fd_base + AIE_START_REG);
> +		writel(0x00000101, fd->fd_base + AIE_ENABLE_REG);
> +		writel(0x00001A00, fd->fd_base + AIE_LOOP_REG);
> +		writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +		writel(fd->reg_cfg.rs_adr,
> +		       fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
> +		writel(fd->reg_cfg.fd_adr,
> +		       fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
> +		writel(fd->reg_cfg.yuv2rgb_adr,
> +		       fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
> +
> +		if (fd->variant->hw_version == 31) {
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_RS_CON_BASE_ADR_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + AIE_FD_CON_BASE_ADR_MSB);
> +		}
> +
> +		writel(0x1, fd->fd_base + AIE_START_REG);
> +	} else if (aie_cfg->sel_mode == FLDMODE) {
> +		if (fd->variant->fld_enable) {
> +			writel(0x10, fd->fd_base + AIE_START_REG);
> +			writel(0x00011111, fd->fd_base + AIE_DMA_CTL_REG);
> +			writel(0x01111111, fd->fd_base + FLD_EN);
> +			writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +			for (i = 0; i < aie_cfg->fld_face_num; i++) {
> +				writel(aie_cfg->src_img_addr,
> +				       fd->fd_base + FLD_BASE_ADDR_FACE_0 +
> +					       i * 0x4);
> +				writel(aie_cfg->fld_input[i].fld_in_crop_x1
> +						       << 16 |
> +					       aie_cfg->fld_input[i]
> +						       .fld_in_crop_y1,
> +				       fd->fd_base + fld_face_info_0[i]);
> +				writel(aie_cfg->fld_input[i].fld_in_crop_x2
> +						       << 16 |
> +					       aie_cfg->fld_input[i]
> +						       .fld_in_crop_y2,
> +				       fd->fd_base + fld_face_info_1[i]);
> +				writel(aie_cfg->fld_input[i].fld_in_rip << 4 |
> +					       aie_cfg->fld_input[i].fld_in_rop,
> +				       fd->fd_base + fld_face_info_2[i]);
> +			}
> +
> +			writel(aie_cfg->fld_face_num << 28 | FLD_FOREST << 16 |
> +				       FLD_POINT,
> +			       fd->fd_base + FLD_MODEL_PARA1);
> +			writel(13 << 16 | 0xfe9,
> +			       fd->fd_base + FLD_MODEL_PARA14);
> +
> +			writel(aie_cfg->src_img_width << 16 |
> +				       aie_cfg->src_img_height,
> +			       fd->fd_base + FLD_SRC_WD_HT);
> +
> +			/*input settings*/
> +			writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_0);
> +			writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_0);
> +			writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_1);
> +			writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_1);
> +			writel(0x0016003f, fd->fd_base + FLD_PL_IN_SIZE_2_0);
> +			writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_0);
> +			writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_1);
> +			writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_1);
> +			writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_2);
> +			writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_2);
> +			writel(0x00a6001f, fd->fd_base + FLD_PL_IN_SIZE_3);
> +			writel(0x0020000f, fd->fd_base + FLD_PL_IN_STRIDE_3);
> +
> +			/*output setting*/
> +			writel((2400 * aie_cfg->fld_face_num - 1) << 16 | 127,
> +			       fd->fd_base + FLD_SH_IN_SIZE_0);
> +			writel(0x0010000f, fd->fd_base + FLD_SH_IN_STRIDE_0);
> +			writel(fd->fld_para->fld_output_pa[0],
> +			       fd->fd_base + FLD_TR_OUT_BASE_ADDR_0);
> +			writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f,
> +			       fd->fd_base + FLD_TR_OUT_SIZE_0);
> +			writel(0x0070000f, fd->fd_base + FLD_TR_OUT_STRIDE_0);
> +			writel(fd->fld_para->fld_output_pa[0],
> +			       fd->fd_base + FLD_PP_OUT_BASE_ADDR_0);
> +			writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f,
> +			       fd->fd_base + FLD_PP_OUT_SIZE_0);
> +			writel(0x0070000f, fd->fd_base + FLD_PP_OUT_STRIDE_0);
> +
> +			/*cv score*/
> +			writel(0x00000001, fd->fd_base + FLD_BS_BIAS);
> +			writel(0x0000b835,
> +			       fd->fd_base + FLD_CV_FM_RANGE_0); // 8E8
> +			writel(0xffff5cba,
> +			       fd->fd_base + FLD_CV_FM_RANGE_1); // 8EC
> +			writel(0x00005ed5,
> +			       fd->fd_base + FLD_CV_PM_RANGE_0); // 8F0
> +			writel(0xffff910d,
> +			       fd->fd_base + FLD_CV_PM_RANGE_1); // 8F4
> +			writel(0x0000031e, fd->fd_base + FLD_BS_RANGE_0); // 8F8
> +			writel(0xfffffcae, fd->fd_base + FLD_BS_RANGE_1); // 8FC
> +
> +			/* 6 steps */
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_BLINK][14],
> +			       fd->fd_base + FLD_BS_IN_BASE_ADDR_14);
> +
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][0],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_0);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][1],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_1);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][2],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_2);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][3],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_3);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][4],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_4);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][5],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_5);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][6],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_6);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][7],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_7);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][8],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_8);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][9],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_9);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][10],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_10);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][11],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_11);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][12],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_12);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][13],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_13);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][14],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_14);
> +
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][0],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_0);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][1],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_1);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][2],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_2);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][3],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_3);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][4],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_4);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][5],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_5);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][6],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_6);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][7],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_7);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][8],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_8);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][9],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_9);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][10],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_10);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][11],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_11);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][12],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_12);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][13],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_13);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][14],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_14);
> +
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][0],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_0);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][1],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_1);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][2],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_2);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][3],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_3);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][4],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_4);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][5],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_5);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][6],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_6);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][7],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_7);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][8],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_8);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][9],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_9);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][10],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_10);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][11],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_11);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][12],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_12);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][13],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_13);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][14],
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_14);
> +
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][0],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_0);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][1],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_1);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][2],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_2);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][3],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_3);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][4],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_4);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][5],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_5);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][6],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_6);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][7],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_7);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][8],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_8);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][9],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_9);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][10],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_10);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][11],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_11);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][12],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_12);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][13],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_13);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][14],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_14);

Use a for-loop to simplify these code:

#define FLD_PL_IN_BASE_ADDR_0_(n) (0x550 + 4 * n)

for (i = 0; i < 15; i++)
	writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][i],
	       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_(i));

Regards,
CK

> +
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][0],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_0);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][1],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_1);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][2],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_2);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][3],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_3);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][4],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_4);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][5],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_5);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][6],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_6);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][7],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_7);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][8],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_8);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][9],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_9);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][10],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_10);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][11],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_11);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][12],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_12);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][13],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_13);
> +			writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][14],
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_14);
> +
> +			/* */
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_8_15_MSB);
> +
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_8_15_MSB);
> +
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_8_15_MSB);
> +
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_8_15_MSB);
> +
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_SH_IN_BASE_ADDR_8_15_MSB);
> +
> +			writel(0x02000000,
> +			       fd->fd_base + FLD_BS_IN_BASE_ADDR_8_15_MSB);
> +
> +			writel(0x22222222,
> +			       fd->fd_base + FLD_BASE_ADDR_FACE_0_7_MSB);
> +			writel(0x02222222,
> +			       fd->fd_base + FLD_BASE_ADDR_FACE_8_14_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + FLD_TR_OUT_BASE_ADDR_0_MSB);
> +			writel(0x00000002,
> +			       fd->fd_base + FLD_PP_OUT_BASE_ADDR_0_MSB);
> +
> +			/*fld mode + trigger start*/
> +			writel(0x11, fd->fd_base + AIE_START_REG);
> +		}
> +	}
> +}
> +
CK Hu (胡俊光) Dec. 26, 2024, 5:20 a.m. UTC | #2
On Wed, 2024-12-25 at 17:00 +0800, bo.kong wrote:
> From: Bo Kong <Bo.Kong@mediatek.com>
> 
> Add a V4L2 sub-device driver for MT8188 AIE.
> 
> Signed-off-by: Bo Kong <Bo.Kong@mediatek.com>
> ---

[snip]

> diff --git a/drivers/media/platform/mediatek/aie/Kconfig b/drivers/media/platform/mediatek/aie/Kconfig
> new file mode 100644
> index 000000000000..b7925cd69309
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/Kconfig
> @@ -0,0 +1,41 @@
> +config VIDEO_MTK_AIE
> +	tristate "MediaTek AI engine function"
> +	depends on OF
> +	select V4L2_MEM2MEM_DEV
> +	select VIDEOBUF2_DMA_CONTIG
> +	select MEDIA_CONTROLLER_REQUEST_API
> +	help
> +	  Support the AI engine (AIE) feature
> +
> +	  AIE driver is a V4L2 memory-to-memory device driver which
> +	  provides hardware accelerated face detection function,
> +	  it can detect different sizes of faces in a raw image.
> +
> +config VIDEO_MTK_AIE_RESULT_IN_KERNEL

This config is useless, so drop it.

> +	bool "Operate AIE in kernel mode"
> +	depends on VIDEO_MTK_AIE
> +	default y
> +	help
> +	  When this option is enabled, the MediaTek (MTK) AIE driver operates in
> +	  kernel mode, which is the default mode.
> +
> +	  In kernel mode, the AIE driver's results are processed directly within
> +	  the kernel space, enhancing performance and reliability.
> +
> +	  Disabling this option might compromise the AIE driver performance and stability.
> +
> +	  Unless you have specific needs for operating the driver in user mode,
> +	  for example: unit test (UT), this option should remain enabled.
> +
> +config VIDEO_MTK_AIE_RESULT_IN_USER

Ditto.

Regards,
CK

> +	bool "Operate AIE in user mode"
> +	depends on VIDEO_MTK_AIE
> +	help
> +	  Enabling this option sets the MediaTek (MTK) AIE driver to operate in
> +	  user mode.
> +
> +	  In this mode, AIE driver result values are handled at user level, providing an
> +	  organized manner to store multiple result values.
> +
> +	  Unless you understand the implications of operating in user mode,
> +	  this option is usually recommended to be disabled.
>
CK Hu (胡俊光) Dec. 26, 2024, 6:36 a.m. UTC | #3
On Wed, 2024-12-25 at 17:00 +0800, bo.kong wrote:
> From: Bo Kong <Bo.Kong@mediatek.com>
> 
> Add AIE control related definitions.
> 
> Signed-off-by: Bo Kong <Bo.Kong@mediatek.com>
> ---
> 
> Changes in v3:
> none
> 
> Changes in v2:
> 1. Fix coding style
> ---
>  include/uapi/linux/mtk_aie_v4l2_controls.h | 132 +++++++++++++++++++++
>  include/uapi/linux/videodev2.h             |   6 +
>  2 files changed, 138 insertions(+)
>  create mode 100644 include/uapi/linux/mtk_aie_v4l2_controls.h
> 
> diff --git a/include/uapi/linux/mtk_aie_v4l2_controls.h b/include/uapi/linux/mtk_aie_v4l2_controls.h
> new file mode 100644
> index 000000000000..e635548c2cdf
> --- /dev/null
> +++ b/include/uapi/linux/mtk_aie_v4l2_controls.h
> @@ -0,0 +1,132 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * AIE Controls Header
> + *
> + * Copyright (c) 2020 MediaTek Inc.
> + * Author: Fish Wu <fish.wu@mediatek.com>
> + */
> +
> +#ifndef __MTK_AIE_V4L2_CONTROLS_H__
> +#define __MTK_AIE_V4L2_CONTROLS_H__
> +
> +#include <linux/types.h>
> +
> +/*
> + * The base for the mediatek Face Detection driver controls.
> + * We reserve 16 controls for this driver.
> + * Each CID represents different stages of AIE, with different structures and functions
> + * and cannot be reused
> + */
> +#define V4L2_CID_USER_MTK_FD_BASE (V4L2_CID_USER_BASE + 0x1fd0)
> +
> +#define V4L2_CID_MTK_AIE_INIT (V4L2_CID_USER_MTK_FD_BASE + 1)
> +#define V4L2_CID_MTK_AIE_PARAM (V4L2_CID_USER_MTK_FD_BASE + 2)
> +
> +#define V4L2_FLD_MAX_FRAME 15
> +
> +/**
> + * struct v4l2_ctrl_aie_init - aie init parameters.
> + *
> + * @max_img_width: maximum width of the source image.
> + * @max_img_height: maximum height of the source image.
> + * @pyramid_width: maximum width of the base pyramid.
> + * @pyramid_height: maximum height of the base pyramid.
> + * @feature_threshold: feature threshold for hareware.
> + */
> +struct v4l2_ctrl_aie_init {
> +	__u32 max_img_width;
> +	__u32 max_img_height;
> +	__u32 pyramid_width;
> +	__u32 pyramid_height;
> +	__s32 feature_threshold;
> +};
> +
> +/**
> + * struct v4l2_aie_roi - aie roi parameters.
> + *
> + * @x1: x1 of the roi coordinate.
> + * @y1: y1 of the roi coordinate.
> + * @x2: x2 of the roi coordinate.
> + * @y2: y2 of the roi coordinate.
> + */
> +struct v4l2_aie_roi {
> +	__u32 x1;
> +	__u32 y1;
> +	__u32 x2;
> +	__u32 y2;
> +};
> +
> +/**
> + * struct v4l2_aie_padding - aie padding parameters.
> + *
> + * @left: the size of padding left.
> + * @right: the size of padding right.
> + * @down: the size of padding below.
> + * @up: the size of padding above.
> + */
> +struct v4l2_aie_padding {
> +	__u32 left;
> +	__u32 right;
> +	__u32 down;
> +	__u32 up;
> +};
> +
> +/**
> + * struct v4l2_fld_crop_rip_rop - aie fld parameters.
> + *
> + * @fld_in_crop_x1: x1 of the crop coordinate.
> + * @fld_in_crop_y1: y1 of the crop coordinate.
> + * @fld_in_crop_x2: x2 of the crop coordinate.
> + * @fld_in_crop_y2: y2 of the crop coordinate.
> + * @fld_in_rip: fld in rip.
> + * @fld_in_rop: fld in rop.
> + */
> +struct v4l2_fld_crop_rip_rop {
> +	__u32 fld_in_crop_x1;
> +	__u32 fld_in_crop_y1;
> +	__u32 fld_in_crop_x2;
> +	__u32 fld_in_crop_y2;
> +	__u32 fld_in_rip;
> +	__u32 fld_in_rop;
> +};
> +
> +/**
> + * struct v4l2_fld_crop_rip_rop - aie fld parameters.
> + *
> + * @fd_mode: select a mode for current fd.

Where is the definition of mode?

> + * @src_img_fmt: source image format.
> + * @src_img_width: the width of the source image.
> + * @src_img_height: the height of the source image.
> + * @src_img_stride: the stride of the source image.

I'm not familiar with V4L2, but I think there is a standard interface to set source buffer format, width, height, stride.

> + * @pyramid_base_width: the width of the base pyramid.
> + * @pyramid_base_height: the width of the base pyramid.
> + * @number_of_pyramid: number of pyramid.

How does pyramid work?
Does pyramid width and height has any limitation? Larger than image width and height?
How many number of pyramid could be set?

> + * @rotate_degree: the rotate degree of the image.
> + * @en_roi: enable roi.
> + * @src_roi: roi params.

What is roi? Does roi means 'region of interest'?
If it is enable, how it work?

> + * @en_padding: enable padding.
> + * @src_padding: padding params.

In which case need padding?
For face detection, padding would not help any thing.

> + * @freq_level: frequency level.

What's freq_level?
How to assign it?

> + * @fld_face_num: the number of faces in fld.
> + * @fld_input: fld input params.

What is fld?
Why user space already know how many faces in image?
I think number of faces is detected by kernel.

> + */
> +struct v4l2_ctrl_aie_param {
> +	__u32 fd_mode;
> +	__u32 src_img_fmt;
> +	__u32 src_img_width;
> +	__u32 src_img_height;
> +	__u32 src_img_stride;
> +	__u32 pyramid_base_width;
> +	__u32 pyramid_base_height;
> +	__u32 number_of_pyramid;
> +	__u32 rotate_degree;
> +	__s32 en_roi;
> +	struct v4l2_aie_roi src_roi;
> +	__s32 en_padding;
> +	struct v4l2_aie_padding src_padding;
> +	__u32 freq_level;
> +	__u32 fld_face_num;
> +	struct v4l2_fld_crop_rip_rop fld_input[V4L2_FLD_MAX_FRAME];
> +};
> +
> +#endif /* __MTK_AIE_V4L2_CONTROLS_H__ */
> diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
> index e7c4dce39007..b23a9e99c835 100644
> --- a/include/uapi/linux/videodev2.h
> +++ b/include/uapi/linux/videodev2.h
> @@ -877,6 +877,9 @@ struct v4l2_pix_format {
>  #define V4L2_META_FMT_GENERIC_CSI2_24	v4l2_fourcc('M', 'C', '1', 'O') /* 24-bit CSI-2 packed 8-bit metadata */
>  #endif
>  
> +/* Vendor-specific definition: used for the MediaTek camera subsystem's face detection results */
> +#define V4L2_META_FMT_MTFD_RESULT v4l2_fourcc('M', 'T', 'f', 'd')
> +
>  /* priv field value to indicates that subsequent fields are valid. */
>  #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
>  
> @@ -1961,6 +1964,9 @@ enum v4l2_ctrl_type {
>  	V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY = 0x281,
>  	V4L2_CTRL_TYPE_AV1_FRAME	    = 0x282,
>  	V4L2_CTRL_TYPE_AV1_FILM_GRAIN	    = 0x283,
> +
> +	V4L2_CTRL_TYPE_AIE_INIT		= 0x0290,
> +	V4L2_CTRL_TYPE_AIE_PARAM	= 0x0291,
>  };
>  
>  /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
CK Hu (胡俊光) Dec. 26, 2024, 7:38 a.m. UTC | #4
On Wed, 2024-12-25 at 17:00 +0800, bo.kong wrote:
> From: Bo Kong <Bo.Kong@mediatek.com>
> 
> Add a V4L2 sub-device driver for MT8188 AIE.
> 
> Signed-off-by: Bo Kong <Bo.Kong@mediatek.com>
> ---

[snip]

> +struct mtk_aie_ctx {
> +	struct mtk_aie_dev *fd_dev;
> +	struct device *dev;
> +	struct v4l2_fh fh;
> +	struct v4l2_ctrl_handler hdl;
> +	struct v4l2_pix_format_mplane src_fmt;
> +	struct v4l2_meta_format dst_fmt;
> +	struct v4l2_ctrl_aie_init user_init;
> +	struct v4l2_ctrl_aie_param user_param;

struct v4l2_ctrl_aie_param is defined in future patch.
When apply this patch, it would build fail.
So reorder patch sequence to let struct v4l2_ctrl_aie_param defined first.

Regards,
CK

> +};
> +