diff mbox series

[v1,07/14] media: mtk-vcodec: Add msg queue feature for lat and core architecture

Message ID 20210707062157.21176-8-yunfei.dong@mediatek.com
State New
Headers show
Series Using component framework to support multi hardware decode | expand

Commit Message

Yunfei Dong July 7, 2021, 6:21 a.m. UTC
For lat and core architecture, lat thread will send message to core
thread when lat decode done. Core hardware will use the message
from lat to decode, then free message to lat thread when decode done.

Signed-off-by: Yunfei Dong <yunfei.dong@mediatek.com>
---
 drivers/media/platform/mtk-vcodec/Makefile    |   1 +
 .../platform/mtk-vcodec/mtk_vcodec_drv.h      |  15 ++
 .../platform/mtk-vcodec/vdec_msg_queue.c      | 234 ++++++++++++++++++
 .../platform/mtk-vcodec/vdec_msg_queue.h      | 130 ++++++++++
 4 files changed, 380 insertions(+)
 create mode 100644 drivers/media/platform/mtk-vcodec/vdec_msg_queue.c
 create mode 100644 drivers/media/platform/mtk-vcodec/vdec_msg_queue.h

Comments

Tzung-Bi Shih July 9, 2021, 9:39 a.m. UTC | #1
On Wed, Jul 7, 2021 at 2:22 PM Yunfei Dong <yunfei.dong@mediatek.com> wrote:
> @@ -464,6 +469,11 @@ struct mtk_vcodec_enc_pdata {

>   * comp_dev: component hardware device

>   * component_node: component node

>   * comp_idx: component index

> + *

> + * core_read: Wait queue used to signalize when core get useful lat buffer

> + * core_queue: List of V4L2 lat_buf

To be neat, replace "Wait" to "wait" and "List" to "list".

> +int vdec_msg_queue_init(

> +       struct mtk_vcodec_ctx *ctx,

> +       struct vdec_msg_queue *msg_queue,

> +       core_decode_cb_t core_decode,

> +       int private_size)

> +{

> +       struct vdec_lat_buf *lat_buf;

> +       int i, err;

> +

> +       init_waitqueue_head(&msg_queue->lat_read);

> +       INIT_LIST_HEAD(&msg_queue->lat_queue);

> +       spin_lock_init(&msg_queue->lat_lock);

> +       msg_queue->num_lat = 0;

> +

> +       msg_queue->wdma_addr.size = vde_msg_queue_get_trans_size(

> +               ctx->picinfo.buf_w, ctx->picinfo.buf_h);

> +

> +       err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr);

> +       if (err) {

> +               mtk_v4l2_err("failed to allocate wdma_addr buf");

> +               return -ENOMEM;

> +       }

> +       msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr;

> +       msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr;

> +

> +       for (i = 0; i < NUM_BUFFER_COUNT; i++) {

> +               lat_buf = &msg_queue->lat_buf[i];

> +

> +               lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC;

> +               err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr);

> +               if (err) {

> +                       mtk_v4l2_err("failed to allocate wdma_err_addr buf[%d]", i);

> +                       return -ENOMEM;

> +               }

> +

> +               lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ;

> +               err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr);

> +               if (err) {

> +                       mtk_v4l2_err("failed to allocate wdma_addr buf[%d]", i);

> +                       return -ENOMEM;

> +               }

> +

> +               lat_buf->private_data = kzalloc(private_size, GFP_KERNEL);

> +               if (!lat_buf->private_data) {

> +                       mtk_v4l2_err("failed to allocate private_data[%d]", i);

> +                       return -ENOMEM;

> +               }

> +

> +               lat_buf->ctx = ctx;

> +               lat_buf->core_decode = core_decode;

> +               vdec_msg_queue_buf_to_lat(lat_buf);

> +       }

Doesn't it need to call mtk_vcodec_mem_free() and kfree() for any failure paths?

> +struct vdec_lat_buf *vdec_msg_queue_get_core_buf(

> +       struct mtk_vcodec_dev *dev)

> +{

> +       struct vdec_lat_buf *buf;

> +       int ret;

> +

> +       spin_lock(&dev->core_lock);

> +       if (list_empty(&dev->core_queue)) {

> +               mtk_v4l2_debug(3, "core queue is NULL, num_core = %d", dev->num_core);

> +               spin_unlock(&dev->core_lock);

> +               ret = wait_event_freezable(dev->core_read,

> +                       !list_empty(&dev->core_queue));

> +               if (ret)

> +                       return NULL;

Should be !ret?

> +void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,

> +       struct vdec_lat_buf *buf)

> +{

> +       spin_lock(&dev->core_lock);

> +       list_add_tail(&buf->core_list, &dev->core_queue);

> +       dev->num_core++;

> +       wake_up_all(&dev->core_read);

> +       mtk_v4l2_debug(3, "queu buf addr: (0x%p)", buf);

Typo.

> +bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)

> +{

> +       long timeout_jiff;

> +       int ret, i;

> +

> +       for (i = 0; i < NUM_BUFFER_COUNT + 2; i++) {

> +              timeout_jiff = msecs_to_jiffies(1000);

> +              ret = wait_event_timeout(msg_queue->lat_read,

> +                    msg_queue->num_lat == NUM_BUFFER_COUNT, timeout_jiff);

> +              if (ret) {

> +                     mtk_v4l2_debug(3, "success to get lat buf: %d",

> +                            msg_queue->num_lat);

> +                     return true;

> +              }

> +       }

Why does it need the loop?  i is unused.

> +void vdec_msg_queue_deinit(

> +       struct mtk_vcodec_ctx *ctx,

> +       struct vdec_msg_queue *msg_queue)

> +{

> +       struct vdec_lat_buf *lat_buf;

> +       struct mtk_vcodec_mem *mem;

> +       int i;

> +

> +       mem = &msg_queue->wdma_addr;

> +       if (mem->va)

> +               mtk_vcodec_mem_free(ctx, mem);

> +       for (i = 0; i < NUM_BUFFER_COUNT; i++) {

> +               lat_buf = &msg_queue->lat_buf[i];

> +

> +               mem = &lat_buf->wdma_err_addr;

> +               if (mem->va)

> +                       mtk_vcodec_mem_free(ctx, mem);

> +

> +               mem = &lat_buf->slice_bc_addr;

> +               if (mem->va)

> +                       mtk_vcodec_mem_free(ctx, mem);

> +

> +               if (lat_buf->private_data)

> +                       kfree(lat_buf->private_data);

> +       }

> +

> +       msg_queue->init_done = false;

Have no idea what init_done does in the code.  It is not included in
any branch condition.

> +/**

> + * vdec_msg_queue_init - init lat buffer information.

> + * @ctx: v4l2 ctx

> + * @msg_queue: used to store the lat buffer information

> + * @core_decode: core decode callback for each codec

> + * @private_size: the private data size used to share with core

> + */

> +int vdec_msg_queue_init(

> +       struct mtk_vcodec_ctx *ctx,

> +       struct vdec_msg_queue *msg_queue,

> +       core_decode_cb_t core_decode,

> +       int private_size);

Would prefer to have *msg_queue as the first argument (also applies to
all operators of vdec_msg_queue).

> +/**

> + * vdec_msg_queue_get_core_buf - get used core buffer for lat decode.

> + * @dev: mtk vcodec device

> + */

> +struct vdec_lat_buf *vdec_msg_queue_get_core_buf(

> +       struct mtk_vcodec_dev *dev);

This is weird: vdec_msg_queue's operator but manipulating mtk_vcodec_dev?

> +

> +/**

> + * vdec_msg_queue_buf_to_core - queue buf to the core for core decode.

> + * @dev: mtk vcodec device

> + * @buf: current lat buffer

> + */

> +void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,

> +       struct vdec_lat_buf *buf);

Also weird.

> +/**

> + * vdec_msg_queue_buf_to_lat - queue buf to lat for lat decode.

> + * @buf: current lat buffer

> + */

> +void vdec_msg_queue_buf_to_lat(struct vdec_lat_buf *buf);

It should at least accept a struct vdec_msg_queue argument (or which
msg queue should the buf put into?).

> +/**

> + * vdec_msg_queue_update_ube_rptr - used to updata the ube read point.

Typo.

> +/**

> + * vdec_msg_queue_update_ube_wptr - used to updata the ube write point.

Typo.

> +/**

> + * vdec_msg_queue_deinit - deinit lat buffer information.

> + * @ctx: v4l2 ctx

> + * @msg_queue: used to store the lat buffer information

> + */

> +void vdec_msg_queue_deinit(

> +       struct mtk_vcodec_ctx *ctx,

> +       struct vdec_msg_queue *msg_queue);

Would prefer to have *msg_queue as the first argument.


The position of struct vdec_msg_queue is weird.  It looks like the msg
queue is only for struct vdec_lat_buf.  If so, would vdec_msg_queue be
better to call vdec_lat_queue or something similar?

It shouldn't touch the core queue in mtk_vcodec_dev anyway.  Is it
possible to generalize the queue-related code for both lat and core
queues?
Yunfei Dong July 12, 2021, 7:27 a.m. UTC | #2
Hi Tzung-Bi,

Thanks for your detail feedback.
I add the description according to your each comments.

On Fri, 2021-07-09 at 17:39 +0800, Tzung-Bi Shih wrote:
> On Wed, Jul 7, 2021 at 2:22 PM Yunfei Dong <yunfei.dong@mediatek.com> wrote:

> > @@ -464,6 +469,11 @@ struct mtk_vcodec_enc_pdata {

> >   * comp_dev: component hardware device

> >   * component_node: component node

> >   * comp_idx: component index

> > + *

> > + * core_read: Wait queue used to signalize when core get useful lat buffer

> > + * core_queue: List of V4L2 lat_buf

> To be neat, replace "Wait" to "wait" and "List" to "list".

Will fix.
> > +int vdec_msg_queue_init(

> > +       struct mtk_vcodec_ctx *ctx,

> > +       struct vdec_msg_queue *msg_queue,

> > +       core_decode_cb_t core_decode,

> > +       int private_size)

> > +{

> > +       struct vdec_lat_buf *lat_buf;

> > +       int i, err;

> > +

> > +       init_waitqueue_head(&msg_queue->lat_read);

> > +       INIT_LIST_HEAD(&msg_queue->lat_queue);

> > +       spin_lock_init(&msg_queue->lat_lock);

> > +       msg_queue->num_lat = 0;

> > +

> > +       msg_queue->wdma_addr.size = vde_msg_queue_get_trans_size(

> > +               ctx->picinfo.buf_w, ctx->picinfo.buf_h);

> > +

> > +       err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr);

> > +       if (err) {

> > +               mtk_v4l2_err("failed to allocate wdma_addr buf");

> > +               return -ENOMEM;

> > +       }

> > +       msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr;

> > +       msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr;

> > +

> > +       for (i = 0; i < NUM_BUFFER_COUNT; i++) {

> > +               lat_buf = &msg_queue->lat_buf[i];

> > +

> > +               lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC;

> > +               err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr);

> > +               if (err) {

> > +                       mtk_v4l2_err("failed to allocate wdma_err_addr buf[%d]", i);

> > +                       return -ENOMEM;

> > +               }

> > +

> > +               lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ;

> > +               err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr);

> > +               if (err) {

> > +                       mtk_v4l2_err("failed to allocate wdma_addr buf[%d]", i);

> > +                       return -ENOMEM;

> > +               }

> > +

> > +               lat_buf->private_data = kzalloc(private_size, GFP_KERNEL);

> > +               if (!lat_buf->private_data) {

> > +                       mtk_v4l2_err("failed to allocate private_data[%d]", i);

> > +                       return -ENOMEM;

> > +               }

> > +

> > +               lat_buf->ctx = ctx;

> > +               lat_buf->core_decode = core_decode;

> > +               vdec_msg_queue_buf_to_lat(lat_buf);

> > +       }

> Doesn't it need to call mtk_vcodec_mem_free() and kfree() for any failure paths?

When allocate memory fail, will call deinit function auto, then free all allocated memory.
> > +struct vdec_lat_buf *vdec_msg_queue_get_core_buf(

> > +       struct mtk_vcodec_dev *dev)

> > +{

> > +       struct vdec_lat_buf *buf;

> > +       int ret;

> > +

> > +       spin_lock(&dev->core_lock);

> > +       if (list_empty(&dev->core_queue)) {

> > +               mtk_v4l2_debug(3, "core queue is NULL, num_core = %d", dev->num_core);

> > +               spin_unlock(&dev->core_lock);

> > +               ret = wait_event_freezable(dev->core_read,

> > +                       !list_empty(&dev->core_queue));

> > +               if (ret)

> > +                       return NULL;

> Should be !ret?

According the definidtion, when condition is true, return value is 0.
#define wait_event_freezable(wq_head, condition)				\
({										\
	int __ret = 0;								\
	might_sleep();								\
	if (!(condition))							\
		__ret = __wait_event_freezable(wq_head, condition);		\
	__ret;									\
}) 
> > +void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,

> > +       struct vdec_lat_buf *buf)

> > +{

> > +       spin_lock(&dev->core_lock);

> > +       list_add_tail(&buf->core_list, &dev->core_queue);

> > +       dev->num_core++;

> > +       wake_up_all(&dev->core_read);

> > +       mtk_v4l2_debug(3, "queu buf addr: (0x%p)", buf);

> Typo.

> 

> > +bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)

> > +{

> > +       long timeout_jiff;

> > +       int ret, i;

> > +

> > +       for (i = 0; i < NUM_BUFFER_COUNT + 2; i++) {

> > +              timeout_jiff = msecs_to_jiffies(1000);

> > +              ret = wait_event_timeout(msg_queue->lat_read,

> > +                    msg_queue->num_lat == NUM_BUFFER_COUNT, timeout_jiff);

> > +              if (ret) {

> > +                     mtk_v4l2_debug(3, "success to get lat buf: %d",

> > +                            msg_queue->num_lat);

> > +                     return true;

> > +              }

> > +       }

> Why does it need the loop?  i is unused.

Core maybe decode timeout, need to wait all core buffer process
completely.
> > +void vdec_msg_queue_deinit(

> > +       struct mtk_vcodec_ctx *ctx,

> > +       struct vdec_msg_queue *msg_queue)

> > +{

> > +       struct vdec_lat_buf *lat_buf;

> > +       struct mtk_vcodec_mem *mem;

> > +       int i;

> > +

> > +       mem = &msg_queue->wdma_addr;

> > +       if (mem->va)

> > +               mtk_vcodec_mem_free(ctx, mem);

> > +       for (i = 0; i < NUM_BUFFER_COUNT; i++) {

> > +               lat_buf = &msg_queue->lat_buf[i];

> > +

> > +               mem = &lat_buf->wdma_err_addr;

> > +               if (mem->va)

> > +                       mtk_vcodec_mem_free(ctx, mem);

> > +

> > +               mem = &lat_buf->slice_bc_addr;

> > +               if (mem->va)

> > +                       mtk_vcodec_mem_free(ctx, mem);

> > +

> > +               if (lat_buf->private_data)

> > +                       kfree(lat_buf->private_data);

> > +       }

> > +

> > +       msg_queue->init_done = false;

> Have no idea what init_done does in the code.  It is not included in

> any branch condition.

When call vdec_msg_queue_init will set this parameter to true.
> > +/**

> > + * vdec_msg_queue_init - init lat buffer information.

> > + * @ctx: v4l2 ctx

> > + * @msg_queue: used to store the lat buffer information

> > + * @core_decode: core decode callback for each codec

> > + * @private_size: the private data size used to share with core

> > + */

> > +int vdec_msg_queue_init(

> > +       struct mtk_vcodec_ctx *ctx,

> > +       struct vdec_msg_queue *msg_queue,

> > +       core_decode_cb_t core_decode,

> > +       int private_size);

> Would prefer to have *msg_queue as the first argument (also applies to

> all operators of vdec_msg_queue).

Can fix.
> > +/**

> > + * vdec_msg_queue_get_core_buf - get used core buffer for lat decode.

> > + * @dev: mtk vcodec device

> > + */

> > +struct vdec_lat_buf *vdec_msg_queue_get_core_buf(

> > +       struct mtk_vcodec_dev *dev);

> This is weird: vdec_msg_queue's operator but manipulating mtk_vcodec_dev?

vdec_msg_queue is used to share message between lat and core, for each
instance has its lat msg queue list, but all instance share one core msg
queue list. When try to get core buffer need to get it from core queue
list. Then queue it to lat queue list when core decode done.
> > +

> > +/**

> > + * vdec_msg_queue_buf_to_core - queue buf to the core for core decode.

> > + * @dev: mtk vcodec device

> > + * @buf: current lat buffer

> > + */

> > +void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,

> > +       struct vdec_lat_buf *buf);

> Also weird.

> 

> > +/**

> > + * vdec_msg_queue_buf_to_lat - queue buf to lat for lat decode.

> > + * @buf: current lat buffer

> > + */

> > +void vdec_msg_queue_buf_to_lat(struct vdec_lat_buf *buf);

> It should at least accept a struct vdec_msg_queue argument (or which

> msg queue should the buf put into?).

All buffer is struct vdec_lat_buf, used to share info between lat and core queue list.
> > +/**

> > + * vdec_msg_queue_update_ube_rptr - used to updata the ube read point.

> Typo.

> 

> > +/**

> > + * vdec_msg_queue_update_ube_wptr - used to updata the ube write point.

> Typo.

> 

> > +/**

> > + * vdec_msg_queue_deinit - deinit lat buffer information.

> > + * @ctx: v4l2 ctx

> > + * @msg_queue: used to store the lat buffer information

> > + */

> > +void vdec_msg_queue_deinit(

> > +       struct mtk_vcodec_ctx *ctx,

> > +       struct vdec_msg_queue *msg_queue);

> Would prefer to have *msg_queue as the first argument.

Yes, can fix.
> 

> The position of struct vdec_msg_queue is weird.  It looks like the msg

> queue is only for struct vdec_lat_buf.  If so, would vdec_msg_queue be

> better to call vdec_lat_queue or something similar?

> 

> It shouldn't touch the core queue in mtk_vcodec_dev anyway.  Is it

> possible to generalize the queue-related code for both lat and core

> queues?

Lat queue list is separately for each instance, but only has one core
queue list.
diff mbox series

Patch

diff --git a/drivers/media/platform/mtk-vcodec/Makefile b/drivers/media/platform/mtk-vcodec/Makefile
index edeb3b66e9e9..5000e59da576 100644
--- a/drivers/media/platform/mtk-vcodec/Makefile
+++ b/drivers/media/platform/mtk-vcodec/Makefile
@@ -11,6 +11,7 @@  mtk-vcodec-dec-y := vdec/vdec_h264_if.o \
 		mtk_vcodec_dec_drv.o \
 		vdec_drv_if.o \
 		vdec_vpu_if.o \
+		vdec_msg_queue.o \
 		mtk_vcodec_dec.o \
 		mtk_vcodec_dec_stateful.o \
 		mtk_vcodec_dec_stateless.o \
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
index 50c87bca3973..53a3bab0da52 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
@@ -17,7 +17,9 @@ 
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/videobuf2-core.h>
+
 #include "mtk_vcodec_util.h"
+#include "vdec_msg_queue.h"
 
 #define VDEC_HW_ACTIVE	0x10
 #define VDEC_IRQ_CFG	0x11
@@ -284,6 +286,8 @@  struct vdec_pic_info {
  * @decoded_frame_cnt: number of decoded frames
  * @lock: protect variables accessed by V4L2 threads and worker thread such as
  *	  mtk_video_dec_buf.
+ *
+ * @msg_queue: msg queue used to store lat buffer information.
  */
 struct mtk_vcodec_ctx {
 	enum mtk_instance_type type;
@@ -331,6 +335,7 @@  struct mtk_vcodec_ctx {
 	int decoded_frame_cnt;
 	struct mutex lock;
 
+	struct vdec_msg_queue msg_queue;
 };
 
 enum mtk_chip {
@@ -464,6 +469,11 @@  struct mtk_vcodec_enc_pdata {
  * comp_dev: component hardware device
  * component_node: component node
  * comp_idx: component index
+ *
+ * core_read: Wait queue used to signalize when core get useful lat buffer
+ * core_queue: List of V4L2 lat_buf
+ * core_lock: spin lock to protect the struct usage
+ * num_core: number of buffers ready to be processed
  */
 struct mtk_vcodec_dev {
 	struct v4l2_device v4l2_dev;
@@ -506,6 +516,11 @@  struct mtk_vcodec_dev {
 	void *comp_dev[MTK_VDEC_HW_MAX];
 	struct device_node *component_node[MTK_VDEC_HW_MAX];
 	int comp_idx;
+
+	wait_queue_head_t core_read;
+	struct list_head core_queue;
+	spinlock_t core_lock;
+	int num_core;
 };
 
 static inline struct mtk_vcodec_ctx *fh_to_ctx(struct v4l2_fh *fh)
diff --git a/drivers/media/platform/mtk-vcodec/vdec_msg_queue.c b/drivers/media/platform/mtk-vcodec/vdec_msg_queue.c
new file mode 100644
index 000000000000..9d684e5f051c
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/vdec_msg_queue.c
@@ -0,0 +1,234 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#include <linux/freezer.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+
+#include "mtk_vcodec_dec_pm.h"
+#include "mtk_vcodec_drv.h"
+#include "vdec_msg_queue.h"
+
+#define VDEC_LAT_SLICE_HEADER_SZ    (640 * 1024)
+#define VDEC_ERR_MAP_SZ_AVC         ((8192 / 16) * (4352 / 16) / 8)
+
+static int vde_msg_queue_get_trans_size(int width, int height)
+{
+	if (width > 1920 || height > 1088)
+		return (30 * 1024 * 1024);
+	else
+		return 6 * 1024 * 1024;
+}
+
+int vdec_msg_queue_init(
+	struct mtk_vcodec_ctx *ctx,
+	struct vdec_msg_queue *msg_queue,
+	core_decode_cb_t core_decode,
+	int private_size)
+{
+	struct vdec_lat_buf *lat_buf;
+	int i, err;
+
+	init_waitqueue_head(&msg_queue->lat_read);
+	INIT_LIST_HEAD(&msg_queue->lat_queue);
+	spin_lock_init(&msg_queue->lat_lock);
+	msg_queue->num_lat = 0;
+
+	msg_queue->wdma_addr.size = vde_msg_queue_get_trans_size(
+		ctx->picinfo.buf_w, ctx->picinfo.buf_h);
+
+	err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr);
+	if (err) {
+		mtk_v4l2_err("failed to allocate wdma_addr buf");
+		return -ENOMEM;
+	}
+	msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr;
+	msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr;
+
+	for (i = 0; i < NUM_BUFFER_COUNT; i++) {
+		lat_buf = &msg_queue->lat_buf[i];
+
+		lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC;
+		err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr);
+		if (err) {
+			mtk_v4l2_err("failed to allocate wdma_err_addr buf[%d]", i);
+			return -ENOMEM;
+		}
+
+		lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ;
+		err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr);
+		if (err) {
+			mtk_v4l2_err("failed to allocate wdma_addr buf[%d]", i);
+			return -ENOMEM;
+		}
+
+		lat_buf->private_data = kzalloc(private_size, GFP_KERNEL);
+		if (!lat_buf->private_data) {
+			mtk_v4l2_err("failed to allocate private_data[%d]", i);
+			return -ENOMEM;
+		}
+
+		lat_buf->ctx = ctx;
+		lat_buf->core_decode = core_decode;
+		vdec_msg_queue_buf_to_lat(lat_buf);
+	}
+
+	msg_queue->init_done = true;
+	return 0;
+}
+
+struct vdec_lat_buf *vdec_msg_queue_get_lat_buf(
+	struct vdec_msg_queue *msg_queue)
+{
+	struct vdec_lat_buf *buf;
+	long timeout_jiff;
+	int ret;
+
+	spin_lock(&msg_queue->lat_lock);
+	if (list_empty(&msg_queue->lat_queue)) {
+		mtk_v4l2_debug(3, "lat queue is NULL, num_lat = %d", msg_queue->num_lat);
+		spin_unlock(&msg_queue->lat_lock);
+		timeout_jiff = msecs_to_jiffies(1500);
+		ret = wait_event_timeout(msg_queue->lat_read,
+			!list_empty(&msg_queue->lat_queue), timeout_jiff);
+		if (!ret)
+			return NULL;
+		spin_lock(&msg_queue->lat_lock);
+	}
+
+	buf = list_first_entry(&msg_queue->lat_queue, struct vdec_lat_buf,
+		lat_list);
+	list_del(&buf->lat_list);
+	msg_queue->num_lat--;
+
+	mtk_v4l2_debug(4, "lat num in msg queue = %d", msg_queue->num_lat);
+	mtk_v4l2_debug(3, "get lat(0x%p) trans(0x%llx) err:0x%llx slice(0x%llx)\n",
+		buf, msg_queue->wdma_addr.dma_addr,
+		buf->wdma_err_addr.dma_addr,
+		buf->slice_bc_addr.dma_addr);
+
+	spin_unlock(&msg_queue->lat_lock);
+	return buf;
+}
+
+struct vdec_lat_buf *vdec_msg_queue_get_core_buf(
+	struct mtk_vcodec_dev *dev)
+{
+	struct vdec_lat_buf *buf;
+	int ret;
+
+	spin_lock(&dev->core_lock);
+	if (list_empty(&dev->core_queue)) {
+		mtk_v4l2_debug(3, "core queue is NULL, num_core = %d", dev->num_core);
+		spin_unlock(&dev->core_lock);
+		ret = wait_event_freezable(dev->core_read,
+			!list_empty(&dev->core_queue));
+		if (ret)
+			return NULL;
+		spin_lock(&dev->core_lock);
+	}
+
+	buf = list_first_entry(&dev->core_queue, struct vdec_lat_buf,
+		core_list);
+	mtk_v4l2_debug(3, "get core buf addr: (0x%p)", buf);
+	list_del(&buf->core_list);
+	dev->num_core--;
+	spin_unlock(&dev->core_lock);
+	return buf;
+}
+
+void vdec_msg_queue_buf_to_lat(struct vdec_lat_buf *buf)
+{
+	struct vdec_msg_queue *msg_queue = &buf->ctx->msg_queue;
+
+	spin_lock(&msg_queue->lat_lock);
+	list_add_tail(&buf->lat_list, &msg_queue->lat_queue);
+	msg_queue->num_lat++;
+	wake_up_all(&msg_queue->lat_read);
+	mtk_v4l2_debug(3, "queue buf addr: (0x%p) lat num = %d",
+		buf, msg_queue->num_lat);
+	spin_unlock(&msg_queue->lat_lock);
+}
+
+void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,
+	struct vdec_lat_buf *buf)
+{
+	spin_lock(&dev->core_lock);
+	list_add_tail(&buf->core_list, &dev->core_queue);
+	dev->num_core++;
+	wake_up_all(&dev->core_read);
+	mtk_v4l2_debug(3, "queu buf addr: (0x%p)", buf);
+	spin_unlock(&dev->core_lock);
+}
+
+void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue,
+	uint64_t ube_rptr)
+{
+	spin_lock(&msg_queue->lat_lock);
+	msg_queue->wdma_rptr_addr = ube_rptr;
+	mtk_v4l2_debug(3, "update ube rprt (0x%llx)", ube_rptr);
+	spin_unlock(&msg_queue->lat_lock);
+}
+
+void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue,
+	uint64_t ube_wptr)
+{
+	spin_lock(&msg_queue->lat_lock);
+	msg_queue->wdma_wptr_addr = ube_wptr;
+	mtk_v4l2_debug(3, "update ube wprt: (0x%llx 0x%llx) offset: 0x%llx",
+		msg_queue->wdma_rptr_addr, msg_queue->wdma_wptr_addr, ube_wptr);
+	spin_unlock(&msg_queue->lat_lock);
+}
+
+bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)
+{
+	long timeout_jiff;
+	int ret, i;
+
+	for (i = 0; i < NUM_BUFFER_COUNT + 2; i++) {
+		timeout_jiff = msecs_to_jiffies(1000);
+		ret = wait_event_timeout(msg_queue->lat_read,
+			msg_queue->num_lat == NUM_BUFFER_COUNT, timeout_jiff);
+		if (ret) {
+			mtk_v4l2_debug(3, "success to get lat buf: %d",
+				msg_queue->num_lat);
+			return true;
+		}
+	}
+
+	mtk_v4l2_err("failed with lat buf isn't full: %d",
+		msg_queue->num_lat);
+	return false;
+}
+
+void vdec_msg_queue_deinit(
+	struct mtk_vcodec_ctx *ctx,
+	struct vdec_msg_queue *msg_queue)
+{
+	struct vdec_lat_buf *lat_buf;
+	struct mtk_vcodec_mem *mem;
+	int i;
+
+	mem = &msg_queue->wdma_addr;
+	if (mem->va)
+		mtk_vcodec_mem_free(ctx, mem);
+	for (i = 0; i < NUM_BUFFER_COUNT; i++) {
+		lat_buf = &msg_queue->lat_buf[i];
+
+		mem = &lat_buf->wdma_err_addr;
+		if (mem->va)
+			mtk_vcodec_mem_free(ctx, mem);
+
+		mem = &lat_buf->slice_bc_addr;
+		if (mem->va)
+			mtk_vcodec_mem_free(ctx, mem);
+
+		if (lat_buf->private_data)
+			kfree(lat_buf->private_data);
+	}
+
+	msg_queue->init_done = false;
+}
diff --git a/drivers/media/platform/mtk-vcodec/vdec_msg_queue.h b/drivers/media/platform/mtk-vcodec/vdec_msg_queue.h
new file mode 100644
index 000000000000..62261702c464
--- /dev/null
+++ b/drivers/media/platform/mtk-vcodec/vdec_msg_queue.h
@@ -0,0 +1,130 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#ifndef _VDEC_MSG_QUEUE_H_
+#define _VDEC_MSG_QUEUE_H_
+
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <media/videobuf2-v4l2.h>
+
+#include "mtk_vcodec_util.h"
+
+#define NUM_BUFFER_COUNT 3
+
+struct vdec_lat_buf;
+struct mtk_vcodec_ctx;
+struct mtk_vcodec_dev;
+typedef int (*core_decode_cb_t)(struct vdec_lat_buf *lat_buf);
+
+/**
+ * struct vdec_lat_buf - lat buffer message used to store lat
+ *                       info for core decode
+ */
+struct vdec_lat_buf {
+	struct mtk_vcodec_mem wdma_err_addr;
+	struct mtk_vcodec_mem slice_bc_addr;
+	struct vb2_v4l2_buffer ts_info;
+
+	void *private_data;
+	struct mtk_vcodec_ctx *ctx;
+	core_decode_cb_t core_decode;
+	struct list_head lat_list;
+	struct list_head core_list;
+};
+
+/**
+ * struct vdec_msg_queue - used to store lat buffer message
+ */
+struct vdec_msg_queue {
+	struct vdec_lat_buf lat_buf[NUM_BUFFER_COUNT];
+
+	struct mtk_vcodec_mem wdma_addr;
+	uint64_t wdma_rptr_addr;
+	uint64_t wdma_wptr_addr;
+
+	wait_queue_head_t lat_read;
+	struct list_head lat_queue;
+	spinlock_t lat_lock;
+	int num_lat;
+	bool init_done;
+};
+
+/**
+ * vdec_msg_queue_init - init lat buffer information.
+ * @ctx: v4l2 ctx
+ * @msg_queue: used to store the lat buffer information
+ * @core_decode: core decode callback for each codec
+ * @private_size: the private data size used to share with core
+ */
+int vdec_msg_queue_init(
+	struct mtk_vcodec_ctx *ctx,
+	struct vdec_msg_queue *msg_queue,
+	core_decode_cb_t core_decode,
+	int private_size);
+
+/**
+ * vdec_msg_queue_get_lat_buf - get used lat buffer for core decode.
+ * @msg_queue: used to store the lat buffer information
+ */
+struct vdec_lat_buf *vdec_msg_queue_get_lat_buf(
+	struct vdec_msg_queue *msg_queue);
+
+/**
+ * vdec_msg_queue_get_core_buf - get used core buffer for lat decode.
+ * @dev: mtk vcodec device
+ */
+struct vdec_lat_buf *vdec_msg_queue_get_core_buf(
+	struct mtk_vcodec_dev *dev);
+
+/**
+ * vdec_msg_queue_buf_to_core - queue buf to the core for core decode.
+ * @dev: mtk vcodec device
+ * @buf: current lat buffer
+ */
+void vdec_msg_queue_buf_to_core(struct mtk_vcodec_dev *dev,
+	struct vdec_lat_buf *buf);
+
+/**
+ * vdec_msg_queue_buf_to_lat - queue buf to lat for lat decode.
+ * @buf: current lat buffer
+ */
+void vdec_msg_queue_buf_to_lat(struct vdec_lat_buf *buf);
+
+/**
+ * vdec_msg_queue_update_ube_rptr - used to updata the ube read point.
+ * @msg_queue: used to store the lat buffer information
+ * @ube_rptr: current ube read point
+ */
+void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue,
+	uint64_t ube_rptr);
+
+/**
+ * vdec_msg_queue_update_ube_wptr - used to updata the ube write point.
+ * @msg_queue: used to store the lat buffer information
+ * @ube_wptr: current ube write point
+ */
+void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue,
+	uint64_t ube_wptr);
+
+/**
+ * vdec_msg_queue_wait_lat_buf_full - used to check whether all lat buffer
+ *                                    in lat list.
+ * @msg_queue: used to store the lat buffer information
+ */
+bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue);
+
+/**
+ * vdec_msg_queue_deinit - deinit lat buffer information.
+ * @ctx: v4l2 ctx
+ * @msg_queue: used to store the lat buffer information
+ */
+void vdec_msg_queue_deinit(
+	struct mtk_vcodec_ctx *ctx,
+	struct vdec_msg_queue *msg_queue);
+
+#endif