Message ID | 20241213091030.2936864-1-ming.qian@oss.nxp.com |
---|---|
State | Superseded |
Headers | show |
Series | [v2] media: amphion: Support dmabuf and v4l2 buffer without binding | expand |
Le vendredi 13 décembre 2024 à 18:10 +0900, Ming Qian a écrit : > From: Ming Qian <ming.qian@nxp.com> > > When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer > may not one-to-one, a single dma-buf may be queued via different > v4l2 buffers, and different dma-bufs may be queued via the same > v4l2 buffer, so it's not appropriate to use the v4l2 buffer index > as the frame store id. > > We can generate a frame store id according to the dma address. > Then for a given dma-buf, the id is fixed. > > Driver now manages the frame store and vb2-buffer states independently. > > When a dmabuf is queued via another v4l2 buffer before the buffer is > released by firmware, need to pend it until firmware release it. > > Signed-off-by: Ming Qian <ming.qian@nxp.com> > --- > v2 > -- fix an uninitialized issue reported by media-ci > > drivers/media/platform/amphion/vdec.c | 232 ++++++++++++++++++---- > drivers/media/platform/amphion/vpu.h | 7 +- > drivers/media/platform/amphion/vpu_dbg.c | 15 +- > drivers/media/platform/amphion/vpu_v4l2.c | 11 + > 4 files changed, 218 insertions(+), 47 deletions(-) > > diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c > index 61d5598ee6a1..a26cb0c264c9 100644 > --- a/drivers/media/platform/amphion/vdec.c > +++ b/drivers/media/platform/amphion/vdec.c > @@ -26,6 +26,7 @@ > #include "vpu_cmds.h" > #include "vpu_rpc.h" > > +#define VDEC_SLOT_CNT_DFT 32 > #define VDEC_MIN_BUFFER_CAP 8 > #define VDEC_MIN_BUFFER_OUT 8 > > @@ -41,6 +42,14 @@ struct vdec_fs_info { > u32 tag; > }; > > +struct vdec_frame_store_t { > + struct vpu_vb2_buffer *curr; > + struct vpu_vb2_buffer *pend; > + dma_addr_t addr; > + unsigned int state; > + u32 tag; > +}; > + > struct vdec_t { > u32 seq_hdr_found; > struct vpu_buffer udata; > @@ -48,7 +57,8 @@ struct vdec_t { > struct vpu_dec_codec_info codec_info; > enum vpu_codec_state state; > > - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; > + struct vdec_frame_store_t *slots; > + u32 slot_count; > u32 req_frame_count; > struct vdec_fs_info mbi; > struct vdec_fs_info dcp; > @@ -289,6 +299,64 @@ static int vdec_ctrl_init(struct vpu_inst *inst) > return 0; > } > > +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) > +{ > + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); > + struct vdec_t *vdec = inst->priv; > + struct vdec_frame_store_t *new_slots = NULL; > + dma_addr_t addr; > + int i; > + > + addr = vpu_get_vb_phy_addr(vb, 0); > + for (i = 0; i < vdec->slot_count; i++) { > + if (addr == vdec->slots[i].addr) { > + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); > + vdec->slots[i].pend = vpu_buf; > + } else { > + vpu_set_buffer_state(vbuf, vdec->slots[i].state); > + } > + vpu_buf->fs_id = i; > + return; > + } > + } > + > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].addr) { > + vdec->slots[i].addr = addr; > + vpu_buf->fs_id = i; > + return; > + } > + } > + > + new_slots = vzalloc(sizeof(*vdec->slots) * vdec->slot_count * 2); To avoid open coding arithmetics (see Documentation/process/deprecated.rst) you may be able to use flex_array_size(vdec, slots, vdec->slot_count * 2) > + if (!vdec->slots) { > + dev_err(inst->dev, "fail to alloc frame store\n"); > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); > + return; > + } > + > + memcpy(new_slots, vdec->slots, sizeof(*vdec->slots) * vdec->slot_count); > + vfree(vdec->slots); > + vdec->slots = new_slots; > + vdec->slot_count *= 2; > + > + vdec->slots[i].addr = addr; > + vpu_buf->fs_id = i; > +} > + > +static void vdec_reset_frame_store(struct vpu_inst *inst) > +{ > + struct vdec_t *vdec = inst->priv; > + > + if (!vdec->slots || !vdec->slot_count) > + return; > + > + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); > + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); Same here, flex_array_size() would calculate the size for you. > +} > + > static void vdec_handle_resolution_change(struct vpu_inst *inst) > { > struct vdec_t *vdec = inst->priv; > @@ -745,11 +813,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) > struct vb2_v4l2_buffer *src_buf; > int ret = 0; > > - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) > + if (!info || info->id >= vdec->slot_count) > return -EINVAL; > > vpu_inst_lock(inst); > - vpu_buf = vdec->slots[info->id]; > + vpu_buf = vdec->slots[info->id].curr; > if (!vpu_buf) { > dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); > ret = -EINVAL; > @@ -770,11 +838,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) > if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) > dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); > + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; > vdec->decoded_frame_count++; > if (vdec->params.display_delay_enable) { > struct vpu_format *cur_fmt; > > cur_fmt = vpu_get_format(inst, inst->cap_format.type); > + vdec->slots[info->id].state = VPU_BUF_STATE_READY; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); > for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) > vb2_set_plane_payload(&vbuf->vb2_buf, > @@ -797,11 +867,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) > struct vdec_t *vdec = inst->priv; > int i; > > - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { > - if (!vdec->slots[i]) > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].curr) > continue; > - if (luma == vdec->slots[i]->luma) > - return vdec->slots[i]; > + if (luma == vdec->slots[i].addr) > + return vdec->slots[i].curr; > } > > return NULL; > @@ -835,11 +905,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) > > cur_fmt = vpu_get_format(inst, inst->cap_format.type); > vbuf = &vpu_buf->m2m_buf.vb; > - if (vbuf->vb2_buf.index != frame->id) > - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", > - inst->id, vbuf->vb2_buf.index, frame->id); > + if (vpu_buf->fs_id != frame->id) > + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", > + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); > > - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) > + if (vdec->params.display_delay_enable) > return; > > if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) > @@ -852,10 +922,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) > vbuf->sequence = vdec->sequence; > dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); > > - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); > vpu_inst_lock(inst); > + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; > vdec->display_frame_count++; > vpu_inst_unlock(inst); > + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); > dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", > inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); > } > @@ -1083,18 +1154,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb > if (!vbuf) > return -EINVAL; > > - if (vdec->slots[vbuf->vb2_buf.index]) { > - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", > - inst->id, vbuf->vb2_buf.index); > + vpu_buf = to_vpu_vb2_buffer(vbuf); > + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { > + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", > + vpu_buf->fs_id, vbuf->vb2_buf.index); > return -EINVAL; > } > > + if (vdec->slots[vpu_buf->fs_id].curr) { > + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); > + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; > + } else { > + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); > + } > + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", > + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); > + return -EAGAIN; > + } > + > dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", > inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); > - vpu_buf = to_vpu_vb2_buffer(vbuf); > > memset(&info, 0, sizeof(info)); > - info.id = vbuf->vb2_buf.index; > + info.id = vpu_buf->fs_id; > info.type = MEM_RES_FRAME; > info.tag = vdec->seq_tag; > info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); > @@ -1109,12 +1192,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb > if (ret) > return ret; > > - vpu_buf->tag = info.tag; > vpu_buf->luma = info.luma_addr; > vpu_buf->chroma_u = info.chroma_addr; > vpu_buf->chroma_v = 0; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); > - vdec->slots[info.id] = vpu_buf; > + vdec->slots[info.id].tag = info.tag; > + vdec->slots[info.id].curr = vpu_buf; > + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; > vdec->req_frame_count--; > > return 0; > @@ -1175,25 +1259,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v > v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); > } > > -static void vdec_clear_slots(struct vpu_inst *inst) > +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) > { > struct vdec_t *vdec = inst->priv; > struct vpu_vb2_buffer *vpu_buf; > struct vb2_v4l2_buffer *vbuf; > + > + if (id >= vdec->slot_count) > + return; > + if (!vdec->slots[id].curr) > + return; > + > + vpu_buf = vdec->slots[id].curr; > + vbuf = &vpu_buf->m2m_buf.vb; > + > + vdec_response_fs_release(inst, id, vdec->slots[id].tag); > + if (vpu_buf->fs_id == id) { > + if (vpu_buf->state != VPU_BUF_STATE_READY) > + vdec_recycle_buffer(inst, vbuf); > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + } > + > + vdec->slots[id].curr = NULL; > + vdec->slots[id].state = VPU_BUF_STATE_IDLE; > + > + if (vdec->slots[id].pend) { > + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); > + vdec->slots[id].pend = NULL; > + } > +} > + > +static void vdec_clear_slots(struct vpu_inst *inst) > +{ > + struct vdec_t *vdec = inst->priv; > int i; > > - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { > - if (!vdec->slots[i]) > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].curr) > continue; > > - vpu_buf = vdec->slots[i]; > - vbuf = &vpu_buf->m2m_buf.vb; > - > vpu_trace(inst->dev, "clear slot %d\n", i); > - vdec_response_fs_release(inst, i, vpu_buf->tag); > - vdec_recycle_buffer(inst, vbuf); > - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; > - vdec->slots[i] = NULL; > + vdec_release_curr_frame_store(inst, i); > } > } > > @@ -1324,39 +1430,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) > static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) > { > struct vdec_t *vdec = inst->priv; > - struct vpu_vb2_buffer *vpu_buf; > - struct vb2_v4l2_buffer *vbuf; > > - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) > + if (!fs || fs->id >= vdec->slot_count) > return; > if (fs->type != MEM_RES_FRAME) > return; > > - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { > + if (fs->id >= vdec->slot_count) { > dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); > return; > } > > vpu_inst_lock(inst); > - vpu_buf = vdec->slots[fs->id]; > - vdec->slots[fs->id] = NULL; > - > - if (!vpu_buf) { > + if (!vdec->slots[fs->id].curr) { > dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); > goto exit; > } > > - vbuf = &vpu_buf->m2m_buf.vb; > - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { > + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { > dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); > vdec->sequence++; > } > > - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); > - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) > - vdec_recycle_buffer(inst, vbuf); > - > - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + vdec_release_curr_frame_store(inst, fs->id); > vpu_process_capture_buffer(inst); > > exit: > @@ -1552,6 +1648,11 @@ static void vdec_cleanup(struct vpu_inst *inst) > return; > > vdec = inst->priv; > + if (vdec) { > + vfree(vdec->slots); > + vdec->slots = NULL; > + vdec->slot_count = 0; > + } > vfree(vdec); > inst->priv = NULL; > vfree(inst); > @@ -1683,6 +1784,38 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) > return 0; > } > > +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) > +{ > + struct vdec_t *vdec = inst->priv; > + struct vpu_vb2_buffer *vpu_buf; > + int num = -1; > + > + vpu_inst_lock(inst); > + if (i >= vdec->slot_count || !vdec->slots[i].addr) > + goto exit; > + > + vpu_buf = vdec->slots[i].curr; > + > + num = scnprintf(str, size, "slot[%2d] :", i); > + if (vpu_buf) { > + num += scnprintf(str + num, size - num, " %2d", > + vpu_buf->m2m_buf.vb.vb2_buf.index); > + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); > + } else { > + num += scnprintf(str + num, size - num, " -1"); > + } > + > + if (vdec->slots[i].pend) > + num += scnprintf(str + num, size - num, "; %d", > + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); > + > + num += scnprintf(str + num, size - num, "\n"); > +exit: > + vpu_inst_unlock(inst); > + > + return num; > +} > + > static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) > { > struct vdec_t *vdec = inst->priv; > @@ -1741,6 +1874,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i > vdec->codec_info.vui_present); > break; > default: > + num = vdec_get_slot_debug_info(inst, str, size, i - 10); > break; > } > > @@ -1764,6 +1898,8 @@ static struct vpu_inst_ops vdec_inst_ops = { > .get_debug_info = vdec_get_debug_info, > .wait_prepare = vpu_inst_unlock, > .wait_finish = vpu_inst_lock, > + .attach_frame_store = vdec_attach_frame_store, > + .reset_frame_store = vdec_reset_frame_store, > }; > > static void vdec_init(struct file *file) > @@ -1804,6 +1940,14 @@ static int vdec_open(struct file *file) > return -ENOMEM; > } > > + vdec->slots = vzalloc(sizeof(*vdec->slots) * VDEC_SLOT_CNT_DFT); > + if (!vdec->slots) { > + vfree(vdec); > + vfree(inst); > + return -ENOMEM; > + } > + vdec->slot_count = VDEC_SLOT_CNT_DFT; > + > inst->ops = &vdec_inst_ops; > inst->formats = vdec_formats; > inst->type = VPU_CORE_TYPE_DEC; > diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h > index 22f0da26ccec..76bfd6b26170 100644 > --- a/drivers/media/platform/amphion/vpu.h > +++ b/drivers/media/platform/amphion/vpu.h > @@ -223,6 +223,8 @@ struct vpu_inst_ops { > int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); > void (*wait_prepare)(struct vpu_inst *inst); > void (*wait_finish)(struct vpu_inst *inst); > + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); > + void (*reset_frame_store)(struct vpu_inst *inst); > }; > > struct vpu_inst { > @@ -296,7 +298,8 @@ enum { > VPU_BUF_STATE_DECODED, > VPU_BUF_STATE_READY, > VPU_BUF_STATE_SKIP, > - VPU_BUF_STATE_ERROR > + VPU_BUF_STATE_ERROR, > + VPU_BUF_STATE_CHANGED > }; > > struct vpu_vb2_buffer { > @@ -305,8 +308,8 @@ struct vpu_vb2_buffer { > dma_addr_t chroma_u; > dma_addr_t chroma_v; > unsigned int state; > - u32 tag; > u32 average_qp; > + s32 fs_id; > }; > > void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); > diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c > index 940e5bda5fa3..497ae4e8a229 100644 > --- a/drivers/media/platform/amphion/vpu_dbg.c > +++ b/drivers/media/platform/amphion/vpu_dbg.c > @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { > [VPU_BUF_STATE_READY] = "ready", > [VPU_BUF_STATE_SKIP] = "skip", > [VPU_BUF_STATE_ERROR] = "error", > + [VPU_BUF_STATE_CHANGED] = "changed", > }; > > static inline const char *to_vpu_stat_name(int state) > @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) > for (i = 0; i < vb2_get_num_buffers(vq); i++) { > struct vb2_buffer *vb; > struct vb2_v4l2_buffer *vbuf; > + struct vpu_vb2_buffer *vpu_buf; > > vb = vb2_get_buffer(vq, i); > if (!vb) > @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) > continue; > > vbuf = to_vb2_v4l2_buffer(vb); > + vpu_buf = to_vpu_vb2_buffer(vbuf); > > num = scnprintf(str, sizeof(str), > - "capture[%2d] state = %10s, %8s\n", > + "capture[%2d] state = %10s, %8s", > i, vb2_stat_name[vb->state], > to_vpu_stat_name(vpu_get_buffer_state(vbuf))); > if (seq_write(s, str, num)) > return 0; > + > + if (vpu_buf->fs_id >= 0) { > + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); > + if (seq_write(s, str, num)) > + return 0; > + } > + > + num = scnprintf(str, sizeof(str), "\n"); > + if (seq_write(s, str, num)) > + return 0; > } > > num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); > diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c > index 45707931bc4f..74668fa362e2 100644 > --- a/drivers/media/platform/amphion/vpu_v4l2.c > +++ b/drivers/media/platform/amphion/vpu_v4l2.c > @@ -501,14 +501,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, > call_void_vop(inst, release); > } > > + if (V4L2_TYPE_IS_CAPTURE(vq->type)) > + call_void_vop(inst, reset_frame_store); > + > return 0; > } > > static int vpu_vb2_buf_init(struct vb2_buffer *vb) > { > struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); > + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); > > + vpu_buf->fs_id = -1; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + > + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) > + return 0; > + > + call_void_vop(inst, attach_frame_store, vb); > return 0; > } > Just a general question, was the choice for a flexible array because the IP does not provide enough per-codec information to calculate the number of needed slots or to actually avoid needing to do per codec array sizing ? Nicolas
Hi Nicolas, On 2025/1/7 5:55, Nicolas Dufresne wrote: > Le vendredi 13 décembre 2024 à 18:10 +0900, Ming Qian a écrit : >> From: Ming Qian <ming.qian@nxp.com> >> >> When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer >> may not one-to-one, a single dma-buf may be queued via different >> v4l2 buffers, and different dma-bufs may be queued via the same >> v4l2 buffer, so it's not appropriate to use the v4l2 buffer index >> as the frame store id. >> >> We can generate a frame store id according to the dma address. >> Then for a given dma-buf, the id is fixed. >> >> Driver now manages the frame store and vb2-buffer states independently. >> >> When a dmabuf is queued via another v4l2 buffer before the buffer is >> released by firmware, need to pend it until firmware release it. >> >> Signed-off-by: Ming Qian <ming.qian@nxp.com> >> --- >> v2 >> -- fix an uninitialized issue reported by media-ci >> >> drivers/media/platform/amphion/vdec.c | 232 ++++++++++++++++++---- >> drivers/media/platform/amphion/vpu.h | 7 +- >> drivers/media/platform/amphion/vpu_dbg.c | 15 +- >> drivers/media/platform/amphion/vpu_v4l2.c | 11 + >> 4 files changed, 218 insertions(+), 47 deletions(-) >> >> diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c >> index 61d5598ee6a1..a26cb0c264c9 100644 >> --- a/drivers/media/platform/amphion/vdec.c >> +++ b/drivers/media/platform/amphion/vdec.c >> @@ -26,6 +26,7 @@ >> #include "vpu_cmds.h" >> #include "vpu_rpc.h" >> >> +#define VDEC_SLOT_CNT_DFT 32 >> #define VDEC_MIN_BUFFER_CAP 8 >> #define VDEC_MIN_BUFFER_OUT 8 >> >> @@ -41,6 +42,14 @@ struct vdec_fs_info { >> u32 tag; >> }; >> >> +struct vdec_frame_store_t { >> + struct vpu_vb2_buffer *curr; >> + struct vpu_vb2_buffer *pend; >> + dma_addr_t addr; >> + unsigned int state; >> + u32 tag; >> +}; >> + >> struct vdec_t { >> u32 seq_hdr_found; >> struct vpu_buffer udata; >> @@ -48,7 +57,8 @@ struct vdec_t { >> struct vpu_dec_codec_info codec_info; >> enum vpu_codec_state state; >> >> - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; >> + struct vdec_frame_store_t *slots; >> + u32 slot_count; >> u32 req_frame_count; >> struct vdec_fs_info mbi; >> struct vdec_fs_info dcp; >> @@ -289,6 +299,64 @@ static int vdec_ctrl_init(struct vpu_inst *inst) >> return 0; >> } >> >> +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) >> +{ >> + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); >> + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); >> + struct vdec_t *vdec = inst->priv; >> + struct vdec_frame_store_t *new_slots = NULL; >> + dma_addr_t addr; >> + int i; >> + >> + addr = vpu_get_vb_phy_addr(vb, 0); >> + for (i = 0; i < vdec->slot_count; i++) { >> + if (addr == vdec->slots[i].addr) { >> + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { >> + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); >> + vdec->slots[i].pend = vpu_buf; >> + } else { >> + vpu_set_buffer_state(vbuf, vdec->slots[i].state); >> + } >> + vpu_buf->fs_id = i; >> + return; >> + } >> + } >> + >> + for (i = 0; i < vdec->slot_count; i++) { >> + if (!vdec->slots[i].addr) { >> + vdec->slots[i].addr = addr; >> + vpu_buf->fs_id = i; >> + return; >> + } >> + } >> + >> + new_slots = vzalloc(sizeof(*vdec->slots) * vdec->slot_count * 2); > > To avoid open coding arithmetics (see Documentation/process/deprecated.rst) you > may be able to use flex_array_size(vdec, slots, vdec->slot_count * 2) Thanks for the tip, I will fix this issue in v3 patch. > >> + if (!vdec->slots) { >> + dev_err(inst->dev, "fail to alloc frame store\n"); >> + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); >> + return; >> + } >> + >> + memcpy(new_slots, vdec->slots, sizeof(*vdec->slots) * vdec->slot_count); >> + vfree(vdec->slots); >> + vdec->slots = new_slots; >> + vdec->slot_count *= 2; >> + >> + vdec->slots[i].addr = addr; >> + vpu_buf->fs_id = i; >> +} >> + >> +static void vdec_reset_frame_store(struct vpu_inst *inst) >> +{ >> + struct vdec_t *vdec = inst->priv; >> + >> + if (!vdec->slots || !vdec->slot_count) >> + return; >> + >> + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); >> + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); > > > Same here, flex_array_size() would calculate the size for you. Got it. > >> +} >> + >> static void vdec_handle_resolution_change(struct vpu_inst *inst) >> { >> struct vdec_t *vdec = inst->priv; >> @@ -745,11 +813,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) >> struct vb2_v4l2_buffer *src_buf; >> int ret = 0; >> >> - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) >> + if (!info || info->id >= vdec->slot_count) >> return -EINVAL; >> >> vpu_inst_lock(inst); >> - vpu_buf = vdec->slots[info->id]; >> + vpu_buf = vdec->slots[info->id].curr; >> if (!vpu_buf) { >> dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); >> ret = -EINVAL; >> @@ -770,11 +838,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) >> if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) >> dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); >> vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); >> + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; >> vdec->decoded_frame_count++; >> if (vdec->params.display_delay_enable) { >> struct vpu_format *cur_fmt; >> >> cur_fmt = vpu_get_format(inst, inst->cap_format.type); >> + vdec->slots[info->id].state = VPU_BUF_STATE_READY; >> vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); >> for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) >> vb2_set_plane_payload(&vbuf->vb2_buf, >> @@ -797,11 +867,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) >> struct vdec_t *vdec = inst->priv; >> int i; >> >> - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { >> - if (!vdec->slots[i]) >> + for (i = 0; i < vdec->slot_count; i++) { >> + if (!vdec->slots[i].curr) >> continue; >> - if (luma == vdec->slots[i]->luma) >> - return vdec->slots[i]; >> + if (luma == vdec->slots[i].addr) >> + return vdec->slots[i].curr; >> } >> >> return NULL; >> @@ -835,11 +905,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) >> >> cur_fmt = vpu_get_format(inst, inst->cap_format.type); >> vbuf = &vpu_buf->m2m_buf.vb; >> - if (vbuf->vb2_buf.index != frame->id) >> - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", >> - inst->id, vbuf->vb2_buf.index, frame->id); >> + if (vpu_buf->fs_id != frame->id) >> + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", >> + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); >> >> - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) >> + if (vdec->params.display_delay_enable) >> return; >> >> if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) >> @@ -852,10 +922,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) >> vbuf->sequence = vdec->sequence; >> dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); >> >> - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); >> vpu_inst_lock(inst); >> + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; >> vdec->display_frame_count++; >> vpu_inst_unlock(inst); >> + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); >> dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", >> inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); >> } >> @@ -1083,18 +1154,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb >> if (!vbuf) >> return -EINVAL; >> >> - if (vdec->slots[vbuf->vb2_buf.index]) { >> - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", >> - inst->id, vbuf->vb2_buf.index); >> + vpu_buf = to_vpu_vb2_buffer(vbuf); >> + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { >> + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", >> + vpu_buf->fs_id, vbuf->vb2_buf.index); >> return -EINVAL; >> } >> >> + if (vdec->slots[vpu_buf->fs_id].curr) { >> + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { >> + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); >> + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; >> + } else { >> + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); >> + } >> + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", >> + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); >> + return -EAGAIN; >> + } >> + >> dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", >> inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); >> - vpu_buf = to_vpu_vb2_buffer(vbuf); >> >> memset(&info, 0, sizeof(info)); >> - info.id = vbuf->vb2_buf.index; >> + info.id = vpu_buf->fs_id; >> info.type = MEM_RES_FRAME; >> info.tag = vdec->seq_tag; >> info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); >> @@ -1109,12 +1192,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb >> if (ret) >> return ret; >> >> - vpu_buf->tag = info.tag; >> vpu_buf->luma = info.luma_addr; >> vpu_buf->chroma_u = info.chroma_addr; >> vpu_buf->chroma_v = 0; >> vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); >> - vdec->slots[info.id] = vpu_buf; >> + vdec->slots[info.id].tag = info.tag; >> + vdec->slots[info.id].curr = vpu_buf; >> + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; >> vdec->req_frame_count--; >> >> return 0; >> @@ -1175,25 +1259,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v >> v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); >> } >> >> -static void vdec_clear_slots(struct vpu_inst *inst) >> +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) >> { >> struct vdec_t *vdec = inst->priv; >> struct vpu_vb2_buffer *vpu_buf; >> struct vb2_v4l2_buffer *vbuf; >> + >> + if (id >= vdec->slot_count) >> + return; >> + if (!vdec->slots[id].curr) >> + return; >> + >> + vpu_buf = vdec->slots[id].curr; >> + vbuf = &vpu_buf->m2m_buf.vb; >> + >> + vdec_response_fs_release(inst, id, vdec->slots[id].tag); >> + if (vpu_buf->fs_id == id) { >> + if (vpu_buf->state != VPU_BUF_STATE_READY) >> + vdec_recycle_buffer(inst, vbuf); >> + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); >> + } >> + >> + vdec->slots[id].curr = NULL; >> + vdec->slots[id].state = VPU_BUF_STATE_IDLE; >> + >> + if (vdec->slots[id].pend) { >> + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); >> + vdec->slots[id].pend = NULL; >> + } >> +} >> + >> +static void vdec_clear_slots(struct vpu_inst *inst) >> +{ >> + struct vdec_t *vdec = inst->priv; >> int i; >> >> - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { >> - if (!vdec->slots[i]) >> + for (i = 0; i < vdec->slot_count; i++) { >> + if (!vdec->slots[i].curr) >> continue; >> >> - vpu_buf = vdec->slots[i]; >> - vbuf = &vpu_buf->m2m_buf.vb; >> - >> vpu_trace(inst->dev, "clear slot %d\n", i); >> - vdec_response_fs_release(inst, i, vpu_buf->tag); >> - vdec_recycle_buffer(inst, vbuf); >> - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; >> - vdec->slots[i] = NULL; >> + vdec_release_curr_frame_store(inst, i); >> } >> } >> >> @@ -1324,39 +1430,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) >> static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) >> { >> struct vdec_t *vdec = inst->priv; >> - struct vpu_vb2_buffer *vpu_buf; >> - struct vb2_v4l2_buffer *vbuf; >> >> - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) >> + if (!fs || fs->id >= vdec->slot_count) >> return; >> if (fs->type != MEM_RES_FRAME) >> return; >> >> - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { >> + if (fs->id >= vdec->slot_count) { >> dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); >> return; >> } >> >> vpu_inst_lock(inst); >> - vpu_buf = vdec->slots[fs->id]; >> - vdec->slots[fs->id] = NULL; >> - >> - if (!vpu_buf) { >> + if (!vdec->slots[fs->id].curr) { >> dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); >> goto exit; >> } >> >> - vbuf = &vpu_buf->m2m_buf.vb; >> - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { >> + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { >> dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); >> vdec->sequence++; >> } >> >> - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); >> - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) >> - vdec_recycle_buffer(inst, vbuf); >> - >> - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); >> + vdec_release_curr_frame_store(inst, fs->id); >> vpu_process_capture_buffer(inst); >> >> exit: >> @@ -1552,6 +1648,11 @@ static void vdec_cleanup(struct vpu_inst *inst) >> return; >> >> vdec = inst->priv; >> + if (vdec) { >> + vfree(vdec->slots); >> + vdec->slots = NULL; >> + vdec->slot_count = 0; >> + } >> vfree(vdec); >> inst->priv = NULL; >> vfree(inst); >> @@ -1683,6 +1784,38 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) >> return 0; >> } >> >> +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) >> +{ >> + struct vdec_t *vdec = inst->priv; >> + struct vpu_vb2_buffer *vpu_buf; >> + int num = -1; >> + >> + vpu_inst_lock(inst); >> + if (i >= vdec->slot_count || !vdec->slots[i].addr) >> + goto exit; >> + >> + vpu_buf = vdec->slots[i].curr; >> + >> + num = scnprintf(str, size, "slot[%2d] :", i); >> + if (vpu_buf) { >> + num += scnprintf(str + num, size - num, " %2d", >> + vpu_buf->m2m_buf.vb.vb2_buf.index); >> + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); >> + } else { >> + num += scnprintf(str + num, size - num, " -1"); >> + } >> + >> + if (vdec->slots[i].pend) >> + num += scnprintf(str + num, size - num, "; %d", >> + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); >> + >> + num += scnprintf(str + num, size - num, "\n"); >> +exit: >> + vpu_inst_unlock(inst); >> + >> + return num; >> +} >> + >> static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) >> { >> struct vdec_t *vdec = inst->priv; >> @@ -1741,6 +1874,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i >> vdec->codec_info.vui_present); >> break; >> default: >> + num = vdec_get_slot_debug_info(inst, str, size, i - 10); >> break; >> } >> >> @@ -1764,6 +1898,8 @@ static struct vpu_inst_ops vdec_inst_ops = { >> .get_debug_info = vdec_get_debug_info, >> .wait_prepare = vpu_inst_unlock, >> .wait_finish = vpu_inst_lock, >> + .attach_frame_store = vdec_attach_frame_store, >> + .reset_frame_store = vdec_reset_frame_store, >> }; >> >> static void vdec_init(struct file *file) >> @@ -1804,6 +1940,14 @@ static int vdec_open(struct file *file) >> return -ENOMEM; >> } >> >> + vdec->slots = vzalloc(sizeof(*vdec->slots) * VDEC_SLOT_CNT_DFT); >> + if (!vdec->slots) { >> + vfree(vdec); >> + vfree(inst); >> + return -ENOMEM; >> + } >> + vdec->slot_count = VDEC_SLOT_CNT_DFT; >> + >> inst->ops = &vdec_inst_ops; >> inst->formats = vdec_formats; >> inst->type = VPU_CORE_TYPE_DEC; >> diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h >> index 22f0da26ccec..76bfd6b26170 100644 >> --- a/drivers/media/platform/amphion/vpu.h >> +++ b/drivers/media/platform/amphion/vpu.h >> @@ -223,6 +223,8 @@ struct vpu_inst_ops { >> int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); >> void (*wait_prepare)(struct vpu_inst *inst); >> void (*wait_finish)(struct vpu_inst *inst); >> + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); >> + void (*reset_frame_store)(struct vpu_inst *inst); >> }; >> >> struct vpu_inst { >> @@ -296,7 +298,8 @@ enum { >> VPU_BUF_STATE_DECODED, >> VPU_BUF_STATE_READY, >> VPU_BUF_STATE_SKIP, >> - VPU_BUF_STATE_ERROR >> + VPU_BUF_STATE_ERROR, >> + VPU_BUF_STATE_CHANGED >> }; >> >> struct vpu_vb2_buffer { >> @@ -305,8 +308,8 @@ struct vpu_vb2_buffer { >> dma_addr_t chroma_u; >> dma_addr_t chroma_v; >> unsigned int state; >> - u32 tag; >> u32 average_qp; >> + s32 fs_id; >> }; >> >> void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); >> diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c >> index 940e5bda5fa3..497ae4e8a229 100644 >> --- a/drivers/media/platform/amphion/vpu_dbg.c >> +++ b/drivers/media/platform/amphion/vpu_dbg.c >> @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { >> [VPU_BUF_STATE_READY] = "ready", >> [VPU_BUF_STATE_SKIP] = "skip", >> [VPU_BUF_STATE_ERROR] = "error", >> + [VPU_BUF_STATE_CHANGED] = "changed", >> }; >> >> static inline const char *to_vpu_stat_name(int state) >> @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) >> for (i = 0; i < vb2_get_num_buffers(vq); i++) { >> struct vb2_buffer *vb; >> struct vb2_v4l2_buffer *vbuf; >> + struct vpu_vb2_buffer *vpu_buf; >> >> vb = vb2_get_buffer(vq, i); >> if (!vb) >> @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) >> continue; >> >> vbuf = to_vb2_v4l2_buffer(vb); >> + vpu_buf = to_vpu_vb2_buffer(vbuf); >> >> num = scnprintf(str, sizeof(str), >> - "capture[%2d] state = %10s, %8s\n", >> + "capture[%2d] state = %10s, %8s", >> i, vb2_stat_name[vb->state], >> to_vpu_stat_name(vpu_get_buffer_state(vbuf))); >> if (seq_write(s, str, num)) >> return 0; >> + >> + if (vpu_buf->fs_id >= 0) { >> + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); >> + if (seq_write(s, str, num)) >> + return 0; >> + } >> + >> + num = scnprintf(str, sizeof(str), "\n"); >> + if (seq_write(s, str, num)) >> + return 0; >> } >> >> num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); >> diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c >> index 45707931bc4f..74668fa362e2 100644 >> --- a/drivers/media/platform/amphion/vpu_v4l2.c >> +++ b/drivers/media/platform/amphion/vpu_v4l2.c >> @@ -501,14 +501,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, >> call_void_vop(inst, release); >> } >> >> + if (V4L2_TYPE_IS_CAPTURE(vq->type)) >> + call_void_vop(inst, reset_frame_store); >> + >> return 0; >> } >> >> static int vpu_vb2_buf_init(struct vb2_buffer *vb) >> { >> struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); >> + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); >> + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); >> >> + vpu_buf->fs_id = -1; >> vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); >> + >> + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) >> + return 0; >> + >> + call_void_vop(inst, attach_frame_store, vb); >> return 0; >> } >> > > Just a general question, was the choice for a flexible array because the IP does > not provide enough per-codec information to calculate the number of needed slots > or to actually avoid needing to do per codec array sizing ? > > Nicolas > The decoder only requires the minimum number of frame buffers, it doesn't constrain the maximum value. And the frame buffer is managed by fs_id, so we make dma address of frame buffer have a one-to-one correspondence with the fs_id. Here I use the slot index as the fs_id. I guess I can give an example of why we use a flexible array. For example, 20 v4l2 buffers are created, and 20 dma-bufs are queued, they took 20 slots. But after they are dequeued, the client enqueue another 20 dma-bufs via the 20 v4l2 buffers. Then this results in a total of 40 slots. We may not release the previous 20 slots as they are not released by the vpu firmware. This only happens in certain extreme cases, in most cases, we won't need to enlarge the array. The problem we have so far is that the client may maintain a dma-buffer pool, the number of dma-bufs in the pool is indeterminate. Each time the client get a dma-buf from the pool and qbuf() with a v4l2 buffer. For the worst-case scenarios, we choose a flexible array to store the frame buffers. Thanks, Ming
diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c index 61d5598ee6a1..a26cb0c264c9 100644 --- a/drivers/media/platform/amphion/vdec.c +++ b/drivers/media/platform/amphion/vdec.c @@ -26,6 +26,7 @@ #include "vpu_cmds.h" #include "vpu_rpc.h" +#define VDEC_SLOT_CNT_DFT 32 #define VDEC_MIN_BUFFER_CAP 8 #define VDEC_MIN_BUFFER_OUT 8 @@ -41,6 +42,14 @@ struct vdec_fs_info { u32 tag; }; +struct vdec_frame_store_t { + struct vpu_vb2_buffer *curr; + struct vpu_vb2_buffer *pend; + dma_addr_t addr; + unsigned int state; + u32 tag; +}; + struct vdec_t { u32 seq_hdr_found; struct vpu_buffer udata; @@ -48,7 +57,8 @@ struct vdec_t { struct vpu_dec_codec_info codec_info; enum vpu_codec_state state; - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; + struct vdec_frame_store_t *slots; + u32 slot_count; u32 req_frame_count; struct vdec_fs_info mbi; struct vdec_fs_info dcp; @@ -289,6 +299,64 @@ static int vdec_ctrl_init(struct vpu_inst *inst) return 0; } +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vdec_t *vdec = inst->priv; + struct vdec_frame_store_t *new_slots = NULL; + dma_addr_t addr; + int i; + + addr = vpu_get_vb_phy_addr(vb, 0); + for (i = 0; i < vdec->slot_count; i++) { + if (addr == vdec->slots[i].addr) { + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[i].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[i].state); + } + vpu_buf->fs_id = i; + return; + } + } + + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].addr) { + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; + return; + } + } + + new_slots = vzalloc(sizeof(*vdec->slots) * vdec->slot_count * 2); + if (!vdec->slots) { + dev_err(inst->dev, "fail to alloc frame store\n"); + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); + return; + } + + memcpy(new_slots, vdec->slots, sizeof(*vdec->slots) * vdec->slot_count); + vfree(vdec->slots); + vdec->slots = new_slots; + vdec->slot_count *= 2; + + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; +} + +static void vdec_reset_frame_store(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; + + if (!vdec->slots || !vdec->slot_count) + return; + + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); +} + static void vdec_handle_resolution_change(struct vpu_inst *inst) { struct vdec_t *vdec = inst->priv; @@ -745,11 +813,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) struct vb2_v4l2_buffer *src_buf; int ret = 0; - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) + if (!info || info->id >= vdec->slot_count) return -EINVAL; vpu_inst_lock(inst); - vpu_buf = vdec->slots[info->id]; + vpu_buf = vdec->slots[info->id].curr; if (!vpu_buf) { dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); ret = -EINVAL; @@ -770,11 +838,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; vdec->decoded_frame_count++; if (vdec->params.display_delay_enable) { struct vpu_format *cur_fmt; cur_fmt = vpu_get_format(inst, inst->cap_format.type); + vdec->slots[info->id].state = VPU_BUF_STATE_READY; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) vb2_set_plane_payload(&vbuf->vb2_buf, @@ -797,11 +867,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - if (luma == vdec->slots[i]->luma) - return vdec->slots[i]; + if (luma == vdec->slots[i].addr) + return vdec->slots[i].curr; } return NULL; @@ -835,11 +905,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) cur_fmt = vpu_get_format(inst, inst->cap_format.type); vbuf = &vpu_buf->m2m_buf.vb; - if (vbuf->vb2_buf.index != frame->id) - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", - inst->id, vbuf->vb2_buf.index, frame->id); + if (vpu_buf->fs_id != frame->id) + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) + if (vdec->params.display_delay_enable) return; if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) @@ -852,10 +922,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) vbuf->sequence = vdec->sequence; dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); vpu_inst_lock(inst); + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; vdec->display_frame_count++; vpu_inst_unlock(inst); + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); } @@ -1083,18 +1154,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (!vbuf) return -EINVAL; - if (vdec->slots[vbuf->vb2_buf.index]) { - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", - inst->id, vbuf->vb2_buf.index); + vpu_buf = to_vpu_vb2_buffer(vbuf); + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", + vpu_buf->fs_id, vbuf->vb2_buf.index); return -EINVAL; } + if (vdec->slots[vpu_buf->fs_id].curr) { + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); + } + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); + return -EAGAIN; + } + dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); - vpu_buf = to_vpu_vb2_buffer(vbuf); memset(&info, 0, sizeof(info)); - info.id = vbuf->vb2_buf.index; + info.id = vpu_buf->fs_id; info.type = MEM_RES_FRAME; info.tag = vdec->seq_tag; info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); @@ -1109,12 +1192,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (ret) return ret; - vpu_buf->tag = info.tag; vpu_buf->luma = info.luma_addr; vpu_buf->chroma_u = info.chroma_addr; vpu_buf->chroma_v = 0; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); - vdec->slots[info.id] = vpu_buf; + vdec->slots[info.id].tag = info.tag; + vdec->slots[info.id].curr = vpu_buf; + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; vdec->req_frame_count--; return 0; @@ -1175,25 +1259,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); } -static void vdec_clear_slots(struct vpu_inst *inst) +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) { struct vdec_t *vdec = inst->priv; struct vpu_vb2_buffer *vpu_buf; struct vb2_v4l2_buffer *vbuf; + + if (id >= vdec->slot_count) + return; + if (!vdec->slots[id].curr) + return; + + vpu_buf = vdec->slots[id].curr; + vbuf = &vpu_buf->m2m_buf.vb; + + vdec_response_fs_release(inst, id, vdec->slots[id].tag); + if (vpu_buf->fs_id == id) { + if (vpu_buf->state != VPU_BUF_STATE_READY) + vdec_recycle_buffer(inst, vbuf); + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + } + + vdec->slots[id].curr = NULL; + vdec->slots[id].state = VPU_BUF_STATE_IDLE; + + if (vdec->slots[id].pend) { + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); + vdec->slots[id].pend = NULL; + } +} + +static void vdec_clear_slots(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - vpu_buf = vdec->slots[i]; - vbuf = &vpu_buf->m2m_buf.vb; - vpu_trace(inst->dev, "clear slot %d\n", i); - vdec_response_fs_release(inst, i, vpu_buf->tag); - vdec_recycle_buffer(inst, vbuf); - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; - vdec->slots[i] = NULL; + vdec_release_curr_frame_store(inst, i); } } @@ -1324,39 +1430,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) { struct vdec_t *vdec = inst->priv; - struct vpu_vb2_buffer *vpu_buf; - struct vb2_v4l2_buffer *vbuf; - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) + if (!fs || fs->id >= vdec->slot_count) return; if (fs->type != MEM_RES_FRAME) return; - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { + if (fs->id >= vdec->slot_count) { dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); return; } vpu_inst_lock(inst); - vpu_buf = vdec->slots[fs->id]; - vdec->slots[fs->id] = NULL; - - if (!vpu_buf) { + if (!vdec->slots[fs->id].curr) { dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); goto exit; } - vbuf = &vpu_buf->m2m_buf.vb; - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); vdec->sequence++; } - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) - vdec_recycle_buffer(inst, vbuf); - - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + vdec_release_curr_frame_store(inst, fs->id); vpu_process_capture_buffer(inst); exit: @@ -1552,6 +1648,11 @@ static void vdec_cleanup(struct vpu_inst *inst) return; vdec = inst->priv; + if (vdec) { + vfree(vdec->slots); + vdec->slots = NULL; + vdec->slot_count = 0; + } vfree(vdec); inst->priv = NULL; vfree(inst); @@ -1683,6 +1784,38 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) return 0; } +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) +{ + struct vdec_t *vdec = inst->priv; + struct vpu_vb2_buffer *vpu_buf; + int num = -1; + + vpu_inst_lock(inst); + if (i >= vdec->slot_count || !vdec->slots[i].addr) + goto exit; + + vpu_buf = vdec->slots[i].curr; + + num = scnprintf(str, size, "slot[%2d] :", i); + if (vpu_buf) { + num += scnprintf(str + num, size - num, " %2d", + vpu_buf->m2m_buf.vb.vb2_buf.index); + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); + } else { + num += scnprintf(str + num, size - num, " -1"); + } + + if (vdec->slots[i].pend) + num += scnprintf(str + num, size - num, "; %d", + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); + + num += scnprintf(str + num, size - num, "\n"); +exit: + vpu_inst_unlock(inst); + + return num; +} + static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) { struct vdec_t *vdec = inst->priv; @@ -1741,6 +1874,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i vdec->codec_info.vui_present); break; default: + num = vdec_get_slot_debug_info(inst, str, size, i - 10); break; } @@ -1764,6 +1898,8 @@ static struct vpu_inst_ops vdec_inst_ops = { .get_debug_info = vdec_get_debug_info, .wait_prepare = vpu_inst_unlock, .wait_finish = vpu_inst_lock, + .attach_frame_store = vdec_attach_frame_store, + .reset_frame_store = vdec_reset_frame_store, }; static void vdec_init(struct file *file) @@ -1804,6 +1940,14 @@ static int vdec_open(struct file *file) return -ENOMEM; } + vdec->slots = vzalloc(sizeof(*vdec->slots) * VDEC_SLOT_CNT_DFT); + if (!vdec->slots) { + vfree(vdec); + vfree(inst); + return -ENOMEM; + } + vdec->slot_count = VDEC_SLOT_CNT_DFT; + inst->ops = &vdec_inst_ops; inst->formats = vdec_formats; inst->type = VPU_CORE_TYPE_DEC; diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index 22f0da26ccec..76bfd6b26170 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -223,6 +223,8 @@ struct vpu_inst_ops { int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); void (*wait_prepare)(struct vpu_inst *inst); void (*wait_finish)(struct vpu_inst *inst); + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); + void (*reset_frame_store)(struct vpu_inst *inst); }; struct vpu_inst { @@ -296,7 +298,8 @@ enum { VPU_BUF_STATE_DECODED, VPU_BUF_STATE_READY, VPU_BUF_STATE_SKIP, - VPU_BUF_STATE_ERROR + VPU_BUF_STATE_ERROR, + VPU_BUF_STATE_CHANGED }; struct vpu_vb2_buffer { @@ -305,8 +308,8 @@ struct vpu_vb2_buffer { dma_addr_t chroma_u; dma_addr_t chroma_v; unsigned int state; - u32 tag; u32 average_qp; + s32 fs_id; }; void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c index 940e5bda5fa3..497ae4e8a229 100644 --- a/drivers/media/platform/amphion/vpu_dbg.c +++ b/drivers/media/platform/amphion/vpu_dbg.c @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { [VPU_BUF_STATE_READY] = "ready", [VPU_BUF_STATE_SKIP] = "skip", [VPU_BUF_STATE_ERROR] = "error", + [VPU_BUF_STATE_CHANGED] = "changed", }; static inline const char *to_vpu_stat_name(int state) @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) for (i = 0; i < vb2_get_num_buffers(vq); i++) { struct vb2_buffer *vb; struct vb2_v4l2_buffer *vbuf; + struct vpu_vb2_buffer *vpu_buf; vb = vb2_get_buffer(vq, i); if (!vb) @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) continue; vbuf = to_vb2_v4l2_buffer(vb); + vpu_buf = to_vpu_vb2_buffer(vbuf); num = scnprintf(str, sizeof(str), - "capture[%2d] state = %10s, %8s\n", + "capture[%2d] state = %10s, %8s", i, vb2_stat_name[vb->state], to_vpu_stat_name(vpu_get_buffer_state(vbuf))); if (seq_write(s, str, num)) return 0; + + if (vpu_buf->fs_id >= 0) { + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); + if (seq_write(s, str, num)) + return 0; + } + + num = scnprintf(str, sizeof(str), "\n"); + if (seq_write(s, str, num)) + return 0; } num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index 45707931bc4f..74668fa362e2 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -501,14 +501,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, call_void_vop(inst, release); } + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + call_void_vop(inst, reset_frame_store); + return 0; } static int vpu_vb2_buf_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); + vpu_buf->fs_id = -1; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) + return 0; + + call_void_vop(inst, attach_frame_store, vb); return 0; }