[PATCH v2 2/4] media: mediatek: vcodec: support extend h264 video shared information
Chen-Yu Tsai
wenst at chromium.org
Tue Oct 22 00:08:00 PDT 2024
On Wed, Oct 16, 2024 at 11:49 AM Yunfei Dong <yunfei.dong at mediatek.com> wrote:
>
> The address end of working buffer can't be calculated directly
> with buffer size in kernel for some special architecture. Adding
> new extend vsi to calculate the address end in other os.
> Refactoring the driver flow to make the driver looks more reasonable.
>
> Signed-off-by: Yunfei Dong <yunfei.dong at mediatek.com>
> ---
> .../decoder/vdec/vdec_h264_req_multi_if.c | 355 ++++++++++++------
> 1 file changed, 238 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> index 57c85af5ffb4..e02ed8dfe0ce 100644
> --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> @@ -48,10 +48,29 @@ struct vdec_h264_slice_lat_dec_param {
> };
>
> /**
> - * struct vdec_h264_slice_info - decode information
> + * struct vdec_h264_slice_info_ex - extend decode information
Please move these structs around so that the extended ones are
grouped together.
My suggestion is to keep |struct vdec_h264_slice_lat_dec_param|,
|struct vdec_h264_slice_info| and |struct vdec_h264_slice_vsi| at
the top of the file, and add the new ones below them. That way
the ones related are together, and also git won't produce such
a hard to read diff.
Please also mark every struct that is included in the `vsi` shared
memory struct as "shared interface with firmware" so that they do
not get changed accidentally.
> *
> + * @wdma_end_addr_offset: offset from buffer start
> * @nal_info: nal info of current picture
> * @timeout: Decode timeout: 1 timeout, 0 no timeout
> + * @vdec_fb_va: VDEC frame buffer struct virtual address
> + * @crc: Used to check whether hardware's status is right
> + * @reserved: reserved
> + */
> +struct vdec_h264_slice_info_ex {
> + u64 wdma_end_addr_offset;
> + u16 nal_info;
> + u16 timeout;
There is a 4 byte hole here. Can you make it explicit by adding
an extra reserved field?
> + u64 vdec_fb_va;
> + u32 crc[8];
> + u32 reserved;
> +};
> +
> +/**
> + * struct vdec_h264_slice_info - decode information
> + *
> + * @nal_info: nal info of current picture
> + * @timeout: Decode timeout: 1 timeout, 0 no timeount
> * @bs_buf_size: bitstream size
> * @bs_buf_addr: bitstream buffer dma address
> * @y_fb_dma: Y frame buffer dma address
> @@ -70,6 +89,64 @@ struct vdec_h264_slice_info {
> u32 crc[8];
> };
>
> +/*
> + * struct vdec_h264_slice_mem - memory address and size
> + */
> +struct vdec_h264_slice_mem {
> + union {
> + u64 buf;
> + u64 dma_addr;
> + };
> + union {
> + size_t size;
> + u64 dma_addr_end;
> + };
> +};
> +
> +/**
> + * struct vdec_h264_slice_fb - frame buffer for decoding
> + *
> + * @y: current y buffer address info
> + * @c: current c buffer address info
> + */
> +struct vdec_h264_slice_fb {
> + struct vdec_h264_slice_mem y;
> + struct vdec_h264_slice_mem c;
> +};
> +
> +/**
> + * struct vdec_h264_slice_vsi_ex - extend shared memory for decode information exchange
> + * between SCP and Host.
> + *
> + * @bs: input buffer info
> + * @fb: current y/c buffer
> + *
> + * @ube: ube buffer
> + * @trans: transcoded buffer
> + * @row_info: row info buffer
> + * @err_map: err map buffer
> + * @slice_bc: slice buffer
> + *
> + * @mv_buf_dma: HW working motion vector buffer
> + * @dec: decode information (AP-R, VPU-W)
> + * @h264_slice_params: decode parameters for hw used
> + */
> +struct vdec_h264_slice_vsi_ex {
> + /* LAT dec addr */
> + struct vdec_h264_slice_mem bs;
> + struct vdec_h264_slice_fb fb;
> +
> + struct vdec_h264_slice_mem ube;
> + struct vdec_h264_slice_mem trans;
> + struct vdec_h264_slice_mem row_info;
> + struct vdec_h264_slice_mem err_map;
> + struct vdec_h264_slice_mem slice_bc;
> +
> + struct vdec_h264_slice_mem mv_buf_dma[H264_MAX_MV_NUM];
> + struct vdec_h264_slice_info_ex dec;
> + struct vdec_h264_slice_lat_dec_param h264_slice_params;
> +};
> +
> /**
> * struct vdec_h264_slice_vsi - shared memory for decode information exchange
> * between SCP and Host.
> @@ -136,10 +213,10 @@ struct vdec_h264_slice_share_info {
> * @pred_buf: HW working prediction buffer
> * @mv_buf: HW working motion vector buffer
> * @vpu: VPU instance
> - * @vsi: vsi used for lat
> - * @vsi_core: vsi used for core
> + * @vsi_ex: extend vsi used for lat
> + * @vsi_core_ex: extend vsi used for core
> *
> - * @vsi_ctx: Local VSI data for this decoding context
> + * @vsi_ctx_ex: Local extend vsi data for this decoding context
> * @h264_slice_param: the parameters that hardware use to decode
> *
> * @resolution_changed:resolution changed
> @@ -156,10 +233,19 @@ struct vdec_h264_slice_inst {
> struct mtk_vcodec_mem pred_buf;
> struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM];
> struct vdec_vpu_inst vpu;
> - struct vdec_h264_slice_vsi *vsi;
> - struct vdec_h264_slice_vsi *vsi_core;
> -
> - struct vdec_h264_slice_vsi vsi_ctx;
> + union {
> + struct vdec_h264_slice_vsi_ex *vsi_ex;
> + struct vdec_h264_slice_vsi *vsi;
> + };
> + union {
> + struct vdec_h264_slice_vsi_ex *vsi_core_ex;
> + struct vdec_h264_slice_vsi *vsi_core;
> + };
> +
> + union {
> + struct vdec_h264_slice_vsi_ex vsi_ctx_ex;
> + struct vdec_h264_slice_vsi vsi_ctx;
> + };
Code wise I think it would be better to have a union of structs of structs:
union {
struct {
struct vdec_h264_slice_vsi *vsi;
struct vdec_h264_slice_vsi *vsi_core;
struct vdec_h264_slice_vsi vsi_ctx;
};
struct {
struct vdec_h264_slice_vsi_ex *vsi_ex;
struct vdec_h264_slice_vsi_ex *vsi_core_ex;
struct vdec_h264_slice_vsi_ex vsi_ctx_ex;
};
};
This makes it clear that the *_ex variants are used together.
The code compiles down to the same layout.
> struct vdec_h264_slice_lat_dec_param h264_slice_param;
>
> unsigned int resolution_changed;
> @@ -389,6 +475,98 @@ static void vdec_h264_slice_get_crop_info(struct vdec_h264_slice_inst *inst,
> cr->left, cr->top, cr->width, cr->height);
> }
>
> +static void vdec_h264_slice_setup_lat_buffer(struct vdec_h264_slice_inst *inst,
Please add the "_ex" suffix to mark this function as used for the extended
architecture.
> + struct mtk_vcodec_mem *bs,
> + struct vdec_lat_buf *lat_buf)
> +{
> + struct mtk_vcodec_mem *mem;
> + int i;
> +
> + inst->vsi_ex->bs.dma_addr = (u64)bs->dma_addr;
> + inst->vsi_ex->bs.size = bs->size;
> +
> + for (i = 0; i < H264_MAX_MV_NUM; i++) {
> + mem = &inst->mv_buf[i];
> + inst->vsi_ex->mv_buf_dma[i].dma_addr = mem->dma_addr;
> + inst->vsi_ex->mv_buf_dma[i].size = mem->size;
> + }
> + inst->vsi_ex->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
> + inst->vsi_ex->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
> +
> + inst->vsi_ex->row_info.dma_addr = 0;
> + inst->vsi_ex->row_info.size = 0;
> +
> + inst->vsi_ex->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
> + inst->vsi_ex->err_map.size = lat_buf->wdma_err_addr.size;
> +
> + inst->vsi_ex->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
> + inst->vsi_ex->slice_bc.size = lat_buf->slice_bc_addr.size;
> +
> + inst->vsi_ex->trans.dma_addr_end = inst->ctx->msg_queue.wdma_rptr_addr;
> + inst->vsi_ex->trans.dma_addr = inst->ctx->msg_queue.wdma_wptr_addr;
> +}
> +
> +static int vdec_h264_slice_setup_core_buffer(struct vdec_h264_slice_inst *inst,
> + struct vdec_h264_slice_share_info *share_info,
> + struct vdec_lat_buf *lat_buf)
Same here.
> +{
> + struct mtk_vcodec_mem *mem;
> + struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
> + struct vb2_v4l2_buffer *vb2_v4l2;
> + struct vdec_fb *fb;
> + u64 y_fb_dma, c_fb_dma = 0;
> + int i;
> +
> + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
> + if (!fb) {
> + mtk_vdec_err(ctx, "fb buffer is NULL");
> + return -EBUSY;
> + }
> +
> + y_fb_dma = (u64)fb->base_y.dma_addr;
> + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1)
> + c_fb_dma =
> + y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h;
> + else
> + c_fb_dma = (u64)fb->base_c.dma_addr;
> +
> + mtk_vdec_debug(ctx, "[h264-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma);
> +
> + inst->vsi_core_ex->fb.y.dma_addr = y_fb_dma;
> + inst->vsi_core_ex->fb.y.size = ctx->picinfo.fb_sz[0];
> + inst->vsi_core_ex->fb.c.dma_addr = c_fb_dma;
> + inst->vsi_core_ex->fb.c.size = ctx->picinfo.fb_sz[1];
> +
> + inst->vsi_core_ex->dec.vdec_fb_va = (unsigned long)fb;
> + inst->vsi_core_ex->dec.nal_info = share_info->nal_info;
> +
> + inst->vsi_core_ex->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
> + inst->vsi_core_ex->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
> +
> + inst->vsi_core_ex->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
> + inst->vsi_core_ex->err_map.size = lat_buf->wdma_err_addr.size;
> +
> + inst->vsi_core_ex->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
> + inst->vsi_core_ex->slice_bc.size = lat_buf->slice_bc_addr.size;
> +
> + inst->vsi_core_ex->row_info.dma_addr = 0;
> + inst->vsi_core_ex->row_info.size = 0;
> +
> + inst->vsi_core_ex->trans.dma_addr = share_info->trans_start;
> + inst->vsi_core_ex->trans.dma_addr_end = share_info->trans_end;
> +
> + for (i = 0; i < H264_MAX_MV_NUM; i++) {
> + mem = &inst->mv_buf[i];
> + inst->vsi_core_ex->mv_buf_dma[i].dma_addr = mem->dma_addr;
> + inst->vsi_core_ex->mv_buf_dma[i].size = mem->size;
> + }
> +
> + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true);
> +
> + return 0;
> +}
> +
> static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
> {
> struct vdec_h264_slice_inst *inst;
> @@ -412,10 +590,10 @@ static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
> goto error_free_inst;
> }
>
> - vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi), VCODEC_DEC_ALIGNED_64);
> - inst->vsi = inst->vpu.vsi;
> - inst->vsi_core =
> - (struct vdec_h264_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size);
> + vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi_ex), VCODEC_DEC_ALIGNED_64);
> + inst->vsi_ex = inst->vpu.vsi;
> + inst->vsi_core_ex =
> + (struct vdec_h264_slice_vsi_ex *)(((char *)inst->vpu.vsi) + vsi_size);
Changing this here without any feature checking will break existing
platforms because the vsi_core pointer now points to the wrong address.
You can't change the existing code path in a non backwards compatible
way in one patch then fix it back up in the next patch. It has to be
done at the same time.
In other words, existing platforms _must_ continue to work throughout
your patch series, i.e. with only patches 1 & 2 applied, MT8192 / MT8195
should continue to work properly.
Looking at this patch more, it seems that it is better to squash patches
2 & 3 together. The two patches are working on the same thing: adding
support for the extended architecture. Having the changes together in
one single patch makes more sense.
ChenYu
More information about the linux-arm-kernel
mailing list