[PATCH v2] media: hantro: Be more accurate on pixel formats step_width constraints
Benjamin Gaignard
benjamin.gaignard at collabora.com
Tue May 17 23:32:19 PDT 2022
Le 17/05/2022 à 19:42, Nicolas Dufresne a écrit :
> Le mardi 17 mai 2022 à 14:26 +0200, Benjamin Gaignard a écrit :
>> On Hantro G2 decoder on IMX8MQ strides requirements aren't the same
>> for NV12_4L4 and NV12 pixel formats. The first one use a 4 bytes padding
>> while the last one needs 8 bytes.
>> To be sure to provide the correct stride in all cases we need:
>> - to relax the constraints on codec formats so set step_width to 4
>> - use capture queue format and not the output queue format when applying
>> the pixel format constraints.
>> - put the correct step_width constraints on each pixel format.
>>
>> Move HEVC SPS validation in hantro_hevc.c to be able to perform it
>> when setting sps control and when starting to decode the bitstream.
>> Add a new test in HEVC SPS validation function to check if resolution
>> is still matching the hardware constraints.
>>
>> With this SAODBLK_A_MainConcept_4 and SAODBLK_B_MainConcept_4 conformance
>> tests files are correctly decoded with both NV12 and NV12_4L4 pixel formats.
>> These two files have a resolution of 1016x760.
>> If step_width = 16 for the both pixel formats the selected capture
> Did you mean is instead of = ? Are you missing "and" somewhere in this sentence
> ?
I mean 'equal' if step_width equal 16 the selected resolution is wrong for NV12_4L4.
>> resolution is 1024x768 which is wrong for NV12_4L4 (which expect 1016x760)
>> on Hantro G2 on IMX8MQ (but correct for NV12).
>>
>> For other variants than Hantro G2 on IMX8M keep the same step_width to avoid
>> regressions.
>>
>> Fluster HEVC test score is now 128/147 vs 126/147 with the both pixel
>> formats as decoder output.
>> Fluster VP9 test score stay at 147/303.
>>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard at collabora.com>
>> ---
>> version 2:
>> - Add a HEVC SPS validation function to be used when
>> setting the control and start decoding.
>> I hope that is what Nicolas expects in his remark on v1.
>>
>> drivers/staging/media/hantro/hantro_drv.c | 12 +++---
>> drivers/staging/media/hantro/hantro_hevc.c | 28 +++++++++++++
>> drivers/staging/media/hantro/hantro_hw.h | 2 +
>> drivers/staging/media/hantro/hantro_v4l2.c | 2 +-
>> drivers/staging/media/hantro/imx8m_vpu_hw.c | 40 +++++++++++++++++--
>> .../staging/media/hantro/rockchip_vpu_hw.c | 32 +++++++++++++++
>> .../staging/media/hantro/sama5d4_vdec_hw.c | 16 ++++++++
>> drivers/staging/media/hantro/sunxi_vpu_hw.c | 16 ++++++++
>> 8 files changed, 137 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
>> index 377dcc1d19de..5aac3a090480 100644
>> --- a/drivers/staging/media/hantro/hantro_drv.c
>> +++ b/drivers/staging/media/hantro/hantro_drv.c
>> @@ -253,6 +253,11 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>>
>> static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
>> {
>> + struct hantro_ctx *ctx;
>> +
>> + ctx = container_of(ctrl->handler,
>> + struct hantro_ctx, ctrl_handler);
>> +
>> if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
>> const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
>>
>> @@ -268,12 +273,7 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
>> } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
>> const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
>>
>> - if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
>> - /* Luma and chroma bit depth mismatch */
>> - return -EINVAL;
>> - if (sps->bit_depth_luma_minus8 != 0)
>> - /* Only 8-bit is supported */
>> - return -EINVAL;
>> + return hantro_hevc_validate_sps(ctx, sps);
>> } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
>> const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;
>>
>> diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
>> index 7fdec50dc853..6abef810b285 100644
>> --- a/drivers/staging/media/hantro/hantro_hevc.c
>> +++ b/drivers/staging/media/hantro/hantro_hevc.c
>> @@ -154,6 +154,30 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx)
>> return -ENOMEM;
>> }
>>
>> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
>> +{
>> + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
>> + /* Luma and chroma bit depth mismatch */
>> + return -EINVAL;
>> + if (sps->bit_depth_luma_minus8 != 0)
>> + /* Only 8-bit is supported */
>> + return -EINVAL;
>> +
>> + /* for tile pixel format check if the width and height match
>> + * hardware constraints */
>> + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
>> + if (ctx->dst_fmt.width !=
>> + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
>> + return -EINVAL;
>> +
>> + if (ctx->dst_fmt.height !=
>> + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
>> + return -EINVAL;
> No action needed, just a question. Can we output tiled out of the PP ? If so,
> perhaps we could handle the alignment difference by enabling the PP even if
> tiled. That would greatly help to handle compatibility between decoders and
> encoders in the future.
No tiled formats are produced by the decoder, PP produce NV12 format.
>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
>> {
>> struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
>> @@ -177,6 +201,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
>> if (WARN_ON(!ctrls->sps))
>> return -EINVAL;
>>
>> + ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
>> + if (ret)
>> + return ret;
>> +
>> ctrls->pps =
>> hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
>> if (WARN_ON(!ctrls->pps))
>> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
>> index 994547fe41b9..0bba6378212d 100644
>> --- a/drivers/staging/media/hantro/hantro_hw.h
>> +++ b/drivers/staging/media/hantro/hantro_hw.h
>> @@ -341,6 +341,8 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
>> void hantro_hevc_ref_init(struct hantro_ctx *ctx);
>> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
>> int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
>> +int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps);
>> +
>>
>> static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
>> {
>> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
>> index 71a6279750bf..93d0dcf69f4a 100644
>> --- a/drivers/staging/media/hantro/hantro_v4l2.c
>> +++ b/drivers/staging/media/hantro/hantro_v4l2.c
>> @@ -260,7 +260,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
>> } else if (ctx->is_encoder) {
>> vpu_fmt = ctx->vpu_dst_fmt;
>> } else {
>> - vpu_fmt = ctx->vpu_src_fmt;
>> + vpu_fmt = fmt;
>> /*
>> * Width/height on the CAPTURE end of a decoder are ignored and
>> * replaced by the OUTPUT ones.
>> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> index 9802508bade2..b6b2bf65e56d 100644
>> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> @@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
> I remember seeing limits to 4096 instead in VSI documentation (UWHD). Just like
> many hardcoded limits in this driver, perhaps the limits was copied from the
> white paper performance example. It would be weird that we can decode portrait
> UHD/FHD.
>
> I think for this patch, what I'd like to see is the UDH and FHD limits to be set
> as a define. Since if we got them wrong, fixing them later after this patch
> becomes a lot more work, as its copied over and over.
>
> Everything else looks good to me know, thanks for the update.
I will do that in the next version
Benjamin
>
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -90,6 +98,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
>> @@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -144,6 +168,14 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12_4L4,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 4,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 4,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_HEVC_SLICE,
>> @@ -152,10 +184,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> .frmsize = {
>> .min_width = 48,
>> .max_width = 3840,
>> - .step_width = MB_DIM,
>> + .step_width = 4,
>> .min_height = 48,
>> .max_height = 2160,
>> - .step_height = MB_DIM,
>> + .step_height = 4,
>> },
>> },
>> {
>> @@ -165,10 +197,10 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>> .frmsize = {
>> .min_width = 48,
>> .max_width = 3840,
>> - .step_width = MB_DIM,
>> + .step_width = 4,
>> .min_height = 48,
>> .max_height = 2160,
>> - .step_height = MB_DIM,
>> + .step_height = 4,
>> },
>> },
>> };
>> diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> index fc96501f3bc8..efba7fcdf207 100644
>> --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
>> @@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -70,6 +78,14 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> @@ -116,6 +132,14 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 4096,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 2304,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> @@ -162,6 +186,14 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1920,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 1088,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_H264_SLICE,
>> diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> index b2fc1c5613e1..07ee804e706b 100644
>> --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
>> @@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_YUYV,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1280,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 720,
>> + .step_height = MB_DIM,
>> + },
>> },
>> };
>>
>> @@ -23,6 +31,14 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 1280,
>> + .step_width = MB_DIM,
>> + .min_height = 48,
>> + .max_height = 720,
>> + .step_height = MB_DIM,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
>> diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> index c0edd5856a0c..c2392c08febb 100644
>> --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c
>> @@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
>> .fourcc = V4L2_PIX_FMT_NV12,
>> .codec_mode = HANTRO_MODE_NONE,
>> .postprocessed = true,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 32,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 32,
>> + },
>> },
>> };
>>
>> @@ -21,6 +29,14 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
>> {
>> .fourcc = V4L2_PIX_FMT_NV12_4L4,
>> .codec_mode = HANTRO_MODE_NONE,
>> + .frmsize = {
>> + .min_width = 48,
>> + .max_width = 3840,
>> + .step_width = 32,
>> + .min_height = 48,
>> + .max_height = 2160,
>> + .step_height = 32,
>> + },
>> },
>> {
>> .fourcc = V4L2_PIX_FMT_VP9_FRAME,
>> --
>> 2.32.0
>>
>>
More information about the linux-arm-kernel
mailing list