[PATCH v6 10/13] media: verisilicon: Add Rockchip AV1 decoder
AngeloGioacchino Del Regno
angelogioacchino.delregno at collabora.com
Thu Apr 13 02:16:19 PDT 2023
Il 12/04/23 13:56, Benjamin Gaignard ha scritto:
> Implement AV1 stateless decoder for rockchip VPU981.
> It decode 8 and 10 bits AV1 bitstreams.
> AV1 scaling feature is done by the postprocessor.
>
> Signed-off-by: Benjamin Gaignard <benjamin.gaignard at collabora.com>
> Reviewed-by: Nicolas Dufresne <nicolas.dufresne at collabora.com>
> ---
> Changes in v6:
> - Fix frame-larger-than warning.
>
> drivers/media/platform/verisilicon/Makefile | 1 +
> .../media/platform/verisilicon/hantro_hw.h | 64 +-
> .../verisilicon/rockchip_vpu981_hw_av1_dec.c | 2024 +++++++++++++++++
> .../verisilicon/rockchip_vpu981_regs.h | 477 ++++
> 4 files changed, 2564 insertions(+), 2 deletions(-)
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>
..snip..
> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
> index d7641eced32e..25456fb960c8 100644
> --- a/drivers/media/platform/verisilicon/hantro_hw.h
> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
> @@ -37,6 +37,8 @@
>
> #define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
>
> +#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
> +
> struct hantro_dev;
> struct hantro_ctx;
> struct hantro_buf;
> @@ -250,23 +252,81 @@ struct hantro_vp9_dec_hw_ctx {
> };
>
> /**
> - * hantro_av1_dec_hw_ctx
> + * struct hantro_av1_dec_ctrls
> + * @sequence: AV1 Sequence
> + * @tile_group_entry: AV1 Tile Group entry
> + * @frame: AV1 Frame Header OBU
> + * @film_grain: AV1 Film Grain
> + */
> +struct hantro_av1_dec_ctrls {
> + const struct v4l2_ctrl_av1_sequence *sequence;
> + const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
> + const struct v4l2_ctrl_av1_frame *frame;
> + const struct v4l2_ctrl_av1_film_grain *film_grain;
> +};
> +
> +struct hantro_av1_frame_ref {
> + int width;
> + int height;
> + int mi_cols;
> + int mi_rows;
I wonder if we can save some memory here, if w/h/mi_c/mi_r can never be
negative, it would be possible to change them to a unsigned type, in which
case, a u16 would probably be fine at least for width and height as I would
never expect a resolution of more than 65535x65535.
Even a s16 would probably be fine, anyway - but that's your call.
P.S.: If this is a structure coming from the HW instead, all members shall
use fixed size type!
..snip..
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> new file mode 100644
> index 000000000000..b9fc70f606a3
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> @@ -0,0 +1,2024 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, Collabora
Shouldn't this be 2023, Collabora Ltd. ? :-)
> + *
> + * Author: Benjamin Gaignard <benjamin.gaignard at collabora.com>
> + */
> +
> +#include <media/v4l2-mem2mem.h>
> +#include "hantro.h"
> +#include "hantro_v4l2.h"
> +#include "rockchip_vpu981_regs.h"
> +
..snip..
> +
> +static int rockchip_vpu981_av1_tile_log2(int target)
> +{
> + int k;
> +
> + /*
> + * returns the smallest value for k such that 1 << k is greater
> + * than or equal to target
Spaces -> tabs here, please.
> + */
> + for (k = 0; (1 << k) < target; k++);
> +
> + return k;
> +}
> +
..snip..
> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
> + const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> + ctrls->tile_group_entry;
> + int context_update_y =
> + tile_info->context_update_tile_id / tile_info->tile_cols;
> + int context_update_x =
> + tile_info->context_update_tile_id % tile_info->tile_cols;
> + int context_update_tile_id =
> + context_update_x * tile_info->tile_rows + context_update_y;
> + u8 *dst = av1_dec->tile_info.cpu;
> + struct hantro_dev *vpu = ctx->dev;
> + int tile0, tile1;
> +
> + memset(dst, 0, av1_dec->tile_info.size);
> +
> + for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
> + for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
> + int tile_id = tile1 * tile_info->tile_cols + tile0;
> + u32 start, end;
> + u32 y0 =
> + tile_info->height_in_sbs_minus_1[tile1] + 1;
> + u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
> +
> + // tile size in SB units (width,height)
Please be consistent with comments style.
You used C-style before, so please do the same here.
> + *dst++ = x0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = y0;
> + *dst++ = 0;
> + *dst++ = 0;
> + *dst++ = 0;
> +
> + // tile start position
> + start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
> + *dst++ = start & 255;
> + *dst++ = (start >> 8) & 255;
> + *dst++ = (start >> 16) & 255;
> + *dst++ = (start >> 24) & 255;
> +
> + // # of bytes in tile data
s/#/number/g
> + end = start + group_entry[tile_id].tile_size;
> + *dst++ = end & 255;
> + *dst++ = (end >> 8) & 255;
> + *dst++ = (end >> 16) & 255;
> + *dst++ = (end >> 24) & 255;
> + }
> + }
> +
> + hantro_reg_write(vpu, &av1_multicore_expect_context_update,
> + !!(context_update_x == 0));
fits in one line of 96 columns
> + hantro_reg_write(vpu, &av1_tile_enable,
> + !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
> + hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
> + hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
> + hantro_reg_write(vpu, &av1_context_update_tile_id,
> + context_update_tile_id);
ditto
> + hantro_reg_write(vpu, &av1_tile_transpose, 1);
> + if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
> + rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
> + hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
> + else
> + hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
> +
> + hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
> +}
> +
..snip..
> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + bool frame_is_intra = IS_INTRA(frame->frame_type);
> + struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
> + int i;
> +
> + if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
> + return;
> +
> + for (i = 0; i < NUM_REF_FRAMES; i++) {
> + if (frame->refresh_frame_flags & (1 << i)) {
You can use the BIT() macro here:
if (frame->refresh_frame_flags & BIT(i)) {
> + struct mvcdfs stored_mv_cdf;
> +
> + rockchip_av1_get_cdfs(ctx, i);
> + stored_mv_cdf = av1_dec->cdfs->mv_cdf;
> + *av1_dec->cdfs = *out_cdfs;
> + if (frame_is_intra) {
> + av1_dec->cdfs->mv_cdf = stored_mv_cdf;
> + *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
> + }
> + rockchip_av1_store_cdfs(ctx,
> + frame->refresh_frame_flags);
> + break;
> + }
> + }
> +}
..snip..
> +
> +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
> +{
> + struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> + struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> + const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> + const struct v4l2_av1_cdef *cdef = &frame->cdef;
> + struct hantro_dev *vpu = ctx->dev;
> + u32 luma_pri_strength = 0;
> + u16 luma_sec_strength = 0;
> + u32 chroma_pri_strength = 0;
> + u16 chroma_sec_strength = 0;
> + int i;
> +
> + hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
> + hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
> +
> + for (i = 0; i < (1 << cdef->bits); i++) {
for (i = 0; i < BIT(cdef->bits); i++) {
> + luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
> + if (cdef->y_sec_strength[i] == 4)
> + luma_sec_strength |= 3 << (i * 2);
> + else
> + luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
> +
> + chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
> + if (cdef->uv_sec_strength[i] == 4)
> + chroma_sec_strength |= 3 << (i * 2);
> + else
> + chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
> + }
> +
> + hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
> + luma_pri_strength);
> + hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
> + luma_sec_strength);
> + hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
> + chroma_pri_strength);
> + hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
> + chroma_sec_strength);
> +
> + hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
> +}
> +
..snip..
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> new file mode 100644
> index 000000000000..182e6c830ff6
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> @@ -0,0 +1,477 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022, Collabora
> + *
> + * Author: Benjamin Gaignard <benjamin.gaignard at collabora.com>
> + */
> +
> +#ifndef _ROCKCHIP_VPU981_REGS_H_
> +#define _ROCKCHIP_VPU981_REGS_H_
> +
> +#include "hantro.h"
> +
> +#define AV1_SWREG(nr) ((nr) * 4)
> +
> +#define AV1_DEC_REG(b, s, m) \
> + ((const struct hantro_reg) { \
> + .base = AV1_SWREG(b), \
> + .shift = s, \
> + .mask = m, \
> + })
> +
> +#define AV1_REG_INTERRUPT AV1_SWREG(1)
> +#define AV1_REG_INTERRUPT_DEC_RDY_INT BIT(12)
> +
> +#define AV1_REG_CONFIG AV1_SWREG(2)
> +#define AV1_REG_CONFIG_DEC_CLK_GATE_E BIT(10)
> +
> +#define av1_dec_e AV1_DEC_REG(1, 0, 0x1)
I personally dislike definitions in lowercase, but it's like that across
the entire driver and we must keep consistency, so it's fine.
Regards,
Angelo
More information about the Linux-rockchip
mailing list