[PATCH 1/2] media: rkvdec: reduce excessive stack usage in assemble_hw_pps()
Nicolas Dufresne
nicolas.dufresne at collabora.com
Mon Feb 2 05:42:41 PST 2026
Hi Arnd,
Le lundi 02 février 2026 à 10:47 +0100, Arnd Bergmann a écrit :
> From: Arnd Bergmann <arnd at arndb.de>
>
> The rkvdec_pps had a large set of bitfields, all of which
> are misaligned. This causes clang-21 and likely other versions to
> produce absolutely awful object code and a warning about very
> large stack usage, on targets without unaligned access:
>
> drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c:966:12: error: stack frame size (1472) exceeds limit (1280) in 'rkvdec_vp9_start' [-Werror,-Wframe-larger-than]
We had already addressed and validated that on clang-21, which indicates to me
that we are likely missing an architecture (or a config) in our CI. Can you
document which architecture, configuration and flags were affected so we can add
them on our side?
The media pipeline we ran before sending to Linus and the clang build trace are
at the following links, in case it matters.
https://gitlab.freedesktop.org/linux-media/media-committers/-/pipelines/1588731
https://gitlab.freedesktop.org/linux-media/media-committers/-/jobs/91604655
>
> Part of the problem here is how all the bitfield accesses are
> inlined into a function that already has large structures on
> the stack.
Another observation is that you had to enable ASAN to make it misbehave on
for-loop unrolling (with complex bitfield writes). All I've learned by visiting
the Link: is that it's the armv7-a architecture.
>
> Mark set_field_order_cnt() as noinline_for_stack, and split out
> the following accesses in assemble_hw_pps() into another noinline
> function, both of which now use around 800 bytes of stack in the
> same configuration.
>
> There is clearly still something wrong with clang here, but
> splitting it into multiple functions reduces the risk of stack
> overflow.
We've tried really hard to avoid resorting to noinline_for_stack just because
compilers are buggy. I'll have another look in case I find some ideas, but
meanwhile, with the failing architecture documented in the commit message:
Reviewed-by: Nicolas Dufresne <nicolas.dufresne at collabora.com>
>
> Fixes: fde24907570d ("media: rkvdec: Add H264 support for the VDPU383 variant")
> Link: https://godbolt.org/z/acP1eKeq9
> Signed-off-by: Arnd Bergmann <arnd at arndb.de>
> ---
> .../rockchip/rkvdec/rkvdec-vdpu383-h264.c | 50 ++++++++++---------
> 1 file changed, 27 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
> index 6ab3167addc8..ef69f2a36478 100644
> --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
> +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
> @@ -130,7 +130,7 @@ struct rkvdec_h264_ctx {
> struct vdpu383_regs_h26x regs;
> };
>
> -static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
> +static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
> {
> pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt;
> pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt;
> @@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d
> pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt;
> }
>
> +static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params)
> +{
> + const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
> +
> + for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
> + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
> + pps->is_longterm |= (1 << i);
> + pps->ref_field_flags |=
> + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
> + pps->ref_colmv_use_flag |=
> + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
> + pps->ref_topfield_used |=
> + (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
> + pps->ref_botfield_used |=
> + (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
> + }
> + pps->pic_field_flag =
> + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
> + pps->pic_associated_flag =
> + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
> +
> + pps->cur_top_field = dec_params->top_field_order_cnt;
> + pps->cur_bot_field = dec_params->bottom_field_order_cnt;
> +}
> +
> static void assemble_hw_pps(struct rkvdec_ctx *ctx,
> struct rkvdec_h264_run *run)
> {
> @@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
> struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
> struct rkvdec_sps_pps *hw_ps;
> u32 pic_width, pic_height;
> - u32 i;
>
> /*
> * HW read the SPS/PPS information from PPS packet index by PPS id.
> @@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
> !!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
>
> set_field_order_cnt(&hw_ps->pps, dpb);
> + set_dec_params(&hw_ps->pps, dec_params);
>
> - for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
> - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
> - hw_ps->pps.is_longterm |= (1 << i);
> -
> - hw_ps->pps.ref_field_flags |=
> - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
> - hw_ps->pps.ref_colmv_use_flag |=
> - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
> - hw_ps->pps.ref_topfield_used |=
> - (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
> - hw_ps->pps.ref_botfield_used |=
> - (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
> - }
> -
> - hw_ps->pps.pic_field_flag =
> - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
> - hw_ps->pps.pic_associated_flag =
> - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
> -
> - hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt;
> - hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt;
> }
>
> static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: This is a digitally signed message part
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20260202/2492b855/attachment.sig>
More information about the linux-arm-kernel
mailing list