[PATCH v2 2/2] media: imx: vdic: Introduce mem2mem VDI deinterlacer driver

Wed Sep 25 13:45:20 PDT 2024

On 9/25/24 7:58 PM, Nicolas Dufresne wrote:

Hi,

[...]

>> +static struct v4l2_pix_format *
>> +ipu_mem2mem_vdic_get_format(struct ipu_mem2mem_vdic_priv *priv,
>> +			    enum v4l2_buf_type type)
>> +{
>> +	return &priv->fmt[V4L2_TYPE_IS_OUTPUT(type) ? V4L2_M2M_SRC : V4L2_M2M_DST];
>> +}
> 
>  From here ...
> 
>> +
>> +static bool ipu_mem2mem_vdic_format_is_yuv420(const u32 pixelformat)
>> +{
>> +	/* All 4:2:0 subsampled formats supported by this hardware */
>> +	return pixelformat == V4L2_PIX_FMT_YUV420 ||
>> +	       pixelformat == V4L2_PIX_FMT_YVU420 ||
>> +	       pixelformat == V4L2_PIX_FMT_NV12;
>> +}
>> +
>> +static bool ipu_mem2mem_vdic_format_is_yuv422(const u32 pixelformat)
>> +{
>> +	/* All 4:2:2 subsampled formats supported by this hardware */
>> +	return pixelformat == V4L2_PIX_FMT_UYVY ||
>> +	       pixelformat == V4L2_PIX_FMT_YUYV ||
>> +	       pixelformat == V4L2_PIX_FMT_YUV422P ||
>> +	       pixelformat == V4L2_PIX_FMT_NV16;
>> +}
>> +
>> +static bool ipu_mem2mem_vdic_format_is_yuv(const u32 pixelformat)
>> +{
>> +	return ipu_mem2mem_vdic_format_is_yuv420(pixelformat) ||
>> +	       ipu_mem2mem_vdic_format_is_yuv422(pixelformat);
>> +}
>> +
>> +static bool ipu_mem2mem_vdic_format_is_rgb16(const u32 pixelformat)
>> +{
>> +	/* All 16-bit RGB formats supported by this hardware */
>> +	return pixelformat == V4L2_PIX_FMT_RGB565;
>> +}
>> +
>> +static bool ipu_mem2mem_vdic_format_is_rgb24(const u32 pixelformat)
>> +{
>> +	/* All 24-bit RGB formats supported by this hardware */
>> +	return pixelformat == V4L2_PIX_FMT_RGB24 ||
>> +	       pixelformat == V4L2_PIX_FMT_BGR24;
>> +}
>> +
>> +static bool ipu_mem2mem_vdic_format_is_rgb32(const u32 pixelformat)
>> +{
>> +	/* All 32-bit RGB formats supported by this hardware */
>> +	return pixelformat == V4L2_PIX_FMT_XRGB32 ||
>> +	       pixelformat == V4L2_PIX_FMT_XBGR32 ||
>> +	       pixelformat == V4L2_PIX_FMT_BGRX32 ||
>> +	       pixelformat == V4L2_PIX_FMT_RGBX32;
>> +}
> 
> To here, these days, all this information can be derived from v4l2_format_info
> in v4l2-common in a way you don't have to create a big barrier to adding more
> formats in the future.

I am not sure I quite understand this suggestion, what should I change here?

Note that the IPUv3 seems to be done, it does not seem like there will 
be new SoCs with this block, so the list of formats here is likely final.

[...]

>> +static irqreturn_t ipu_mem2mem_vdic_nfb4eof_interrupt(int irq, void *dev_id)
>> +{
>> +	struct ipu_mem2mem_vdic_priv *priv = dev_id;
>> +
>> +	/* That is about all we can do about it, report it. */
>> +	dev_warn_ratelimited(priv->dev, "NFB4EOF error interrupt occurred\n");
> 
> Not sure this is right. If that means ipu_mem2mem_vdic_eof_interrupt won't fire,
> then it means streamoff/close after that will hang forever, leaving a zombie
> process behind.
> 
> Perhaps mark the buffers as ERROR, and finish the job.

The NFB4EOF interrupt is generated when the VDIC didn't write (all of) 
output frame . I think it stands for "New Frame Before EOF" or some 
such. Basically the currently written frame will be corrupted and the 
next frame(s) are likely going to be OK again.

>> +
>> +	return IRQ_HANDLED;
>> +}
>> +
>> +static void ipu_mem2mem_vdic_device_run(void *_ctx)
>> +{
>> +	struct ipu_mem2mem_vdic_ctx *ctx = _ctx;
>> +	struct ipu_mem2mem_vdic_priv *priv = ctx->priv;
>> +	struct vb2_v4l2_buffer *curr_buf, *dst_buf;
>> +	dma_addr_t prev_phys, curr_phys, out_phys;
>> +	struct v4l2_pix_format *infmt;
>> +	u32 phys_offset = 0;
>> +	unsigned long flags;
>> +
>> +	infmt = ipu_mem2mem_vdic_get_format(priv, V4L2_BUF_TYPE_VIDEO_OUTPUT);
>> +	if (V4L2_FIELD_IS_SEQUENTIAL(infmt->field))
>> +		phys_offset = infmt->sizeimage / 2;
>> +	else if (V4L2_FIELD_IS_INTERLACED(infmt->field))
>> +		phys_offset = infmt->bytesperline;
>> +	else
>> +		dev_err(priv->dev, "Invalid field %d\n", infmt->field);
>> +
>> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
>> +	out_phys = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
>> +
>> +	curr_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
>> +	if (!curr_buf) {
>> +		dev_err(priv->dev, "Not enough buffers\n");
>> +		return;
> 
> Impossible branch, has been checked by __v4l2_m2m_try_queue().

Fixed in V3

>> +	}
>> +
>> +	spin_lock_irqsave(&priv->irqlock, flags);
>> +
>> +	if (ctx->curr_buf) {
>> +		ctx->prev_buf = ctx->curr_buf;
>> +		ctx->curr_buf = curr_buf;
>> +	} else {
>> +		ctx->prev_buf = curr_buf;
>> +		ctx->curr_buf = curr_buf;
>> +		dev_warn(priv->dev, "Single-buffer mode, fix your userspace\n");
>> +	}
> 
> The driver is not taking ownership of prev_buf, only curr_buf is guaranteed to
> exist until v4l2_m2m_job_finish() is called. Usespace could streamoff, allocate
> new buffers, and then an old freed buffer may endup being used.

So, what should I do about this ? Is there some way to ref the buffer to 
keep it around ?

> Its also unclear to me how userspace can avoid this ugly warning, how can you
> have curr_buf set the first time ? (I might be missing something you this one
> though).

The warning happens when streaming starts and there is only one input 
frame available for the VDIC, which needs three fields to work 
correctly. So, if there in only one input frame, VDI uses the input 
frame bottom field as PREV field for the prediction, and input frame top 
and bottom fields as CURR and NEXT fields for the prediction, the result 
may be one sub-optimal deinterlaced output frame (the first one). Once 
another input frame gets enqueued, the VDIC uses the previous frame 
bottom field as PREV and the newly enqueued frame top and bottom fields 
as CURR and NEXT and the prediction works correctly from that point on.

> Perhaps what you want is a custom job_ready() callback, that ensure you have 2
> buffers in the OUTPUT queue ? You also need to ajust the CID
> MIN_BUFFERS_FOR_OUTPUT accordingly.

I had that before, but gstreamer didn't enqueue the two frames for me, 
so I got back to this variant for maximum compatibility.

>> +	prev_phys = vb2_dma_contig_plane_dma_addr(&ctx->prev_buf->vb2_buf, 0);
>> +	curr_phys = vb2_dma_contig_plane_dma_addr(&ctx->curr_buf->vb2_buf, 0);
>> +
>> +	priv->curr_ctx = ctx;
>> +	spin_unlock_irqrestore(&priv->irqlock, flags);
>> +
>> +	ipu_cpmem_set_buffer(priv->vdi_out_ch,  0, out_phys);
>> +	ipu_cpmem_set_buffer(priv->vdi_in_ch_p, 0, prev_phys + phys_offset);
>> +	ipu_cpmem_set_buffer(priv->vdi_in_ch,   0, curr_phys);
>> +	ipu_cpmem_set_buffer(priv->vdi_in_ch_n, 0, curr_phys + phys_offset);
>> +
>> +	/* No double buffering, always pick buffer 0 */
>> +	ipu_idmac_select_buffer(priv->vdi_out_ch, 0);
>> +	ipu_idmac_select_buffer(priv->vdi_in_ch_p, 0);
>> +	ipu_idmac_select_buffer(priv->vdi_in_ch, 0);
>> +	ipu_idmac_select_buffer(priv->vdi_in_ch_n, 0);
>> +
>> +	/* Enable the channels */
>> +	ipu_idmac_enable_channel(priv->vdi_out_ch);
>> +	ipu_idmac_enable_channel(priv->vdi_in_ch_p);
>> +	ipu_idmac_enable_channel(priv->vdi_in_ch);
>> +	ipu_idmac_enable_channel(priv->vdi_in_ch_n);
>> +}

[...]

>> +static int ipu_mem2mem_vdic_try_fmt(struct file *file, void *fh,
>> +				    struct v4l2_format *f)
>> +{
>> +	const struct imx_media_pixfmt *cc;
>> +	enum imx_pixfmt_sel cs;
>> +	u32 fourcc;
>> +
>> +	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {	/* Output */
>> +		cs = PIXFMT_SEL_YUV_RGB;	/* YUV direct / RGB via IC */
>> +
>> +		f->fmt.pix.field = V4L2_FIELD_NONE;
>> +	} else {
>> +		cs = PIXFMT_SEL_YUV;		/* YUV input only */
>> +
>> +		/*
>> +		 * Input must be interlaced with frame order.
>> +		 * Fall back to SEQ_TB otherwise.
>> +		 */
>> +		if (!V4L2_FIELD_HAS_BOTH(f->fmt.pix.field) ||
>> +		    f->fmt.pix.field == V4L2_FIELD_INTERLACED)
>> +			f->fmt.pix.field = V4L2_FIELD_SEQ_TB;
>> +	}
>> +
>> +	fourcc = f->fmt.pix.pixelformat;
>> +	cc = imx_media_find_pixel_format(fourcc, cs);
>> +	if (!cc) {
>> +		imx_media_enum_pixel_formats(&fourcc, 0, cs, 0);
>> +		cc = imx_media_find_pixel_format(fourcc, cs);
>> +	}
>> +
>> +	f->fmt.pix.pixelformat = cc->fourcc;
>> +
>> +	v4l_bound_align_image(&f->fmt.pix.width,
>> +			      1, 968, 1,
>> +			      &f->fmt.pix.height,
>> +			      1, 1024, 1, 1);
> 
> Perhaps use defines for the magic numbers ?

Fixed in V3, thanks

>> +
>> +	if (ipu_mem2mem_vdic_format_is_yuv420(f->fmt.pix.pixelformat))
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width * 3 / 2;
>> +	else if (ipu_mem2mem_vdic_format_is_yuv422(f->fmt.pix.pixelformat))
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
>> +	else if (ipu_mem2mem_vdic_format_is_rgb16(f->fmt.pix.pixelformat))
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
>> +	else if (ipu_mem2mem_vdic_format_is_rgb24(f->fmt.pix.pixelformat))
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width * 3;
>> +	else if (ipu_mem2mem_vdic_format_is_rgb32(f->fmt.pix.pixelformat))
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width * 4;
>> +	else
>> +		f->fmt.pix.bytesperline = f->fmt.pix.width;
>> +
>> +	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
> 
> And use v4l2-common ?

I don't really understand, there is nothing in v4l2-common.c that would 
be really useful replacement for this ?

>> +	return 0;
>> +}
>> +
>> +static int ipu_mem2mem_vdic_s_fmt(struct file *file, void *fh, struct v4l2_format *f)
>> +{
>> +	struct ipu_mem2mem_vdic_ctx *ctx = fh_to_ctx(fh);
>> +	struct ipu_mem2mem_vdic_priv *priv = ctx->priv;
>> +	struct v4l2_pix_format *fmt, *infmt, *outfmt;
>> +	struct vb2_queue *vq;
>> +	int ret;
>> +
>> +	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
>> +	if (vb2_is_busy(vq)) {
>> +		dev_err(priv->dev, "%s queue busy\n",  __func__);
>> +		return -EBUSY;
>> +	}
>> +
>> +	ret = ipu_mem2mem_vdic_try_fmt(file, fh, f);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	fmt = ipu_mem2mem_vdic_get_format(priv, f->type);
>> +	*fmt = f->fmt.pix;
>> +
>> +	/* Propagate colorimetry to the capture queue */
>> +	infmt = ipu_mem2mem_vdic_get_format(priv, V4L2_BUF_TYPE_VIDEO_OUTPUT);
>> +	outfmt = ipu_mem2mem_vdic_get_format(priv, V4L2_BUF_TYPE_VIDEO_CAPTURE);
>> +	outfmt->colorspace = infmt->colorspace;
>> +	outfmt->ycbcr_enc = infmt->ycbcr_enc;
>> +	outfmt->xfer_func = infmt->xfer_func;
>> +	outfmt->quantization = infmt->quantization;
> 
> So you can do CSC conversion but not colorimetry ? We have
> V4L2_PIX_FMT_FLAG_SET_CSC if you can do colorimetry transforms too. I have
> patches that I'll send for the csc-scaler driver.

See ipu_ic_calc_csc() , that's what does the colorspace conversion in 
this driver (on output from VDI).

[...]