[PATCH 22/22] drm: rockchip: Add VOP2 driver

Sascha Hauer s.hauer at pengutronix.de
Mon Jan 3 06:55:03 PST 2022


Hi Nicolas,

Thanks for the review. I have changed the points I don't comment on here
according to your suggestions.

> > +#include <linux/regmap.h>
> > +#include <linux/mfd/syscon.h>
> > +#include <linux/delay.h>
> > +#include <linux/swab.h>
> > +#include <linux/bitfield.h>
> > +#include <drm/drm_debugfs.h>
> > +#include <uapi/linux/videodev2.h>
> > +#include <drm/drm_vblank.h>
> > +#include <dt-bindings/soc/rockchip,vop2.h>
> 
> Alphabetically sort these includes? Not sure on what the
> convention is in the DRM subsystem.

Me neither. The first random file I looked at in drivers/gpu/drm/ has
the includes sorted alphabetically, so I'll do the same.

> 
> > +static void vop2_cfg_done(struct vop2_video_port *vp)
> > +{
> > +	struct vop2 *vop2 = vp->vop2;
> > +	uint32_t val;
> > +
> > +	val = vop2_readl(vop2, RK3568_REG_CFG_DONE);
> > +
> > +	val &= 0x7;
> 
> GENMASK(2, 0) instead of 0x7, should that be a constant somewhere?
> 
> > +
> > +	vop2_writel(vop2, RK3568_REG_CFG_DONE,
> > +		    val | BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);

Replaced that with the following which doesn't need a mask:

	regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE,
				BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);


> > +}
> > +
> > +static void vop2_win_disable(struct vop2_win *win)
> > +{
> > +	vop2_win_write(win, VOP2_WIN_ENABLE, 0);
> > +
> > +	if (vop2_cluster_window(win))
> > +		vop2_win_write(win, VOP2_WIN_CLUSTER_ENABLE, 0);
> > +}
> > +
> > +static uint32_t vop2_afbc_transform_offset(struct drm_plane_state *pstate,
> > +					   bool afbc_half_block_en)
> > +{
> > +	struct drm_rect *src = &pstate->src;
> > +	struct drm_framebuffer *fb = pstate->fb;
> > +	uint32_t bpp = fb->format->cpp[0] * 8;
> > +	uint32_t vir_width = (fb->pitches[0] << 3) / bpp;
> > +	uint32_t width = drm_rect_width(src) >> 16;
> > +	uint32_t height = drm_rect_height(src) >> 16;
> > +	uint32_t act_xoffset = src->x1 >> 16;
> > +	uint32_t act_yoffset = src->y1 >> 16;
> > +	uint32_t align16_crop = 0;
> > +	uint32_t align64_crop = 0;
> > +	uint32_t height_tmp = 0;
> > +	uint32_t transform_tmp = 0;
> > +	uint8_t transform_xoffset = 0;
> > +	uint8_t transform_yoffset = 0;
> > +	uint8_t top_crop = 0;
> > +	uint8_t top_crop_line_num = 0;
> > +	uint8_t bottom_crop_line_num = 0;
> > +
> > +	/* 16 pixel align */
> > +	if (height & 0xf)
> > +		align16_crop = 16 - (height & 0xf);
> > +
> > +	height_tmp = height + align16_crop;
> > +
> > +	/* 64 pixel align */
> > +	if (height_tmp & 0x3f)
> > +		align64_crop = 64 - (height_tmp & 0x3f);
> > +
> > +	top_crop_line_num = top_crop << 2;
> > +	if (top_crop == 0)
> > +		bottom_crop_line_num = align16_crop + align64_crop;
> > +	else if (top_crop == 1)
> > +		bottom_crop_line_num = align16_crop + align64_crop + 12;
> > +	else if (top_crop == 2)
> > +		bottom_crop_line_num = align16_crop + align64_crop + 8;
> 
> top_crop == 0 is always taken from what I can gather, rest is
> dead code. Is this intentional? It doesn't seem intentional.

It's the same in the downstream driver. I don't know what top_crop is
good for. I just removed the dead code for now.

> > +		transform_xoffset = transform_tmp & 0xf;
> > +		transform_tmp = vir_width - width - act_xoffset;
> > +		transform_yoffset = transform_tmp & 0xf;
> > +		break;
> > +	case 0:
> > +		transform_tmp = act_xoffset;
> > +		transform_xoffset = transform_tmp & 0xf;
> > +		transform_tmp = top_crop_line_num + act_yoffset;
> > +
> > +		if (afbc_half_block_en)
> > +			transform_yoffset = transform_tmp & 0x7;
> > +		else
> > +			transform_yoffset = transform_tmp & 0xf;
> > +
> > +		break;
> > +	}
> > +
> > +	return (transform_xoffset & 0xf) | ((transform_yoffset & 0xf) << 16);
> > +}
> 
> 0xf, 0x3f, 0x7 might be more clear as GENMASK.
> if (afbc_half_block_en) branches can be moved out of cases and
> assign a transform mask variable instead.

Sure. Other than that the function can be simplified a bit more.

> 
> > +
> > +/*
> > + * A Cluster window has 2048 x 16 line buffer, which can
> > + * works at 2048 x 16(Full) or 4096 x 8 (Half) mode.
> > + * for Cluster_lb_mode register:
> > + * 0: half mode, for plane input width range 2048 ~ 4096
> > + * 1: half mode, for cluster work at 2 * 2048 plane mode
> > + * 2: half mode, for rotate_90/270 mode
> > + *
> > + */
> > +static int vop2_get_cluster_lb_mode(struct vop2_win *win, struct drm_plane_state *pstate)
> > +{
> > +	if ((pstate->rotation & DRM_MODE_ROTATE_270) || (pstate->rotation & DRM_MODE_ROTATE_90))
> > +		return 2;
> > +	else
> > +		return 0;
> > +}
> 
> What about 1?

1 is used in the downstream driver for cluster sub windows. These are
currently not supported, I don't know yet where this is leading to.

> 
> > +
> > +/*
> > + * bli_sd_factor = (src - 1) / (dst - 1) << 12;
> > + * avg_sd_factor:
> > + * bli_su_factor:
> > + * bic_su_factor:
> > + * = (src - 1) / (dst - 1) << 16;
> > + *
> > + * gt2 enable: dst get one line from two line of the src
> > + * gt4 enable: dst get one line from four line of the src.
> > + *
> > + */
> > +static uint16_t vop2_scale_factor(enum scale_mode mode,
> > +				  int32_t filter_mode,
> > +				  uint32_t src, uint32_t dst)
> > +{
> > +	uint32_t fac;
> > +	int i;
> > +
> > +	if (mode == SCALE_NONE)
> > +		return 0;
> > +
> > +	/*
> > +	 * A workaround to avoid zero div.
> > +	 */
> > +	if (dst == 1 || src == 1) {
> > +		dst++;
> > +		src++;
> > +	}
> > +
> > +	if ((mode == SCALE_DOWN) && (filter_mode == VOP2_SCALE_DOWN_BIL)) {
> > +		fac = ((src - 1) << 12) / (dst - 1);
> > +		for (i = 0; i < 100; i++) {
> > +			if (fac * (dst - 1) >> 12 < (src - 1))
> > +				break;
> > +			fac -= 1;
> > +			DRM_DEBUG("down fac cali: src:%d, dst:%d, fac:0x%x\n", src, dst, fac);
> > +		}
> > +	} else {
> > +		fac = ((src - 1) << 16) / (dst - 1);
> > +		for (i = 0; i < 100; i++) {
> > +			if (fac * (dst - 1) >> 16 < (src - 1))
> > +				break;
> > +			fac -= 1;
> > +			DRM_DEBUG("up fac cali:  src:%d, dst:%d, fac:0x%x\n", src, dst, fac);
> > +		}
> > +	}
> > +
> > +	return fac;
> > +}
> 
> src = 0 or dst = 0 causes an uint underflow here.

Right. Looking at it these loops are rather unnecessary and duplicating
the code also. The whole function goes down to:

static uint16_t vop2_scale_factor(uint32_t src, uint32_t dst)
{
	uint32_t fac;
	int shift;

	if (src == dst)
		return 0;

	if (dst < 2)
		return U16_MAX;

	if (src < 2)
		return 0;

	if (src > dst)
		shift = 12;
	else
		shift = 16;

	src--;
	dst--;

	fac = DIV_ROUND_UP(src << shift, dst) - 1;

	if (fac > U16_MAX)
		return U16_MAX;

	return fac;
}


> > +static void vop2_enable(struct vop2 *vop2)
> > +{
> > +	int ret;
> > +	uint32_t v;
> > +
> > +	ret = pm_runtime_get_sync(vop2->dev);
> > +	if (ret < 0) {
> > +		drm_err(vop2->drm, "failed to get pm runtime: %d\n", ret);
> > +		return;
> > +	}
> > +
> > +	ret = vop2_core_clks_prepare_enable(vop2);
> > +	if (ret) {
> > +		pm_runtime_put_sync(vop2->dev);
> > +		return;
> > +	}
> > +
> > +	if (vop2->data->soc_id == 3566)
> > +		vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
> > +
> > +	vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
> > +
> > +	/*
> > +	 * Disable auto gating, this is a workaround to
> > +	 * avoid display image shift when a window enabled.
> > +	 */
> > +	v = vop2_readl(vop2, RK3568_SYS_AUTO_GATING_CTRL);
> > +	v &= ~RK3568_SYS_AUTO_GATING_CTRL__AUTO_GATING_EN;
> > +	vop2_writel(vop2, RK3568_SYS_AUTO_GATING_CTRL, v);
> > +
> > +	vop2_writel(vop2, RK3568_SYS0_INT_CLR,
> > +		    VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
> > +	vop2_writel(vop2, RK3568_SYS0_INT_EN,
> > +		    VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
> > +	vop2_writel(vop2, RK3568_SYS1_INT_CLR,
> > +		    VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
> > +	vop2_writel(vop2, RK3568_SYS1_INT_EN,
> > +		    VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
> > +}
> 
> Does reg writes but doesn't have the spinlock.

I didn't care about it at all when working on this driver, so we can be
sure it's at the wrong places right now.

I think we can just drop the register spinlock. The driver uses regmap
now, so read-modify-write accesses are covered by the regmap internal
locking as long as we use regmap_update_bits(). The remaining accesses
are either orthogonal anyway or are covered by the vop2_lock mutex.

> > +	vop2_lock(vop2);
> > +
> > +	ret = clk_prepare_enable(vp->dclk);
> > +	if (ret < 0) {
> > +		drm_err(vop2->drm, "failed to enable dclk for video port%d - %d\n",
> > +			      vp->id, ret);
> > +		return;
> 
> vop2_lock is never unlocked in this branch

I'll move the call to vop2_lock() below the clk_prepare_enable().

> > +	}
> > +
> > +	pm_runtime_put(vop2->dev);
> > +
> > +	return ret;
> > +}
> 
> Does reg writes, does the ISR need the reg spinlock here? I'm not
> sure.
> 
> In general there appear to be a lot of issues with the register
> lock, so either I'm misunderstanding what it's supposed to protect
> or the code is very thread unsafe.

Yeah, the original driver had the spinlock and as said I kept it but
didn't maintain it. Let's remove it for now.

Sascha

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



More information about the linux-arm-kernel mailing list