[PATCH 03/13] media: rockchip: rga: allocate DMA descriptors per buffer
Robin Murphy
robin.murphy at arm.com
Thu Sep 14 08:12:16 PDT 2023
On 2023-09-14 13:40, Michael Tretter wrote:
> The RGA driver allocates two buffers for the DMA descriptors of the
> input and output buffers. Whenever a new job is processed, the
> descriptor list is updated for the current buffers.
>
> By updating the descriptor list during buf_prepare, it is possible to
> correctly fail DMABUF imports if the buffers that shall be imported are
> not within the 32 bit address range that can be addressed by the RGA.
>
> Managing the DMA descriptor list with the buffer also makes it easier to
> track the buffer mapping and the plane offsets into this mapping.
>
> The cost is that the driver now requires DMA coherent memory per buffer
> for the descriptor list. However, the size scales with the size of the
> video buffers and is not allocated if the RGA is not used.
>
> While at it, use dma_alloc_coherent to allocate the descriptors and get
> rid of the virt_to_phys calls to get the physical addresses.
>
> Signed-off-by: Michael Tretter <m.tretter at pengutronix.de>
> ---
> drivers/media/platform/rockchip/rga/rga-buf.c | 78 +++++++++++++++++----------
> drivers/media/platform/rockchip/rga/rga-hw.c | 26 ++++-----
> drivers/media/platform/rockchip/rga/rga.c | 31 ++---------
> drivers/media/platform/rockchip/rga/rga.h | 21 ++++++--
> 4 files changed, 86 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/media/platform/rockchip/rga/rga-buf.c b/drivers/media/platform/rockchip/rga/rga-buf.c
> index df5ebc90e32d..e8dcc0d5cb90 100644
> --- a/drivers/media/platform/rockchip/rga/rga-buf.c
> +++ b/drivers/media/platform/rockchip/rga/rga-buf.c
> @@ -55,16 +55,54 @@ rga_queue_setup(struct vb2_queue *vq,
> return 0;
> }
>
> +static int rga_buf_init(struct vb2_buffer *vb)
> +{
> + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> + struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
> + struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> + struct rockchip_rga *rga = ctx->rga;
> + struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
> + int n_desc = 0;
> +
> + n_desc = DIV_ROUND_UP(f->size, PAGE_SIZE);
> +
> + rbuf->n_desc = n_desc;
> + rbuf->dma_desc = dma_alloc_coherent(rga->dev,
> + rbuf->n_desc * sizeof(*rbuf->dma_desc),
> + &rbuf->dma_desc_pa, GFP_KERNEL);
> + if (!rbuf->dma_desc)
> + return -ENOMEM;
> +
> + return 0;
> +}
> +
> static int rga_buf_prepare(struct vb2_buffer *vb)
> {
> + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> + struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
> struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
> + struct rockchip_rga *rga = ctx->rga;
> + int n_desc;
>
> if (IS_ERR(f))
> return PTR_ERR(f);
>
> vb2_set_plane_payload(vb, 0, f->size);
>
> + /* Create local MMU table for RGA */
> + n_desc = fill_descriptors(rbuf->dma_desc,
> + vb2_dma_sg_plane_desc(vb, 0));
> + if (n_desc < 0) {
> + dev_err(rga->dev, "Failed to map buffer");
> + return n_desc;
> + }
> +
> + /* sync local MMU table for RGA */
> + dma_sync_single_for_device(rga->dev, rbuf->dma_desc_pa,
> + n_desc * sizeof(*rbuf->dma_desc),
> + DMA_BIDIRECTIONAL);
This sync is still wrong (and DMA_API_DEBUG should complain), but also
now pointless if you're using a coherent buffer (as is the one in
rga_cmd_set(), seemingly)
Thanks,
Robin.
> +
> return 0;
> }
>
> @@ -76,6 +114,17 @@ static void rga_buf_queue(struct vb2_buffer *vb)
> v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
> }
>
> +static void rga_buf_cleanup(struct vb2_buffer *vb)
> +{
> + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> + struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
> + struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> + struct rockchip_rga *rga = ctx->rga;
> +
> + dma_free_coherent(rga->dev, rbuf->n_desc * sizeof(*rbuf->dma_desc),
> + rbuf->dma_desc, rbuf->dma_desc_pa);
> +}
> +
> static void rga_buf_return_buffers(struct vb2_queue *q,
> enum vb2_buffer_state state)
> {
> @@ -119,37 +168,12 @@ static void rga_buf_stop_streaming(struct vb2_queue *q)
>
> const struct vb2_ops rga_qops = {
> .queue_setup = rga_queue_setup,
> + .buf_init = rga_buf_init,
> .buf_prepare = rga_buf_prepare,
> .buf_queue = rga_buf_queue,
> + .buf_cleanup = rga_buf_cleanup,
> .wait_prepare = vb2_ops_wait_prepare,
> .wait_finish = vb2_ops_wait_finish,
> .start_streaming = rga_buf_start_streaming,
> .stop_streaming = rga_buf_stop_streaming,
> };
> -
> -/* RGA MMU is a 1-Level MMU, so it can't be used through the IOMMU API.
> - * We use it more like a scatter-gather list.
> - */
> -void rga_buf_map(struct vb2_buffer *vb)
> -{
> - struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> - struct rockchip_rga *rga = ctx->rga;
> - struct rga_dma_desc *pages;
> - unsigned int num_desc = 0;
> -
> - if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
> - pages = rga->src_mmu_pages;
> - else
> - pages = rga->dst_mmu_pages;
> -
> - /* Create local MMU table for RGA */
> - num_desc = fill_descriptors(pages, vb2_dma_sg_plane_desc(vb, 0));
> - if (num_desc < 0) {
> - dev_err(rga->dev, "Failed to map buffer");
> - return;
> - }
> -
> - /* sync local MMU table for RGA */
> - dma_sync_single_for_device(rga->dev, virt_to_phys(pages),
> - num_desc * sizeof(*pages), DMA_BIDIRECTIONAL);
> -}
> diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c
> index aaa96f256356..b391d97d4632 100644
> --- a/drivers/media/platform/rockchip/rga/rga-hw.c
> +++ b/drivers/media/platform/rockchip/rga/rga-hw.c
> @@ -119,40 +119,40 @@ static struct rga_addr_offset *rga_lookup_draw_pos(struct
> return NULL;
> }
>
> -static void rga_cmd_set_src_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_src_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
> {
> struct rockchip_rga *rga = ctx->rga;
> u32 *dest = rga->cmdbuf_virt;
> unsigned int reg;
>
> reg = RGA_MMU_SRC_BASE - RGA_MODE_BASE_REG;
> - dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> + dest[reg >> 2] = dma_addr >> 4;
>
> reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
> dest[reg >> 2] |= 0x7;
> }
>
> -static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
> {
> struct rockchip_rga *rga = ctx->rga;
> u32 *dest = rga->cmdbuf_virt;
> unsigned int reg;
>
> reg = RGA_MMU_SRC1_BASE - RGA_MODE_BASE_REG;
> - dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> + dest[reg >> 2] = dma_addr >> 4;
>
> reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
> dest[reg >> 2] |= 0x7 << 4;
> }
>
> -static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
> {
> struct rockchip_rga *rga = ctx->rga;
> u32 *dest = rga->cmdbuf_virt;
> unsigned int reg;
>
> reg = RGA_MMU_DST_BASE - RGA_MODE_BASE_REG;
> - dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> + dest[reg >> 2] = dma_addr >> 4;
>
> reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
> dest[reg >> 2] |= 0x7 << 8;
> @@ -375,20 +375,21 @@ static void rga_cmd_set_mode(struct rga_ctx *ctx)
> dest[(RGA_MODE_CTRL - RGA_MODE_BASE_REG) >> 2] = mode.val;
> }
>
> -static void rga_cmd_set(struct rga_ctx *ctx)
> +static void rga_cmd_set(struct rga_ctx *ctx,
> + struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
> {
> struct rockchip_rga *rga = ctx->rga;
>
> memset(rga->cmdbuf_virt, 0, RGA_CMDBUF_SIZE * 4);
>
> - rga_cmd_set_src_addr(ctx, rga->src_mmu_pages);
> + rga_cmd_set_src_addr(ctx, src->dma_desc_pa);
> /*
> * Due to hardware bug,
> * src1 mmu also should be configured when using alpha blending.
> */
> - rga_cmd_set_src1_addr(ctx, rga->dst_mmu_pages);
> + rga_cmd_set_src1_addr(ctx, dst->dma_desc_pa);
>
> - rga_cmd_set_dst_addr(ctx, rga->dst_mmu_pages);
> + rga_cmd_set_dst_addr(ctx, dst->dma_desc_pa);
> rga_cmd_set_mode(ctx);
>
> rga_cmd_set_trans_info(ctx);
> @@ -400,11 +401,12 @@ static void rga_cmd_set(struct rga_ctx *ctx)
> PAGE_SIZE, DMA_BIDIRECTIONAL);
> }
>
> -void rga_hw_start(struct rockchip_rga *rga)
> +void rga_hw_start(struct rockchip_rga *rga,
> + struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
> {
> struct rga_ctx *ctx = rga->curr;
>
> - rga_cmd_set(ctx);
> + rga_cmd_set(ctx, src, dst);
>
> rga_write(rga, RGA_SYS_CTRL, 0x00);
>
> diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
> index 25f5b5eebf13..f18fccc7b204 100644
> --- a/drivers/media/platform/rockchip/rga/rga.c
> +++ b/drivers/media/platform/rockchip/rga/rga.c
> @@ -45,10 +45,7 @@ static void device_run(void *prv)
> src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
>
> - rga_buf_map(&src->vb2_buf);
> - rga_buf_map(&dst->vb2_buf);
> -
> - rga_hw_start(rga);
> + rga_hw_start(rga, vb_to_rga(src), vb_to_rga(dst));
>
> spin_unlock_irqrestore(&rga->ctrl_lock, flags);
> }
> @@ -101,7 +98,7 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
> src_vq->drv_priv = ctx;
> src_vq->ops = &rga_qops;
> src_vq->mem_ops = &vb2_dma_sg_memops;
> - src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> + src_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
> src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> src_vq->lock = &ctx->rga->mutex;
> src_vq->dev = ctx->rga->v4l2_dev.dev;
> @@ -115,7 +112,7 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
> dst_vq->drv_priv = ctx;
> dst_vq->ops = &rga_qops;
> dst_vq->mem_ops = &vb2_dma_sg_memops;
> - dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> + dst_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
> dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> dst_vq->lock = &ctx->rga->mutex;
> dst_vq->dev = ctx->rga->v4l2_dev.dev;
> @@ -872,26 +869,13 @@ static int rga_probe(struct platform_device *pdev)
> goto rel_m2m;
> }
>
> - rga->src_mmu_pages =
> - (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
> - if (!rga->src_mmu_pages) {
> - ret = -ENOMEM;
> - goto free_dma;
> - }
> - rga->dst_mmu_pages =
> - (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
> - if (!rga->dst_mmu_pages) {
> - ret = -ENOMEM;
> - goto free_src_pages;
> - }
> -
> def_frame.stride = (def_frame.width * def_frame.fmt->depth) >> 3;
> def_frame.size = def_frame.stride * def_frame.height;
>
> ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
> if (ret) {
> v4l2_err(&rga->v4l2_dev, "Failed to register video device\n");
> - goto free_dst_pages;
> + goto free_dma;
> }
>
> v4l2_info(&rga->v4l2_dev, "Registered %s as /dev/%s\n",
> @@ -899,10 +883,6 @@ static int rga_probe(struct platform_device *pdev)
>
> return 0;
>
> -free_dst_pages:
> - free_pages((unsigned long)rga->dst_mmu_pages, 3);
> -free_src_pages:
> - free_pages((unsigned long)rga->src_mmu_pages, 3);
> free_dma:
> dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
> rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
> @@ -925,9 +905,6 @@ static void rga_remove(struct platform_device *pdev)
> dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
> rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
>
> - free_pages((unsigned long)rga->src_mmu_pages, 3);
> - free_pages((unsigned long)rga->dst_mmu_pages, 3);
> -
> v4l2_info(&rga->v4l2_dev, "Removing\n");
>
> v4l2_m2m_release(rga->m2m_dev);
> diff --git a/drivers/media/platform/rockchip/rga/rga.h b/drivers/media/platform/rockchip/rga/rga.h
> index 22f7da28ac51..ae984d5a236d 100644
> --- a/drivers/media/platform/rockchip/rga/rga.h
> +++ b/drivers/media/platform/rockchip/rga/rga.h
> @@ -85,15 +85,27 @@ struct rockchip_rga {
> struct rga_ctx *curr;
> dma_addr_t cmdbuf_phy;
> void *cmdbuf_virt;
> - struct rga_dma_desc *src_mmu_pages;
> - struct rga_dma_desc *dst_mmu_pages;
> };
>
> +struct rga_vb_buffer {
> + struct vb2_v4l2_buffer vb_buf;
> + struct list_head queue;
> +
> + /* RGA MMU mapping for this buffer */
> + struct rga_dma_desc *dma_desc;
> + dma_addr_t dma_desc_pa;
> + int n_desc;
> +};
> +
> +static inline struct rga_vb_buffer *vb_to_rga(struct vb2_v4l2_buffer *vb)
> +{
> + return container_of(vb, struct rga_vb_buffer, vb_buf);
> +}
> +
> struct rga_frame *rga_get_frame(struct rga_ctx *ctx, enum v4l2_buf_type type);
>
> /* RGA Buffers Manage */
> extern const struct vb2_ops rga_qops;
> -void rga_buf_map(struct vb2_buffer *vb);
>
> /* RGA Hardware */
> static inline void rga_write(struct rockchip_rga *rga, u32 reg, u32 value)
> @@ -114,6 +126,7 @@ static inline void rga_mod(struct rockchip_rga *rga, u32 reg, u32 val, u32 mask)
> rga_write(rga, reg, temp);
> };
>
> -void rga_hw_start(struct rockchip_rga *rga);
> +void rga_hw_start(struct rockchip_rga *rga,
> + struct rga_vb_buffer *src, struct rga_vb_buffer *dst);
>
> #endif
>
More information about the linux-arm-kernel
mailing list