[PATCH 4/7] drm/apu: Add support of IOMMU
Robin Murphy
robin.murphy at arm.com
Thu May 18 06:24:53 PDT 2023
On 2023-05-17 15:52, Alexandre Bailon wrote:
> Some APU devices are behind an IOMMU.
> For some of these devices, we can't use DMA API because
> they use static addresses so we have to manually use
> IOMMU API to correctly map the buffers.
Except you still need to use the DMA for the sake of cache coherency and
any other aspects :(
> This adds support of IOMMU.
>
> Signed-off-by: Alexandre Bailon <abailon at baylibre.com>
> Reviewed-by: Julien Stephan <jstephan at baylibre.com>
> ---
> drivers/gpu/drm/apu/apu_drv.c | 4 +
> drivers/gpu/drm/apu/apu_gem.c | 174 +++++++++++++++++++++++++++++
> drivers/gpu/drm/apu/apu_internal.h | 16 +++
> drivers/gpu/drm/apu/apu_sched.c | 28 +++++
> include/uapi/drm/apu_drm.h | 12 +-
> 5 files changed, 233 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/apu/apu_drv.c b/drivers/gpu/drm/apu/apu_drv.c
> index b6bd340b2bc8..a0dce785a02a 100644
> --- a/drivers/gpu/drm/apu/apu_drv.c
> +++ b/drivers/gpu/drm/apu/apu_drv.c
> @@ -23,6 +23,10 @@ static const struct drm_ioctl_desc ioctls[] = {
> DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(APU_GEM_DEQUEUE, ioctl_gem_dequeue,
> DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(APU_GEM_IOMMU_MAP, ioctl_gem_iommu_map,
> + DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(APU_GEM_IOMMU_UNMAP, ioctl_gem_iommu_unmap,
> + DRM_RENDER_ALLOW),
> };
>
> DEFINE_DRM_GEM_DMA_FOPS(apu_drm_ops);
> diff --git a/drivers/gpu/drm/apu/apu_gem.c b/drivers/gpu/drm/apu/apu_gem.c
> index 0e7b3b27942c..0a91363754c5 100644
> --- a/drivers/gpu/drm/apu/apu_gem.c
> +++ b/drivers/gpu/drm/apu/apu_gem.c
> @@ -2,6 +2,9 @@
> //
> // Copyright 2020 BayLibre SAS
>
> +#include <linux/iommu.h>
> +#include <linux/iova.h>
> +
> #include <drm/drm_gem_dma_helper.h>
>
> #include <uapi/drm/apu_drm.h>
> @@ -42,6 +45,7 @@ int ioctl_gem_new(struct drm_device *dev, void *data,
> */
> apu_obj->size = args->size;
> apu_obj->offset = 0;
> + apu_obj->iommu_refcount = 0;
> mutex_init(&apu_obj->mutex);
>
> ret = drm_gem_handle_create(file_priv, gem_obj, &args->handle);
> @@ -54,3 +58,173 @@ int ioctl_gem_new(struct drm_device *dev, void *data,
>
> return 0;
> }
> +
> +void apu_bo_iommu_unmap(struct apu_drm *apu_drm, struct apu_gem_object *obj)
> +{
> + int iova_pfn;
> + int i;
> +
> + if (!obj->iommu_sgt)
> + return;
> +
> + mutex_lock(&obj->mutex);
> + obj->iommu_refcount--;
> + if (obj->iommu_refcount) {
> + mutex_unlock(&obj->mutex);
> + return;
> + }
> +
> + iova_pfn = PHYS_PFN(obj->iova);
Using mm layer operations on IOVAs looks wrong. In practice I don't
think it's ultimately harmful, other than potentially making less
efficient use of IOVA space if the CPU page size is larger than the
IOMMU page size, but it's still a bad code smell when you're using an
IOVA abstraction that is deliberately decoupled from CPU pages.
> + for (i = 0; i < obj->iommu_sgt->nents; i++) {
> + iommu_unmap(apu_drm->domain, PFN_PHYS(iova_pfn),
> + PAGE_ALIGN(obj->iommu_sgt->sgl[i].length));
> + iova_pfn += PHYS_PFN(PAGE_ALIGN(obj->iommu_sgt->sgl[i].length));
You can unmap a set of IOVA-contiguous mappings as a single range with
one call.
> + }
> +
> + sg_free_table(obj->iommu_sgt);
> + kfree(obj->iommu_sgt);
> +
> + free_iova(&apu_drm->iovad, PHYS_PFN(obj->iova));
> + mutex_unlock(&obj->mutex);
> +}
> +
> +static struct sg_table *apu_get_sg_table(struct drm_gem_object *obj)
> +{
> + if (obj->funcs)
> + return obj->funcs->get_sg_table(obj);
> + return NULL;
> +}
> +
> +int apu_bo_iommu_map(struct apu_drm *apu_drm, struct drm_gem_object *obj)
> +{
> + struct apu_gem_object *apu_obj = to_apu_bo(obj);
> + struct scatterlist *sgl;
> + phys_addr_t phys;
> + int total_buf_space;
> + int iova_pfn;
> + int iova;
> + int ret;
> + int i;
> +
> + mutex_lock(&apu_obj->mutex);
> + apu_obj->iommu_refcount++;
> + if (apu_obj->iommu_refcount != 1) {
> + mutex_unlock(&apu_obj->mutex);
> + return 0;
> + }
> +
> + apu_obj->iommu_sgt = apu_get_sg_table(obj);
> + if (IS_ERR(apu_obj->iommu_sgt)) {
> + mutex_unlock(&apu_obj->mutex);
> + return PTR_ERR(apu_obj->iommu_sgt);
> + }
> +
> + total_buf_space = obj->size;
> + iova_pfn = alloc_iova_fast(&apu_drm->iovad,
> + total_buf_space >> PAGE_SHIFT,
> + apu_drm->iova_limit_pfn, true);
If you need things mapped at specific addresses like the commit message
claims, the DMA IOVA allocator is a terrible tool for the job. DRM
already has its own more flexible abstraction for address space
management in the form of drm_mm, so as a DRM driver it would seem a lot
more sensible to use one of those.
And even if you could justify using this allocator, I can't imagine
there's any way you'd need the _fast version (further illustrated by the
fact that you're freeing the IOVAs wrongly for that).
> + apu_obj->iova = PFN_PHYS(iova_pfn);
> +
> + if (!iova_pfn) {
> + dev_err(apu_drm->dev, "Failed to allocate iova address\n");
> + mutex_unlock(&apu_obj->mutex);
> + return -ENOMEM;
> + }
> +
> + iova = apu_obj->iova;
> + sgl = apu_obj->iommu_sgt->sgl;
> + for (i = 0; i < apu_obj->iommu_sgt->nents; i++) {
> + phys = page_to_phys(sg_page(&sgl[i]));
> + ret =
> + iommu_map(apu_drm->domain, PFN_PHYS(iova_pfn), phys,
> + PAGE_ALIGN(sgl[i].length), IOMMU_READ | IOMMU_WRITE,
> + GFP_KERNEL);
> + if (ret) {
> + dev_err(apu_drm->dev, "Failed to iommu map\n");
> + free_iova(&apu_drm->iovad, iova_pfn);
> + mutex_unlock(&apu_obj->mutex);
> + return ret;
> + }
> + iova += sgl[i].offset + sgl[i].length;
> + iova_pfn += PHYS_PFN(PAGE_ALIGN(sgl[i].length));
This looks a lot like it should just be iommu_map_sg(). Also it makes me
suspicious of the relationship between obj->size and the sgtable - if
the size is already pre-calculated to include any required padding then
why can't the caller just provide aligned SG segments in the first
place? Conversely if it's the original un-padded size, then any padding
you *do* add at this point means you're going to overrun the allocated
IOVA space.
> + }
> + mutex_unlock(&apu_obj->mutex);
> +
> + return 0;
> +}
> +
> +int ioctl_gem_iommu_map(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct apu_drm *apu_drm = dev->dev_private;
> + struct drm_apu_gem_iommu_map *args = data;
> + struct drm_gem_object **bos;
> + void __user *bo_handles;
> + u64 *das;
> + int ret;
> + int i;
> +
> + if (!apu_drm->domain)
> + return -ENODEV;
> +
> + das = kvmalloc_array(args->bo_handle_count, sizeof(*das), GFP_KERNEL);
Does anything prevent userspace passing random numbers and being able to
cause arbitrarily large allocations of unaccounted kernel memory here?
> + if (!das)
> + return -ENOMEM;
> +
> + bo_handles = (void __user *)(uintptr_t) args->bo_handles;
> + ret = drm_gem_objects_lookup(file_priv, bo_handles,
> + args->bo_handle_count, &bos);
> + if (ret) {
> + kvfree(das);
> + return ret;
> + }
> +
> + for (i = 0; i < args->bo_handle_count; i++) {
> + ret = apu_bo_iommu_map(apu_drm, bos[i]);
> + if (ret) {
> + /* TODO: handle error */
Yes, that would be a good thing to do.
> + break;
> + }
> + das[i] = to_apu_bo(bos[i])->iova + to_apu_bo(bos[i])->offset;
> + }
> +
> + if (copy_to_user((void *)args->bo_device_addresses, das,
> + args->bo_handle_count * sizeof(u64))) {
> + ret = -EFAULT;
> + DRM_DEBUG("Failed to copy device addresses\n");
> + goto out;
> + }
> +
> +out:
> + kvfree(das);
> + kvfree(bos);
> +
> + return 0;
> +}
> +
> +int ioctl_gem_iommu_unmap(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct apu_drm *apu_drm = dev->dev_private;
> + struct drm_apu_gem_iommu_map *args = data;
> + struct drm_gem_object **bos;
> + void __user *bo_handles;
> + int ret;
> + int i;
> +
> + if (!apu_drm->domain)
> + return -ENODEV;
> +
> + bo_handles = (void __user *)(uintptr_t) args->bo_handles;
> + ret = drm_gem_objects_lookup(file_priv, bo_handles,
> + args->bo_handle_count, &bos);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < args->bo_handle_count; i++)
> + apu_bo_iommu_unmap(apu_drm, to_apu_bo(bos[i]));
> +
> + kvfree(bos);
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/apu/apu_internal.h b/drivers/gpu/drm/apu/apu_internal.h
> index 021a3efdedf2..ea4183f3fb15 100644
> --- a/drivers/gpu/drm/apu/apu_internal.h
> +++ b/drivers/gpu/drm/apu/apu_internal.h
> @@ -2,6 +2,9 @@
> #ifndef __APU_INTERNAL_H__
> #define __APU_INTERNAL_H__
>
> +#include <linux/iommu.h>
> +#include <linux/iova.h>
> +
> #include <drm/drm_drv.h>
> #include <drm/drm_gem_dma_helper.h>
> #include <drm/gpu_scheduler.h>
> @@ -9,7 +12,10 @@
> struct apu_gem_object {
> struct drm_gem_dma_object base;
> struct mutex mutex;
> + struct sg_table *iommu_sgt;
> + int iommu_refcount;
> size_t size;
> + u32 iova;
Really? "Common infrastructure that could be re-used to support many
accelerators", in 2023, that still assumes 32-bit addressing?
> u32 offset;
> };
>
> @@ -35,6 +41,10 @@ struct apu_drm {
> struct drm_device base;
> struct device *dev;
>
> + struct iommu_domain *domain;
Oh, nothing ever allocates this domain or attaches to it, so this is all
dead code :(
> + struct iova_domain iovad;
> + int iova_limit_pfn;
(and nothing initialises these either)
> +
> struct list_head cores;
> struct list_head node;
>
> @@ -165,12 +175,18 @@ struct apu_gem_object *to_apu_bo(struct drm_gem_object *obj);
> struct drm_gem_object *apu_gem_create_object(struct drm_device *dev,
> size_t size);
>
> +int apu_bo_iommu_map(struct apu_drm *apu_drm, struct drm_gem_object *obj);
> +void apu_bo_iommu_unmap(struct apu_drm *apu_drm, struct apu_gem_object *obj);
> int ioctl_gem_new(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int ioctl_gem_user_new(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> struct dma_buf *apu_gem_prime_export(struct drm_gem_object *gem,
> int flags);
> +int ioctl_gem_iommu_map(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> +int ioctl_gem_iommu_unmap(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> int ioctl_gem_queue(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int ioctl_gem_dequeue(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/apu/apu_sched.c b/drivers/gpu/drm/apu/apu_sched.c
> index 13b6fbd00bd8..716d4b7f2d55 100644
> --- a/drivers/gpu/drm/apu/apu_sched.c
> +++ b/drivers/gpu/drm/apu/apu_sched.c
> @@ -117,6 +117,8 @@ static void apu_job_cleanup(struct kref *ref)
> struct apu_gem_object *apu_obj;
>
> apu_obj = to_apu_bo(job->bos[i]);
> + if (job->apu->domain)
> + apu_bo_iommu_unmap(job->apu, apu_obj);
> drm_gem_object_put(job->bos[i]);
> }
>
> @@ -397,6 +399,7 @@ static int apu_lookup_bos(struct drm_device *dev, struct drm_file *file_priv,
> struct drm_apu_gem_queue *args, struct apu_job *job)
> {
> void __user *bo_handles;
> + unsigned int i;
> int ret;
>
> job->bo_count = args->bo_handle_count;
> @@ -413,6 +416,31 @@ static int apu_lookup_bos(struct drm_device *dev, struct drm_file *file_priv,
> bo_handles = (void __user *)(uintptr_t) args->bo_handles;
> ret = drm_gem_objects_lookup(file_priv, bo_handles,
> job->bo_count, &job->bos);
> + if (ret)
> + return ret;
> +
> + if (!job->apu->domain)
> + return 0;
> +
> + for (i = 0; i < job->bo_count; i++) {
> + ret = apu_bo_iommu_map(job->apu, job->bos[i]);
> + if (ret)
> + goto err_iommu_map;
> + }
> +
> + return ret;
> +
> +err_iommu_map:
> + kvfree(job->implicit_fences);
> + for (i = 0; i < job->bo_count; i++) {
> + struct apu_gem_object *apu_obj;
> +
> + apu_obj = to_apu_bo(job->bos[i]);
> + if (job->apu->domain)
If the domain *did* ever exist, but could suddenly disappear at any
point after you've decided to go ahead and start mapping things into it,
then there is a heck of a lot of sychronisation missing from this whole
infrastructure.
Thanks,
Robin.
> + apu_bo_iommu_unmap(job->apu, apu_obj);
> + drm_gem_object_put(job->bos[i]);
> + }
> + kvfree(job->bos);
>
> return ret;
> }
> diff --git a/include/uapi/drm/apu_drm.h b/include/uapi/drm/apu_drm.h
> index c47000097040..0ecc739d8aed 100644
> --- a/include/uapi/drm/apu_drm.h
> +++ b/include/uapi/drm/apu_drm.h
> @@ -41,6 +41,12 @@ struct drm_apu_gem_dequeue {
> __u64 data;
> };
>
> +struct drm_apu_gem_iommu_map {
> + __u64 bo_handles;
> + __u32 bo_handle_count;
> + __u64 bo_device_addresses;
> +};
> +
> struct apu_job_event {
> struct drm_event base;
> __u32 out_sync;
> @@ -57,12 +63,16 @@ struct drm_apu_state {
> #define DRM_APU_GEM_NEW 0x01
> #define DRM_APU_GEM_QUEUE 0x02
> #define DRM_APU_GEM_DEQUEUE 0x03
> -#define DRM_APU_NUM_IOCTLS 0x04
> +#define DRM_APU_GEM_IOMMU_MAP 0x04
> +#define DRM_APU_GEM_IOMMU_UNMAP 0x05
> +#define DRM_APU_NUM_IOCTLS 0x06
>
> #define DRM_IOCTL_APU_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_STATE, struct drm_apu_state)
> #define DRM_IOCTL_APU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_GEM_NEW, struct drm_apu_gem_new)
> #define DRM_IOCTL_APU_GEM_QUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_GEM_QUEUE, struct drm_apu_gem_queue)
> #define DRM_IOCTL_APU_GEM_DEQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_GEM_DEQUEUE, struct drm_apu_gem_dequeue)
> +#define DRM_IOCTL_APU_GEM_IOMMU_MAP DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_GEM_IOMMU_MAP, struct drm_apu_gem_iommu_map)
> +#define DRM_IOCTL_APU_GEM_IOMMU_UNMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_APU_GEM_IOMMU_UNMAP, struct drm_apu_gem_iommu_map)
>
> #if defined(__cplusplus)
> }
More information about the linux-arm-kernel
mailing list