[PATCH v6 06/25] iommu/io-pgtable-arm: Rework to use the iommu-pages API

Mostafa Saleh smostafa at google.com
Mon May 4 05:19:37 PDT 2026


On Fri, May 01, 2026 at 09:24:24AM -0300, Jason Gunthorpe wrote:
> On Fri, May 01, 2026 at 11:19:08AM +0000, Mostafa Saleh wrote:
> > To prepare for supporting io-pgtable-arm in the pKVM hypervisor,
> > we need to abstract away standard kernel allocations, frees, virt/phys
> > conversions, and DMA API mapping.
> > 
> > This patch introduces a set of generic wrappers in iommu-pages.h:
> > - iommu_alloc_data
> > - iommu_free_data
> > - iommu_virt_to_phys
> > - iommu_phys_to_virt
> > - iommu_pages_dma_map
> > - iommu_pages_dma_mapping_error
> > - iommu_pages_dma_unmap
> 
> Wah? This has nothing to do with iommu pages? This just leaking
> everything iommu pages abstracted out :(
> 
> When I said to use iommu-pages, I meant to use the existing API, not a
> completely different one.
> 
> From an iommu-pages perspective the issue is this code open codes
> dma_map_single()/etc instead of using the API surface
> iommu_pages_start_incoherent()
> 
> This is annoying to fix because the external allocator messes it up,
> but I think with some #ifdef you can probably fix it up.
> 
> So.. I suggest you update it to use the iommu_pages API, #ifdef out
> the allocator so the pkvm side doesn't need to deal with it. Then
> compile a special iommu-pages for the pkvm side presenting the same
> API.

I see, we still need to leave the DMA-API calls for the custom config,
as I am not sure if it can use pages not backed by the vmemmap, I
pushed that into a separate function so it’s easily compiled out.

Without this patch, now it looks like:

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0208e5897c29..1583b9916b09 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -248,26 +248,15 @@ static dma_addr_t __arm_lpae_dma_addr(void *pages)
 	return (dma_addr_t)virt_to_phys(pages);
 }

-static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
-				    struct io_pgtable_cfg *cfg,
-				    void *cookie)
+static void *__arm_lpae_cfg_alloc(size_t size, gfp_t gfp,
+				  struct io_pgtable_cfg *cfg,
+				  void *cookie)
 {
 	struct device *dev = cfg->iommu_dev;
-	size_t alloc_size;
 	dma_addr_t dma;
 	void *pages;

-	/*
-	 * For very small starting-level translation tables the HW requires a
-	 * minimum alignment of at least 64 to cover all cases.
-	 */
-	alloc_size = max(size, 64);
-	if (cfg->alloc)
-		pages = cfg->alloc(cookie, alloc_size, gfp);
-	else
-		pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp,
-						  alloc_size);
-
+	pages = cfg->alloc(cookie, size, gfp);
 	if (!pages)
 		return NULL;

@@ -289,26 +278,67 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 out_unmap:
 	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
 	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
-
 out_free:
-	if (cfg->free)
-		cfg->free(cookie, pages, size);
-	else
-		iommu_free_pages(pages);
-
+	cfg->free(cookie, pages, size);
 	return NULL;
 }

-static void __arm_lpae_free_pages(void *pages, size_t size,
-				  struct io_pgtable_cfg *cfg,
-				  void *cookie)
+static void __arm_lpae_cfg_free(void *pages, size_t size,
+				struct io_pgtable_cfg *cfg,
+				void *cookie)
 {
 	if (!cfg->coherent_walk)
 		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
 				 size, DMA_TO_DEVICE);

-	if (cfg->free)
-		cfg->free(cookie, pages, size);
+	cfg->free(cookie, pages, size);
+}
+
+static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
+				    struct io_pgtable_cfg *cfg,
+				    void *cookie)
+{
+	struct device *dev = cfg->iommu_dev;
+	size_t alloc_size;
+	void *pages;
+
+	/*
+	 * For very small starting-level translation tables the HW requires a
+	 * minimum alignment of at least 64 to cover all cases.
+	 */
+	alloc_size = max(size, 64);
+	if (cfg->alloc)
+		return __arm_lpae_cfg_alloc(alloc_size, gfp, cfg, cookie);
+
+	pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp, alloc_size);
+	if (!pages)
+		return NULL;
+
+	if (!cfg->coherent_walk) {
+		int ret = iommu_pages_start_incoherent(pages, dev);
+
+		if (ret) {
+			if (ret == -EOPNOTSUPP)
+				dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
+			iommu_free_pages(pages);
+			return NULL;
+		}
+	}
+
+	return pages;
+}
+
+static void __arm_lpae_free_pages(void *pages, size_t size,
+				  struct io_pgtable_cfg *cfg,
+				  void *cookie)
+{
+	if (cfg->free) {
+		__arm_lpae_cfg_free(pages, size, cfg, cookie);
+		return;
+	}
+
+	if (!cfg->coherent_walk)
+		iommu_pages_free_incoherent(pages, cfg->iommu_dev);
 	else
 		iommu_free_pages(pages);
 }


Thanks,
Mostafa

> 
> You should have a pkvm shim header that provides
> kmalloc/kfree/virt_to_phys in the normal way and just #include that in
> io-pgtable when doing a pkvm build instead of hacking up all the code.

Ok, I can do that in another change, but I believe it's better to
change the usage in this file to arm_lpae_*(virt_to_phys...) so it's
clear which parts are intended for that.

Thanks,
Mostafa

> 
> Jason



More information about the linux-arm-kernel mailing list