[PATCH v8 14/29] iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl

Nicolin Chen nicolinc at nvidia.com
Wed Jul 9 22:34:23 PDT 2025


On Wed, Jul 09, 2025 at 11:55:20AM -0700, Nicolin Chen wrote:
> On Wed, Jul 09, 2025 at 02:09:04PM -0300, Jason Gunthorpe wrote:
> > On Fri, Jul 04, 2025 at 06:13:30PM -0700, Nicolin Chen wrote:
> > > +static struct iommufd_access *
> > > +iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
> > > +			    struct iommufd_viommu *viommu, phys_addr_t *base_pa)
> > > +{
> > > +	struct iommufd_access *access;
> > > +	struct page **pages;
> > > +	size_t max_npages;
> > > +	size_t length;
> > > +	u64 offset;
> > > +	size_t i;
> > > +	int rc;
> > > +
> > > +	offset =
> > > +		cmd->nesting_parent_iova - PAGE_ALIGN(cmd->nesting_parent_iova);
> > 
> > PAGE_ALIGN is ALIGN UP, that is the wrong direction?
> > 
> > It is just:
> > 
> >       offset = cmd->nesting_parent_iova % PAGE_SIZE;
> > 
> > And this is missing:
> > 
> > 	*base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
> > 
> > ??
> 
> Yes, my bad. And I realized that all my testings are page aligned.
> Maybe we could add an IOMMU_TEST_OP_HW_QUEUE_CHECK_ADDR..

I found that this needed a bit more care. The pin_pages() API
requires page-aligned iova and length inputs.

So, doing this instead (and adding selftest coverage):

diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 00641204efb2..91339f799916 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -206,7 +206,11 @@ static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
 					    struct iommufd_access *access,
 					    u64 base_iova, size_t length)
 {
-	iommufd_access_unpin_pages(access, base_iova, length);
+	u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
+	u64 offset = base_iova - aligned_iova;
+
+	iommufd_access_unpin_pages(access, aligned_iova,
+				   PAGE_ALIGN(length + offset));
 	iommufd_access_detach_internal(access);
 	iommufd_access_destroy_internal(ictx, access);
 }
@@ -239,22 +243,23 @@ static struct iommufd_access *
 iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
 			    struct iommufd_viommu *viommu, phys_addr_t *base_pa)
 {
+	u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
+	u64 offset = cmd->nesting_parent_iova - aligned_iova;
 	struct iommufd_access *access;
 	struct page **pages;
 	size_t max_npages;
 	size_t length;
-	u64 offset;
 	size_t i;
 	int rc;
 
-	offset =
-		cmd->nesting_parent_iova - PAGE_ALIGN(cmd->nesting_parent_iova);
-	/* DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
+	/* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
 	if (check_add_overflow(offset, cmd->length, &length))
 		return ERR_PTR(-ERANGE);
 	if (check_add_overflow(length, PAGE_SIZE - 1, &length))
 		return ERR_PTR(-ERANGE);
 	max_npages = length / PAGE_SIZE;
+	/* length needs to be page aligned too */
+	length = max_npages * PAGE_SIZE;
 
 	/*
 	 * Use kvcalloc() to avoid memory fragmentation for a large page array.
@@ -274,8 +279,7 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
 	if (rc)
 		goto out_destroy;
 
-	rc = iommufd_access_pin_pages(access, cmd->nesting_parent_iova,
-				      cmd->length, pages, 0);
+	rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
 	if (rc)
 		goto out_detach;
 
@@ -287,13 +291,12 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
 		goto out_unpin;
 	}
 
-	*base_pa = page_to_pfn(pages[0]) << PAGE_SHIFT;
+	*base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
 	kfree(pages);
 	return access;
 
 out_unpin:
-	iommufd_access_unpin_pages(access, cmd->nesting_parent_iova,
-				   cmd->length);
+	iommufd_access_unpin_pages(access, aligned_iova, length);
 out_detach:
 	iommufd_access_detach_internal(access);
 out_destroy:
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 9d5b852d5e19..694b178f8ca4 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -3104,7 +3104,7 @@ TEST_F(iommufd_viommu, hw_queue)
 	/* Allocate index=0, declare ownership of the iova */
 	test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0,
 				iova, PAGE_SIZE, &hw_queue_id[0]);
-	/* Fail duplicate */
+	/* Fail duplicated index */
 	test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
 				0, iova, PAGE_SIZE, &hw_queue_id[0]);
 	/* Fail unmap, due to iova ownership */
@@ -3112,9 +3112,10 @@ TEST_F(iommufd_viommu, hw_queue)
 	/* The 2nd page is not pinned, so it can be unmmap */
 	test_ioctl_ioas_unmap(iova + PAGE_SIZE, PAGE_SIZE);
 
-	/* Allocate index=1 */
+	/* Allocate index=1, with an unaligned case */
 	test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
-				iova, PAGE_SIZE, &hw_queue_id[1]);
+				iova + PAGE_SIZE / 2, PAGE_SIZE / 2,
+				&hw_queue_id[1]);
 	/* Fail to destroy, due to dependency */
 	EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0]));
 



More information about the linux-arm-kernel mailing list