[PATCH v2] iommu/rockchip: Drop global rk_ops in favor of per-device ops
Jonas Karlman
jonas at kwiboo.se
Wed Apr 1 01:25:58 PDT 2026
Hi,
On 4/1/2026 9:59 AM, Simon Xue wrote:
> Hi all,
>
> A gentle ping on this patch.
>
> 在 2026/3/13 17:32, Shawn Lin 写道:
>> 在 2026/03/10 星期二 18:53, Simon Xue 写道:
>>> The driver currently uses a global rk_ops pointer, forcing all IOMMU
>>> instances to share the same operations. This restricts the driver from
>>> supporting SoCs that might integrate different versions of IOMMU
>>> hardware.
>>>
>>> Since the IOMMU framework passes the master device information to
>>> iommu_paging_domain_alloc(), the global variable is no longer needed.
>>>
>>> Fix this by moving rk_ops into struct rk_iommu and struct
>>> rk_iommu_domain.
>>> Initialize it per-device during probe via of_device_get_match_data(),
>>> and replace all global references with the instance-specific pointers.
>>>
>>
>> Thanks for the patch, Simon. I've tested it on the RK3576 EVB1 with
>> PCIe1 + IOMMU. NVMe works fine on it, and I also verified the IOVA
>> allocated in the NVMe driver, they look correct as I manually limited
>> the memblock to under 2GB, so here it is:
>>
>> nvme 0001:21:00.0: cq_dma_addr: 0x00000000f7fc7000
>>
>> Tested-by: Shawn Lin <shawn.lin at rock-chips.com>
>> Reviewed-by: Shawn Lin <shawn.lin at rock-chips.com>
>>
>>> Signed-off-by: Simon Xue <xxm at rock-chips.com>
>>> ---
>>> v2:
>>> - Remove the one-time-used 'ops' variable in rk_iommu_probe()
>>>
>>> drivers/iommu/rockchip-iommu.c | 71 ++++++++++++++++------------------
>>> 1 file changed, 33 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/iommu/rockchip-iommu.c
>>> b/drivers/iommu/rockchip-iommu.c
>>> index 0013cf196c57..4da80136933c 100644
>>> --- a/drivers/iommu/rockchip-iommu.c
>>> +++ b/drivers/iommu/rockchip-iommu.c
>>> @@ -82,6 +82,14 @@
>>> */
>>> #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
>>> +struct rk_iommu_ops {
>>> + phys_addr_t (*pt_address)(u32 dte);
>>> + u32 (*mk_dtentries)(dma_addr_t pt_dma);
>>> + u32 (*mk_ptentries)(phys_addr_t page, int prot);
>>> + u64 dma_bit_mask;
>>> + gfp_t gfp_flags;
>>> +};
>>> +
>>> struct rk_iommu_domain {
>>> struct list_head iommus;
>>> u32 *dt; /* page directory table */
>>> @@ -89,6 +97,7 @@ struct rk_iommu_domain {
>>> spinlock_t iommus_lock; /* lock for iommus list */
>>> spinlock_t dt_lock; /* lock for modifying page directory table */
>>> struct device *dma_dev;
>>> + const struct rk_iommu_ops *rk_ops;
>>> struct iommu_domain domain;
>>> };
>>> @@ -98,14 +107,6 @@ static const char * const rk_iommu_clocks[] = {
>>> "aclk", "iface",
>>> };
>>> -struct rk_iommu_ops {
>>> - phys_addr_t (*pt_address)(u32 dte);
>>> - u32 (*mk_dtentries)(dma_addr_t pt_dma);
>>> - u32 (*mk_ptentries)(phys_addr_t page, int prot);
>>> - u64 dma_bit_mask;
>>> - gfp_t gfp_flags;
>>> -};
>>> -
>>> struct rk_iommu {
>>> struct device *dev;
>>> void __iomem **bases;
>>> @@ -117,6 +118,7 @@ struct rk_iommu {
>>> struct iommu_device iommu;
>>> struct list_head node; /* entry in rk_iommu_domain.iommus */
>>> struct iommu_domain *domain; /* domain to which iommu is
>>> attached */
>>> + const struct rk_iommu_ops *rk_ops;
Do we really need the rk_ops on both the rk_iommu and rk_iommu_domain?
>>> };
>>> struct rk_iommudata {
>>> @@ -124,7 +126,6 @@ struct rk_iommudata {
>>> struct rk_iommu *iommu;
>>> };
>>> -static const struct rk_iommu_ops *rk_ops;
>>> static struct iommu_domain rk_identity_domain;
>>> static inline void rk_table_flush(struct rk_iommu_domain *dom,
>>> dma_addr_t dma,
>>> @@ -510,7 +511,7 @@ static int rk_iommu_force_reset(struct rk_iommu
>>> *iommu)
>>> * and verifying that upper 5 (v1) or 7 (v2) nybbles are read
>>> back.
>>> */
>>> for (i = 0; i < iommu->num_mmu; i++) {
>>> - dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
>>> + dte_addr = iommu->rk_ops->pt_address(DTE_ADDR_DUMMY);
Maybe this patch is trying to do too much at once, which makes it harder
to review? To simplify review, maybe one patch just drops the global
rk_ops and assigns a local version based on domain/iommu, and a second
patch changes to use the iommu->rk_ops directly? Just a thought.
>>> rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
>>> if (dte_addr != rk_iommu_read(iommu->bases[i],
>>> RK_MMU_DTE_ADDR)) {
>>> @@ -551,7 +552,7 @@ static void log_iova(struct rk_iommu *iommu, int
>>> index, dma_addr_t iova)
>>> page_offset = rk_iova_page_offset(iova);
>>> mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
>>> - mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr);
>>> + mmu_dte_addr_phys = iommu->rk_ops->pt_address(mmu_dte_addr);
>>> dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
>>> dte_addr = phys_to_virt(dte_addr_phys);
>>> @@ -560,14 +561,14 @@ static void log_iova(struct rk_iommu *iommu,
>>> int index, dma_addr_t iova)
>>> if (!rk_dte_is_pt_valid(dte))
>>> goto print_it;
>>> - pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4);
>>> + pte_addr_phys = iommu->rk_ops->pt_address(dte) + (pte_index * 4);
>>> pte_addr = phys_to_virt(pte_addr_phys);
>>> pte = *pte_addr;
>>> if (!rk_pte_is_page_valid(pte))
>>> goto print_it;
>>> - page_addr_phys = rk_ops->pt_address(pte) + page_offset;
>>> + page_addr_phys = iommu->rk_ops->pt_address(pte) + page_offset;
>>> page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
>>> print_it:
>>> @@ -663,13 +664,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct
>>> iommu_domain *domain,
>>> if (!rk_dte_is_pt_valid(dte))
>>> goto out;
>>> - pt_phys = rk_ops->pt_address(dte);
>>> + pt_phys = rk_domain->rk_ops->pt_address(dte);
>>> page_table = (u32 *)phys_to_virt(pt_phys);
>>> pte = page_table[rk_iova_pte_index(iova)];
>>> if (!rk_pte_is_page_valid(pte))
>>> goto out;
>>> - phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova);
>>> + phys = rk_domain->rk_ops->pt_address(pte) +
>>> rk_iova_page_offset(iova);
>>> out:
>>> spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
>>> @@ -730,7 +731,7 @@ static u32 *rk_dte_get_page_table(struct
>>> rk_iommu_domain *rk_domain,
>>> if (rk_dte_is_pt_valid(dte))
>>> goto done;
>>> - page_table = iommu_alloc_pages_sz(GFP_ATOMIC | rk_ops->gfp_flags,
>>> + page_table = iommu_alloc_pages_sz(GFP_ATOMIC |
>>> rk_domain->rk_ops->gfp_flags,
>>> SPAGE_SIZE);
>>> if (!page_table)
>>> return ERR_PTR(-ENOMEM);
>>> @@ -742,13 +743,13 @@ static u32 *rk_dte_get_page_table(struct
>>> rk_iommu_domain *rk_domain,
>>> return ERR_PTR(-ENOMEM);
>>> }
>>> - dte = rk_ops->mk_dtentries(pt_dma);
>>> + dte = rk_domain->rk_ops->mk_dtentries(pt_dma);
>>> *dte_addr = dte;
>>> rk_table_flush(rk_domain,
>>> rk_domain->dt_dma + dte_index * sizeof(u32), 1);
>>> done:
>>> - pt_phys = rk_ops->pt_address(dte);
>>> + pt_phys = rk_domain->rk_ops->pt_address(dte);
>>> return (u32 *)phys_to_virt(pt_phys);
>>> }
>>> @@ -790,7 +791,7 @@ static int rk_iommu_map_iova(struct
>>> rk_iommu_domain *rk_domain, u32 *pte_addr,
>>> if (rk_pte_is_page_valid(pte))
>>> goto unwind;
>>> - pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot);
>>> + pte_addr[pte_count] = rk_domain->rk_ops->mk_ptentries(paddr,
>>> prot);
>>> paddr += SPAGE_SIZE;
>>> }
>>> @@ -812,7 +813,7 @@ static int rk_iommu_map_iova(struct
>>> rk_iommu_domain *rk_domain, u32 *pte_addr,
>>> pte_count * SPAGE_SIZE);
>>> iova += pte_count * SPAGE_SIZE;
>>> - page_phys = rk_ops->pt_address(pte_addr[pte_count]);
>>> + page_phys = rk_domain->rk_ops->pt_address(pte_addr[pte_count]);
>>> pr_err("iova: %pad already mapped to %pa cannot remap to phys:
>>> %pa prot: %#x\n",
>>> &iova, &page_phys, &paddr, prot);
>>> @@ -849,7 +850,7 @@ static int rk_iommu_map(struct iommu_domain
>>> *domain, unsigned long _iova,
>>> pte_index = rk_iova_pte_index(iova);
>>> pte_addr = &page_table[pte_index];
>>> - pte_dma = rk_ops->pt_address(dte_index) + pte_index *
>>> sizeof(u32);
>>> + pte_dma = rk_domain->rk_ops->pt_address(dte_index) + pte_index *
>>> sizeof(u32);
>>> ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
>>> paddr, size, prot);
>>> @@ -887,7 +888,7 @@ static size_t rk_iommu_unmap(struct
>>> iommu_domain *domain, unsigned long _iova,
>>> return 0;
>>> }
>>> - pt_phys = rk_ops->pt_address(dte);
>>> + pt_phys = rk_domain->rk_ops->pt_address(dte);
>>> pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
>>> pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
>>> unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma,
>>> size);
>>> @@ -945,7 +946,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
>>> for (i = 0; i < iommu->num_mmu; i++) {
>>> rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
>>> - rk_ops->mk_dtentries(rk_domain->dt_dma));
>>> + iommu->rk_ops->mk_dtentries(rk_domain->dt_dma));
>>> rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
>>> rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK,
>>> RK_MMU_IRQ_MASK);
>>> }
>>> @@ -1068,17 +1069,19 @@ static struct iommu_domain
>>> *rk_iommu_domain_alloc_paging(struct device *dev)
>>> if (!rk_domain)
>>> return NULL;
>>> + iommu = rk_iommu_from_dev(dev);
>>> + rk_domain->rk_ops = iommu->rk_ops;
As mentioned above, this seems strange and is possibly just a shortcut
due to current call-paths? Why do we assign "iommu ops" to the domain?
Regards,
Jonas
>>> +
>>> /*
>>> * rk32xx iommus use a 2 level pagetable.
>>> * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
>>> * Allocate one 4 KiB page for each table.
>>> */
>>> - rk_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL |
>>> rk_ops->gfp_flags,
>>> + rk_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL |
>>> rk_domain->rk_ops->gfp_flags,
>>> SPAGE_SIZE);
>>> if (!rk_domain->dt)
>>> goto err_free_domain;
>>> - iommu = rk_iommu_from_dev(dev);
>>> rk_domain->dma_dev = iommu->dev;
>>> rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev,
>>> rk_domain->dt,
>>> SPAGE_SIZE, DMA_TO_DEVICE);
>>> @@ -1117,7 +1120,7 @@ static void rk_iommu_domain_free(struct
>>> iommu_domain *domain)
>>> for (i = 0; i < NUM_DT_ENTRIES; i++) {
>>> u32 dte = rk_domain->dt[i];
>>> if (rk_dte_is_pt_valid(dte)) {
>>> - phys_addr_t pt_phys = rk_ops->pt_address(dte);
>>> + phys_addr_t pt_phys = rk_domain->rk_ops->pt_address(dte);
>>> u32 *page_table = phys_to_virt(pt_phys);
>>> dma_unmap_single(rk_domain->dma_dev, pt_phys,
>>> SPAGE_SIZE, DMA_TO_DEVICE);
>>> @@ -1197,7 +1200,6 @@ static int rk_iommu_probe(struct
>>> platform_device *pdev)
>>> struct device *dev = &pdev->dev;
>>> struct rk_iommu *iommu;
>>> struct resource *res;
>>> - const struct rk_iommu_ops *ops;
>>> int num_res = pdev->num_resources;
>>> int err, i;
>>> @@ -1211,16 +1213,9 @@ static int rk_iommu_probe(struct
>>> platform_device *pdev)
>>> iommu->dev = dev;
>>> iommu->num_mmu = 0;
>>> - ops = of_device_get_match_data(dev);
>>> - if (!rk_ops)
>>> - rk_ops = ops;
>>> -
>>> - /*
>>> - * That should not happen unless different versions of the
>>> - * hardware block are embedded the same SoC
>>> - */
>>> - if (WARN_ON(rk_ops != ops))
>>> - return -EINVAL;
>>> + iommu->rk_ops = of_device_get_match_data(dev);
>>> + if (!iommu->rk_ops)
>>> + return -ENOENT;
>>> iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases),
>>> GFP_KERNEL);
>>> @@ -1286,7 +1281,7 @@ static int rk_iommu_probe(struct
>>> platform_device *pdev)
>>> goto err_pm_disable;
>>> }
>>> - dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
>>> + dma_set_mask_and_coherent(dev, iommu->rk_ops->dma_bit_mask);
>>> err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL,
>>> dev_name(dev));
>>> if (err)
>>>
>>
>
> _______________________________________________
> Linux-rockchip mailing list
> Linux-rockchip at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-rockchip
More information about the Linux-rockchip
mailing list