[PATCH v5 11/14] sparc: Use physical address DMA mapping
John Paul Adrian Glaubitz
glaubitz at physik.fu-berlin.de
Tue Feb 17 04:27:26 PST 2026
Hello Leon,
On Wed, 2025-10-15 at 12:12 +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro at nvidia.com>
>
> Convert sparc architecture DMA code to use .map_phys callback.
>
> Signed-off-by: Leon Romanovsky <leonro at nvidia.com>
> ---
> arch/sparc/kernel/iommu.c | 30 +++++++++++++++++-----------
> arch/sparc/kernel/pci_sun4v.c | 31 ++++++++++++++++++-----------
> arch/sparc/mm/io-unit.c | 38 ++++++++++++++++++-----------------
> arch/sparc/mm/iommu.c | 46 ++++++++++++++++++++++---------------------
> 4 files changed, 82 insertions(+), 63 deletions(-)
>
> diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
> index da0363692528..46ef88bc9c26 100644
> --- a/arch/sparc/kernel/iommu.c
> +++ b/arch/sparc/kernel/iommu.c
> @@ -260,26 +260,35 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
> free_pages((unsigned long)cpu, order);
> }
>
> -static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
> - unsigned long offset, size_t sz,
> - enum dma_data_direction direction,
> +static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys,
> + size_t sz, enum dma_data_direction direction,
> unsigned long attrs)
> {
> struct iommu *iommu;
> struct strbuf *strbuf;
> iopte_t *base;
> unsigned long flags, npages, oaddr;
> - unsigned long i, base_paddr, ctx;
> + unsigned long i, ctx;
> u32 bus_addr, ret;
> unsigned long iopte_protection;
>
> + if (unlikely(attrs & DMA_ATTR_MMIO))
> + /*
> + * This check is included because older versions of the code
> + * lacked MMIO path support, and my ability to test this path
> + * is limited. However, from a software technical standpoint,
> + * there is no restriction, as the following code operates
> + * solely on physical addresses.
> + */
> + goto bad_no_ctx;
> +
> iommu = dev->archdata.iommu;
> strbuf = dev->archdata.stc;
>
> if (unlikely(direction == DMA_NONE))
> goto bad_no_ctx;
>
> - oaddr = (unsigned long)(page_address(page) + offset);
> + oaddr = (unsigned long)(phys_to_virt(phys));
> npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
> npages >>= IO_PAGE_SHIFT;
>
> @@ -296,7 +305,6 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
> bus_addr = (iommu->tbl.table_map_base +
> ((base - iommu->page_table) << IO_PAGE_SHIFT));
> ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
> - base_paddr = __pa(oaddr & IO_PAGE_MASK);
> if (strbuf->strbuf_enabled)
> iopte_protection = IOPTE_STREAMING(ctx);
> else
> @@ -304,8 +312,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
> if (direction != DMA_TO_DEVICE)
> iopte_protection |= IOPTE_WRITE;
>
> - for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
> - iopte_val(*base) = iopte_protection | base_paddr;
> + for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE)
> + iopte_val(*base) = iopte_protection | phys;
>
> return ret;
>
> @@ -383,7 +391,7 @@ static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
> vaddr, ctx, npages);
> }
>
> -static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
> +static void dma_4u_unmap_phys(struct device *dev, dma_addr_t bus_addr,
> size_t sz, enum dma_data_direction direction,
> unsigned long attrs)
> {
> @@ -753,8 +761,8 @@ static int dma_4u_supported(struct device *dev, u64 device_mask)
> static const struct dma_map_ops sun4u_dma_ops = {
> .alloc = dma_4u_alloc_coherent,
> .free = dma_4u_free_coherent,
> - .map_page = dma_4u_map_page,
> - .unmap_page = dma_4u_unmap_page,
> + .map_phys = dma_4u_map_phys,
> + .unmap_phys = dma_4u_unmap_phys,
> .map_sg = dma_4u_map_sg,
> .unmap_sg = dma_4u_unmap_sg,
> .sync_single_for_cpu = dma_4u_sync_single_for_cpu,
> diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
> index b720b21ccfbd..791f0a76665f 100644
> --- a/arch/sparc/kernel/pci_sun4v.c
> +++ b/arch/sparc/kernel/pci_sun4v.c
> @@ -352,9 +352,8 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
> free_pages((unsigned long)cpu, order);
> }
>
> -static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
> - unsigned long offset, size_t sz,
> - enum dma_data_direction direction,
> +static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys,
> + size_t sz, enum dma_data_direction direction,
> unsigned long attrs)
> {
> struct iommu *iommu;
> @@ -362,18 +361,27 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
> struct iommu_map_table *tbl;
> u64 mask;
> unsigned long flags, npages, oaddr;
> - unsigned long i, base_paddr;
> - unsigned long prot;
> + unsigned long i, prot;
> dma_addr_t bus_addr, ret;
> long entry;
>
> + if (unlikely(attrs & DMA_ATTR_MMIO))
> + /*
> + * This check is included because older versions of the code
> + * lacked MMIO path support, and my ability to test this path
> + * is limited. However, from a software technical standpoint,
> + * there is no restriction, as the following code operates
> + * solely on physical addresses.
> + */
> + goto bad;
> +
> iommu = dev->archdata.iommu;
> atu = iommu->atu;
>
> if (unlikely(direction == DMA_NONE))
> goto bad;
>
> - oaddr = (unsigned long)(page_address(page) + offset);
> + oaddr = (unsigned long)(phys_to_virt(phys));
> npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
> npages >>= IO_PAGE_SHIFT;
>
> @@ -391,7 +399,6 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
>
> bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
> ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
> - base_paddr = __pa(oaddr & IO_PAGE_MASK);
> prot = HV_PCI_MAP_ATTR_READ;
> if (direction != DMA_TO_DEVICE)
> prot |= HV_PCI_MAP_ATTR_WRITE;
> @@ -403,8 +410,8 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
>
> iommu_batch_start(dev, prot, entry);
>
> - for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
> - long err = iommu_batch_add(base_paddr, mask);
> + for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) {
> + long err = iommu_batch_add(phys, mask);
> if (unlikely(err < 0L))
> goto iommu_map_fail;
> }
> @@ -426,7 +433,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
> return DMA_MAPPING_ERROR;
> }
>
> -static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
> +static void dma_4v_unmap_phys(struct device *dev, dma_addr_t bus_addr,
> size_t sz, enum dma_data_direction direction,
> unsigned long attrs)
> {
> @@ -686,8 +693,8 @@ static int dma_4v_supported(struct device *dev, u64 device_mask)
> static const struct dma_map_ops sun4v_dma_ops = {
> .alloc = dma_4v_alloc_coherent,
> .free = dma_4v_free_coherent,
> - .map_page = dma_4v_map_page,
> - .unmap_page = dma_4v_unmap_page,
> + .map_phys = dma_4v_map_phys,
> + .unmap_phys = dma_4v_unmap_phys,
> .map_sg = dma_4v_map_sg,
> .unmap_sg = dma_4v_unmap_sg,
> .dma_supported = dma_4v_supported,
> diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
> index d8376f61b4d0..d409cb450de4 100644
> --- a/arch/sparc/mm/io-unit.c
> +++ b/arch/sparc/mm/io-unit.c
> @@ -94,13 +94,14 @@ static int __init iounit_init(void)
> subsys_initcall(iounit_init);
>
> /* One has to hold iounit->lock to call this */
> -static unsigned long iounit_get_area(struct iounit_struct *iounit, unsigned long vaddr, int size)
> +static dma_addr_t iounit_get_area(struct iounit_struct *iounit,
> + phys_addr_t phys, int size)
> {
> int i, j, k, npages;
> unsigned long rotor, scan, limit;
> iopte_t iopte;
>
> - npages = ((vaddr & ~PAGE_MASK) + size + (PAGE_SIZE-1)) >> PAGE_SHIFT;
> + npages = (offset_in_page(phys) + size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
>
> /* A tiny bit of magic ingredience :) */
> switch (npages) {
> @@ -109,7 +110,7 @@ static unsigned long iounit_get_area(struct iounit_struct *iounit, unsigned long
> default: i = 0x0213; break;
> }
>
> - IOD(("iounit_get_area(%08lx,%d[%d])=", vaddr, size, npages));
> + IOD(("%s(%pa,%d[%d])=", __func__, &phys, size, npages));
>
> next: j = (i & 15);
> rotor = iounit->rotor[j - 1];
> @@ -124,7 +125,8 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan);
> }
> i >>= 4;
> if (!(i & 15))
> - panic("iounit_get_area: Couldn't find free iopte slots for (%08lx,%d)\n", vaddr, size);
> + panic("iounit_get_area: Couldn't find free iopte slots for (%pa,%d)\n",
> + &phys, size);
> goto next;
> }
> for (k = 1, scan++; k < npages; k++)
> @@ -132,30 +134,29 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan);
> goto nexti;
> iounit->rotor[j - 1] = (scan < limit) ? scan : iounit->limit[j - 1];
> scan -= npages;
> - iopte = MKIOPTE(__pa(vaddr & PAGE_MASK));
> - vaddr = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT) + (vaddr & ~PAGE_MASK);
> + iopte = MKIOPTE(phys & PAGE_MASK);
> + phys = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT) + offset_in_page(phys);
> for (k = 0; k < npages; k++, iopte = __iopte(iopte_val(iopte) + 0x100), scan++) {
> set_bit(scan, iounit->bmap);
> sbus_writel(iopte_val(iopte), &iounit->page_table[scan]);
> }
> - IOD(("%08lx\n", vaddr));
> - return vaddr;
> + IOD(("%pa\n", &phys));
> + return phys;
> }
>
> -static dma_addr_t iounit_map_page(struct device *dev, struct page *page,
> - unsigned long offset, size_t len, enum dma_data_direction dir,
> - unsigned long attrs)
> +static dma_addr_t iounit_map_phys(struct device *dev, phys_addr_t phys,
> + size_t len, enum dma_data_direction dir, unsigned long attrs)
> {
> - void *vaddr = page_address(page) + offset;
> struct iounit_struct *iounit = dev->archdata.iommu;
> - unsigned long ret, flags;
> + unsigned long flags;
> + dma_addr_t ret;
>
> /* XXX So what is maxphys for us and how do drivers know it? */
> if (!len || len > 256 * 1024)
> return DMA_MAPPING_ERROR;
>
> spin_lock_irqsave(&iounit->lock, flags);
> - ret = iounit_get_area(iounit, (unsigned long)vaddr, len);
> + ret = iounit_get_area(iounit, phys, len);
> spin_unlock_irqrestore(&iounit->lock, flags);
> return ret;
> }
> @@ -171,14 +172,15 @@ static int iounit_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
> /* FIXME: Cache some resolved pages - often several sg entries are to the same page */
> spin_lock_irqsave(&iounit->lock, flags);
> for_each_sg(sgl, sg, nents, i) {
> - sg->dma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length);
> + sg->dma_address =
> + iounit_get_area(iounit, sg_phys(sg), sg->length);
> sg->dma_length = sg->length;
> }
> spin_unlock_irqrestore(&iounit->lock, flags);
> return nents;
> }
>
> -static void iounit_unmap_page(struct device *dev, dma_addr_t vaddr, size_t len,
> +static void iounit_unmap_phys(struct device *dev, dma_addr_t vaddr, size_t len,
> enum dma_data_direction dir, unsigned long attrs)
> {
> struct iounit_struct *iounit = dev->archdata.iommu;
> @@ -279,8 +281,8 @@ static const struct dma_map_ops iounit_dma_ops = {
> .alloc = iounit_alloc,
> .free = iounit_free,
> #endif
> - .map_page = iounit_map_page,
> - .unmap_page = iounit_unmap_page,
> + .map_phys = iounit_map_phys,
> + .unmap_phys = iounit_unmap_phys,
> .map_sg = iounit_map_sg,
> .unmap_sg = iounit_unmap_sg,
> };
> diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
> index 5a5080db800f..f48adf62724a 100644
> --- a/arch/sparc/mm/iommu.c
> +++ b/arch/sparc/mm/iommu.c
> @@ -181,18 +181,20 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
> }
> }
>
> -static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
> - unsigned long offset, size_t len, bool per_page_flush)
> +static dma_addr_t __sbus_iommu_map_phys(struct device *dev, phys_addr_t paddr,
> + size_t len, bool per_page_flush, unsigned long attrs)
> {
> struct iommu_struct *iommu = dev->archdata.iommu;
> - phys_addr_t paddr = page_to_phys(page) + offset;
> - unsigned long off = paddr & ~PAGE_MASK;
> + unsigned long off = offset_in_page(paddr);
> unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> unsigned long pfn = __phys_to_pfn(paddr);
> unsigned int busa, busa0;
> iopte_t *iopte, *iopte0;
> int ioptex, i;
>
> + if (unlikely(attrs & DMA_ATTR_MMIO))
> + return DMA_MAPPING_ERROR;
> +
> /* XXX So what is maxphys for us and how do drivers know it? */
> if (!len || len > 256 * 1024)
> return DMA_MAPPING_ERROR;
> @@ -202,10 +204,10 @@ static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
> * XXX Is this a good assumption?
> * XXX What if someone else unmaps it here and races us?
> */
> - if (per_page_flush && !PageHighMem(page)) {
> + if (per_page_flush && !PhysHighMem(paddr)) {
> unsigned long vaddr, p;
>
> - vaddr = (unsigned long)page_address(page) + offset;
> + vaddr = (unsigned long)phys_to_virt(paddr);
> for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE)
> flush_page_for_dma(p);
> }
> @@ -231,19 +233,19 @@ static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
> return busa0 + off;
> }
>
> -static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
> - struct page *page, unsigned long offset, size_t len,
> - enum dma_data_direction dir, unsigned long attrs)
> +static dma_addr_t sbus_iommu_map_phys_gflush(struct device *dev,
> + phys_addr_t phys, size_t len, enum dma_data_direction dir,
> + unsigned long attrs)
> {
> flush_page_for_dma(0);
> - return __sbus_iommu_map_page(dev, page, offset, len, false);
> + return __sbus_iommu_map_phys(dev, phys, len, false, attrs);
> }
>
> -static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
> - struct page *page, unsigned long offset, size_t len,
> - enum dma_data_direction dir, unsigned long attrs)
> +static dma_addr_t sbus_iommu_map_phys_pflush(struct device *dev,
> + phys_addr_t phys, size_t len, enum dma_data_direction dir,
> + unsigned long attrs)
> {
> - return __sbus_iommu_map_page(dev, page, offset, len, true);
> + return __sbus_iommu_map_phys(dev, phys, len, true, attrs);
> }
>
> static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
> @@ -254,8 +256,8 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
> int j;
>
> for_each_sg(sgl, sg, nents, j) {
> - sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg),
> - sg->offset, sg->length, per_page_flush);
> + sg->dma_address = __sbus_iommu_map_phys(dev, sg_phys(sg),
> + sg->length, per_page_flush, attrs);
> if (sg->dma_address == DMA_MAPPING_ERROR)
> return -EIO;
> sg->dma_length = sg->length;
> @@ -277,7 +279,7 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
> return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
> }
>
> -static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
> +static void sbus_iommu_unmap_phys(struct device *dev, dma_addr_t dma_addr,
> size_t len, enum dma_data_direction dir, unsigned long attrs)
> {
> struct iommu_struct *iommu = dev->archdata.iommu;
> @@ -303,7 +305,7 @@ static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
> int i;
>
> for_each_sg(sgl, sg, nents, i) {
> - sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir,
> + sbus_iommu_unmap_phys(dev, sg->dma_address, sg->length, dir,
> attrs);
> sg->dma_address = 0x21212121;
> }
> @@ -426,8 +428,8 @@ static const struct dma_map_ops sbus_iommu_dma_gflush_ops = {
> .alloc = sbus_iommu_alloc,
> .free = sbus_iommu_free,
> #endif
> - .map_page = sbus_iommu_map_page_gflush,
> - .unmap_page = sbus_iommu_unmap_page,
> + .map_phys = sbus_iommu_map_phys_gflush,
> + .unmap_phys = sbus_iommu_unmap_phys,
> .map_sg = sbus_iommu_map_sg_gflush,
> .unmap_sg = sbus_iommu_unmap_sg,
> };
> @@ -437,8 +439,8 @@ static const struct dma_map_ops sbus_iommu_dma_pflush_ops = {
> .alloc = sbus_iommu_alloc,
> .free = sbus_iommu_free,
> #endif
> - .map_page = sbus_iommu_map_page_pflush,
> - .unmap_page = sbus_iommu_unmap_page,
> + .map_phys = sbus_iommu_map_phys_pflush,
> + .unmap_phys = sbus_iommu_unmap_phys,
> .map_sg = sbus_iommu_map_sg_pflush,
> .unmap_sg = sbus_iommu_unmap_sg,
> };
This change has just been reported to cause a regression with the igb
driver on some SPARC server systems:
Feb 16 15:20:22 sparcy kernel: igb 0000:0c:00.0 enp12s0f0: igb: enp12s0f0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
Feb 16 15:20:23 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000000a7d:1:2736c000] failed with status 8
Feb 16 15:20:23 sparcy kernel: igb 0000:0c:00.0: TX DMA map failed
Feb 16 15:20:23 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000000a7e:1:2736c000] failed with status 8
Feb 16 15:20:23 sparcy kernel: igb 0000:0c:00.0: TX DMA map failed
Feb 16 15:20:23 sparcy kernel: igb 0000:0c:00.1 enp12s0f1: igb: enp12s0f1 NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX
Feb 16 15:20:23 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:10000001c02af:1:2736e000] failed with status 8
Feb 16 15:20:23 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:100000038009f:1:27390000] failed with status 8
Feb 16 15:20:23 sparcy kernel: igb 0000:0c:00.1: TX DMA map failed
Feb 16 15:20:23 sparcy last message buffered 1 times
Feb 16 15:20:23 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:10000001c02b1:1:2736e000] failed with status 8
Feb 16 15:20:23 sparcy kernel: igb 0000:0c:00.0: TX DMA map failed
Feb 16 15:20:24 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000000a80:1:2736c000] failed with status 8
Feb 16 15:20:24 sparcy kernel: igb 0000:0c:00.0: TX DMA map failed
Feb 16 15:20:24 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000000a81:1:2736c000] failed with status 8
Feb 16 15:20:24 sparcy kernel: igb 0000:0c:00.0: TX DMA map failed
Feb 16 15:20:24 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000200000:1:2738e000] failed with status 8
Feb 16 15:20:24 sparcy kernel: igb 0000:0c:00.1: TX DMA map failed
Feb 16 15:20:24 sparcy kernel: iommu_batch_flush: ATU map of [00000400:2:1000000000a82:1:2736c000] failed with status 8
Feb 16 15:20:24 sparcy kernel: igb 0000:0c:00.1: TX DMA map failed
Please see also the discussion in [1].
Adrian
[1] https://github.com/sparclinux/issues/issues/75
--
.''`. John Paul Adrian Glaubitz
: :' : Debian Developer
`. `' Physicist
`- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
More information about the linux-arm-kernel
mailing list