[PATCH 20/21] ARM: dma-mapping: split out arch_dma_mark_clean() helper

Arnd Bergmann arnd at kernel.org
Mon Mar 27 05:13:16 PDT 2023


From: Arnd Bergmann <arnd at arndb.de>

The arm version of the arch_sync_dma_for_cpu() function annotates pages as
PG_dcache_clean after a DMA, but no other architecture does this here. On
ia64, the same thing is done in arch_dma_mark_clean(), so it makes sense
to use the same hook in order to have identical arch_sync_dma_for_cpu()
semantics as all other architectures.

Splitting this out has multiple effects:

 - for dma-direct, this now gets called after arch_sync_dma_for_cpu()
   for DMA_FROM_DEVICE mappings, but not for DMA_BIDIRECTIONAL. While
   it would not be harmful to keep doing it for bidirectional mappings,
   those are apparently not used in any callers that care about the flag.

 - Since arm has its own dma-iommu abstraction, this now also needs to
   call the same function, so the calls are added there to mirror the
   dma-direct version.

 - Like dma-direct, the dma-iommu version now marks the dcache clean
   for both coherent and noncoherent devices after a DMA, but it only
   does this for DMA_FROM_DEVICE, not DMA_BIDIRECTIONAL.

[ HELP NEEDED: can anyone confirm that it is a correct assumption
  on arm that a cache-coherent device writing to a page always results
  in it being in a PG_dcache_clean state like on ia64, or can a device
  write directly into the dcache?]

Signed-off-by: Arnd Bergmann <arnd at arndb.de>
---
 arch/arm/Kconfig          |  1 +
 arch/arm/mm/dma-mapping.c | 71 +++++++++++++++++++++++----------------
 2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e24a9820e12f..125d58c54ab1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
+	select ARCH_HAS_DMA_MARK_CLEAN if MMU
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index cc702cb27ae7..b703cb83d27e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -665,6 +665,28 @@ static void dma_cache_maint(phys_addr_t paddr,
 	} while (left);
 }
 
+/*
+ * Mark the D-cache clean for fully covered pages to avoid extra flushing.
+ */
+void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
+{
+	unsigned long pfn = PFN_UP(paddr);	/* first page at or after paddr */
+	unsigned long off = paddr & (PAGE_SIZE - 1);	/* offset in page */
+	size_t left = size;
+
+	if (size < PAGE_SIZE)	/* too small to cover a whole page */
+		return;
+
+	if (off)
+		left -= PAGE_SIZE - off;	/* skip partial leading page */
+
+	while (left >= PAGE_SIZE) {	/* partial trailing page is not marked */
+		struct page *page = pfn_to_page(pfn++);
+		set_bit(PG_dcache_clean, &page->flags);
+		left -= PAGE_SIZE;
+	}
+}
+
 static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
 {
 	if (IS_ENABLED(CONFIG_CPU_V6) ||
@@ -715,24 +737,6 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 		outer_inv_range(paddr, paddr + size);
 		dma_cache_maint(paddr, size, dmac_inv_range);
 	}
-
-	/*
-	 * Mark the D-cache clean for these pages to avoid extra flushing.
-	 */
-	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
-		unsigned long pfn = PFN_UP(paddr);
-		unsigned long off = paddr & (PAGE_SIZE - 1);
-		size_t left = size;
-
-		if (off)
-			left -= PAGE_SIZE - off;
-
-		while (left >= PAGE_SIZE) {
-			struct page *page = pfn_to_page(pfn++);
-			set_bit(PG_dcache_clean, &page->flags);
-			left -= PAGE_SIZE;
-		}
-	}
 }
 
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
@@ -1294,6 +1298,17 @@ static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
 	return -EINVAL;
 }
 
+static void arm_iommu_sync_dma_for_cpu(phys_addr_t phys, size_t len,
+				       enum dma_data_direction dir,
+				       bool dma_coherent)
+{
+	if (!dma_coherent)	/* cache sync is skipped for coherent devices */
+		arch_sync_dma_for_cpu(phys, len, dir);
+
+	if (dir == DMA_FROM_DEVICE)	/* not done for DMA_BIDIRECTIONAL */
+		arch_dma_mark_clean(phys, len);
+}
+
 /**
  * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
  * @dev: valid struct device pointer
@@ -1316,8 +1331,9 @@ static void arm_iommu_unmap_sg(struct device *dev,
 		if (sg_dma_len(s))
 			__iommu_remove_mapping(dev, sg_dma_address(s),
 					       sg_dma_len(s));
-		if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
+		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+			arm_iommu_sync_dma_for_cpu(sg_phys(s), s->length, dir,
+						   dev->dma_coherent);
 	}
 }
 
@@ -1335,12 +1351,9 @@ static void arm_iommu_sync_sg_for_cpu(struct device *dev,
 	struct scatterlist *s;
 	int i;
 
-	if (dev->dma_coherent)
-		return;
-
 	for_each_sg(sg, s, nents, i)
-		arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
-
+		arm_iommu_sync_dma_for_cpu(sg_phys(s), s->length, dir,
+					   dev->dma_coherent);
 }
 
 /**
@@ -1425,9 +1438,9 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 	if (!iova)
 		return;
 
-	if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		phys = iommu_iova_to_phys(mapping->domain, handle);
-		arch_sync_dma_for_cpu(phys, size, dir);
+		arm_iommu_sync_dma_for_cpu(phys, size, dir, dev->dma_coherent);
 	}
 
 	iommu_unmap(mapping->domain, iova, len);
@@ -1497,11 +1510,11 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	phys_addr_t phys;
 
-	if (dev->dma_coherent || !(handle & PAGE_MASK))
+	if (!(handle & PAGE_MASK))
 		return;
 
 	phys = iommu_iova_to_phys(mapping->domain, handle);
-	arch_sync_dma_for_cpu(phys, size, dir);
+	arm_iommu_sync_dma_for_cpu(phys, size, dir, dev->dma_coherent);
 }
 
 static void arm_iommu_sync_single_for_device(struct device *dev,
-- 
2.39.2




More information about the linux-arm-kernel mailing list