[RFC 0/2] fix dma_map_sg not to do barriers for each buffer

Russell King - ARM Linux linux at arm.linux.org.uk
Wed Feb 10 16:21:56 EST 2010


On Wed, Feb 10, 2010 at 12:37:28PM -0800, adharmap at codeaurora.org wrote:
> From: Abhijeet Dharmapurikar <adharmap at quicinc.com>
> 
> Please refer to the post here
> http://lkml.org/lkml/2010/1/4/347
> 
> These changes are to introduce barrierless dma_map_area and dma_unmap_area and
> use them to map the buffers in the scatterlist. For the last buffer, call
> the normal dma_map_area(aka with barriers) effectively executing the barrier
> at the end of the operation.

What if we make dma_map_area and dma_unmap_area both be barrier-less,
and instead have a separate dma_barrier method - eg, something like the
attached?

This might allow for better I-cache usage by not having to duplicate the
DMA cache coherence functions.

PS, you haven't sorted out all the processor support files for your change.

 arch/arm/include/asm/cacheflush.h  |    4 ++++
 arch/arm/include/asm/dma-mapping.h |    8 ++++++++
 arch/arm/mm/cache-fa.S             |   13 +++++++------
 arch/arm/mm/cache-v3.S             |    3 +++
 arch/arm/mm/cache-v4.S             |    3 +++
 arch/arm/mm/cache-v4wb.S           |    9 +++++++--
 arch/arm/mm/cache-v4wt.S           |    3 +++
 arch/arm/mm/cache-v6.S             |   13 +++++++------
 arch/arm/mm/cache-v7.S             |    9 ++++++---
 arch/arm/mm/dma-mapping.c          |   16 ++++++++++++++++
 arch/arm/mm/proc-arm1020e.S        |   10 +++++++---
 arch/arm/mm/proc-arm1022.S         |   10 +++++++---
 arch/arm/mm/proc-arm1026.S         |   10 +++++++---
 arch/arm/mm/proc-arm920.S          |   10 +++++++---
 arch/arm/mm/proc-arm922.S          |   10 +++++++---
 arch/arm/mm/proc-arm925.S          |   10 +++++++---
 arch/arm/mm/proc-arm926.S          |   10 +++++++---
 arch/arm/mm/proc-arm940.S          |   10 +++++++---
 arch/arm/mm/proc-arm946.S          |   10 +++++++---
 arch/arm/mm/proc-feroceon.S        |   13 ++++++++-----
 arch/arm/mm/proc-mohawk.S          |   10 +++++++---
 arch/arm/mm/proc-xsc3.S            |   10 +++++++---
 arch/arm/mm/proc-xscale.S          |   10 +++++++---
 23 files changed, 156 insertions(+), 58 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e290885..5928e78 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -200,6 +200,7 @@ struct cpu_cache_fns {
 
 	void (*dma_map_area)(const void *, size_t, int);
 	void (*dma_unmap_area)(const void *, size_t, int);
+	void (*dma_barrier)(void);
 
 	void (*dma_flush_range)(const void *, const void *);
 };
@@ -232,6 +233,7 @@ extern struct cpu_cache_fns cpu_cache;
  */
 #define dmac_map_area			cpu_cache.dma_map_area
 #define dmac_unmap_area		cpu_cache.dma_unmap_area
+#define dmac_barrier			cpu_cache.dma_barrier
 #define dmac_flush_range		cpu_cache.dma_flush_range
 
 #else
@@ -258,10 +260,12 @@ extern void __cpuc_flush_dcache_area(void *, size_t);
  */
 #define dmac_map_area			__glue(_CACHE,_dma_map_area)
 #define dmac_unmap_area		__glue(_CACHE,_dma_unmap_area)
+#define dmac_barrier			__glue(_CACHE,_dma_barrier)
 #define dmac_flush_range		__glue(_CACHE,_dma_flush_range)
 
 extern void dmac_map_area(const void *, size_t, int);
 extern void dmac_unmap_area(const void *, size_t, int);
+extern void dmac_barrier(void);
 extern void dmac_flush_range(const void *, const void *);
 
 #endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 256ee1c..4a0824c 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -110,6 +110,8 @@ static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 		___dma_page_dev_to_cpu(page, off, size, dir);
 }
 
+extern void __dma_barrier(enum dma_data_direction);
+
 /*
  * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
@@ -345,6 +347,7 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 	BUG_ON(!valid_dma_direction(dir));
 
 	__dma_single_cpu_to_dev(cpu_addr, size, dir);
+	__dma_barrier(dir);
 
 	return virt_to_dma(dev, cpu_addr);
 }
@@ -369,6 +372,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 	BUG_ON(!valid_dma_direction(dir));
 
 	__dma_page_cpu_to_dev(page, offset, size, dir);
+	__dma_barrier(dir);
 
 	return page_to_dma(dev, page) + offset;
 }
@@ -391,6 +395,7 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
 	__dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir);
+	__dma_barrier(dir);
 }
 
 /**
@@ -412,6 +417,7 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t handle,
 {
 	__dma_page_dev_to_cpu(dma_to_page(dev, handle), handle & ~PAGE_MASK,
 		size, dir);
+	__dma_barrier(dir);
 }
 #endif /* CONFIG_DMABOUNCE */
 
@@ -443,6 +449,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
 		return;
 
 	__dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir);
+	__dma_barrier(dir);
 }
 
 static inline void dma_sync_single_range_for_device(struct device *dev,
@@ -455,6 +462,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 		return;
 
 	__dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir);
+	__dma_barrier(dir);
 }
 
 static inline void dma_sync_single_for_cpu(struct device *dev,
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 7148e53..cdcfae2 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -168,8 +168,6 @@ fa_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -186,8 +184,6 @@ fa_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0	
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -201,8 +197,6 @@ ENTRY(fa_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0	
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -229,6 +223,12 @@ ENTRY(fa_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(fa_dma_unmap_area)
 
+ENTRY(fa_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+ENDPROC(fa_dma_barrier)
+
 	__INITDATA
 
 	.type	fa_cache_fns, #object
@@ -241,5 +241,6 @@ ENTRY(fa_cache_fns)
 	.long	fa_flush_kern_dcache_area
 	.long	fa_dma_map_area
 	.long	fa_dma_unmap_area
+	.long	fa_dma_barrier
 	.long	fa_dma_flush_range
 	.size	fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
index c2ff3c5..df34458 100644
--- a/arch/arm/mm/cache-v3.S
+++ b/arch/arm/mm/cache-v3.S
@@ -114,9 +114,11 @@ ENTRY(v3_dma_unmap_area)
  *	- dir	- DMA direction
  */
 ENTRY(v3_dma_map_area)
+ENTRY(v3_dma_barrier)
 	mov	pc, lr
 ENDPROC(v3_dma_unmap_area)
 ENDPROC(v3_dma_map_area)
+ENDPROC(v3_dma_barrier)
 
 	__INITDATA
 
@@ -130,5 +132,6 @@ ENTRY(v3_cache_fns)
 	.long	v3_flush_kern_dcache_area
 	.long	v3_dma_map_area
 	.long	v3_dma_unmap_area
+	.long	v3_dma_barrier
 	.long	v3_dma_flush_range
 	.size	v3_cache_fns, . - v3_cache_fns
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 4810f7e..20260b1 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -126,9 +126,11 @@ ENTRY(v4_dma_unmap_area)
  *	- dir	- DMA direction
  */
 ENTRY(v4_dma_map_area)
+ENTRY(v4_dma_barrier)
 	mov	pc, lr
 ENDPROC(v4_dma_unmap_area)
 ENDPROC(v4_dma_map_area)
+ENDPROC(v4_dma_barrier)
 
 	__INITDATA
 
@@ -142,5 +144,6 @@ ENTRY(v4_cache_fns)
 	.long	v4_flush_kern_dcache_area
 	.long	v4_dma_map_area
 	.long	v4_dma_unmap_area
+	.long	v4_dma_barrier
 	.long	v4_dma_flush_range
 	.size	v4_cache_fns, . - v4_cache_fns
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index df8368a..9c9c875 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -183,7 +183,6 @@ v4wb_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -200,7 +199,6 @@ v4wb_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -240,6 +238,12 @@ ENTRY(v4wb_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(v4wb_dma_unmap_area)
 
+ENTRY(v4wb_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+ENDPROC(v4wb_dma_barrier)
+
 	__INITDATA
 
 	.type	v4wb_cache_fns, #object
@@ -252,5 +256,6 @@ ENTRY(v4wb_cache_fns)
 	.long	v4wb_flush_kern_dcache_area
 	.long	v4wb_dma_map_area
 	.long	v4wb_dma_unmap_area
+	.long	v4wb_dma_barrier
 	.long	v4wb_dma_flush_range
 	.size	v4wb_cache_fns, . - v4wb_cache_fns
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 45c7031..223eea4 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -180,9 +180,11 @@ ENTRY(v4wt_dma_unmap_area)
  *	- dir	- DMA direction
  */
 ENTRY(v4wt_dma_map_area)
+ENTRY(v4wt_dma_barrier)
 	mov	pc, lr
 ENDPROC(v4wt_dma_unmap_area)
 ENDPROC(v4wt_dma_map_area)
+ENDPROC(v4wt_dma_barrier)
 
 	__INITDATA
 
@@ -196,5 +198,6 @@ ENTRY(v4wt_cache_fns)
 	.long	v4wt_flush_kern_dcache_area
 	.long	v4wt_dma_map_area
 	.long	v4wt_dma_unmap_area
+	.long	v4wt_dma_barrier
 	.long	v4wt_dma_flush_range
 	.size	v4wt_cache_fns, . - v4wt_cache_fns
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 9d89c67..b294854 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -219,8 +219,6 @@ v6_dma_inv_range:
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -239,8 +237,6 @@ v6_dma_clean_range:
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -259,8 +255,6 @@ ENTRY(v6_dma_flush_range)
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	cmp	r0, r1
 	blo	1b
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
@@ -289,6 +283,12 @@ ENTRY(v6_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(v6_dma_unmap_area)
 
+ENTRY(v6_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+ENDPROC(v6_dma_barrier)
+
 	__INITDATA
 
 	.type	v6_cache_fns, #object
@@ -301,5 +301,6 @@ ENTRY(v6_cache_fns)
 	.long	v6_flush_kern_dcache_area
 	.long	v6_dma_map_area
 	.long	v6_dma_unmap_area
+	.long	v6_dma_barrier
 	.long	v6_dma_flush_range
 	.size	v6_cache_fns, . - v6_cache_fns
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index bcd64f2..d89d55a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -231,7 +231,6 @@ v7_dma_inv_range:
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
 	mov	pc, lr
 ENDPROC(v7_dma_inv_range)
 
@@ -249,7 +248,6 @@ v7_dma_clean_range:
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
 	mov	pc, lr
 ENDPROC(v7_dma_clean_range)
 
@@ -267,7 +265,6 @@ ENTRY(v7_dma_flush_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
 	mov	pc, lr
 ENDPROC(v7_dma_flush_range)
 
@@ -297,6 +294,11 @@ ENTRY(v7_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(v7_dma_unmap_area)
 
+ENTRY(v7_dma_barrier)
+	dsb
+	mov	pc, lr
+ENDPROC(v7_dma_barrier)
+
 	__INITDATA
 
 	.type	v7_cache_fns, #object
@@ -309,5 +311,6 @@ ENTRY(v7_cache_fns)
 	.long	v7_flush_kern_dcache_area
 	.long	v7_dma_map_area
 	.long	v7_dma_unmap_area
+	.long	v7_dma_barrier
 	.long	v7_dma_flush_range
 	.size	v7_cache_fns, . - v7_cache_fns
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 64daef2..debe7cb 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -108,6 +108,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	memset(ptr, 0, size);
 	dmac_flush_range(ptr, ptr + size);
 	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	dmac_barrier();
 
 	return page;
 }
@@ -509,6 +510,12 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
 }
 EXPORT_SYMBOL(___dma_page_dev_to_cpu);
 
+void __dma_barrier(enum dma_data_direction dir)
+{
+	dmac_barrier();
+}
+EXPORT_SYMBOL(__dma_barrier);
+
 /**
  * dma_map_sg - map a set of SG buffers for streaming mode DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -537,6 +544,9 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		if (dma_mapping_error(dev, s->dma_address))
 			goto bad_mapping;
 	}
+
+	__dma_barrier(dir);
+
 	return nents;
 
  bad_mapping:
@@ -564,6 +574,8 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 	for_each_sg(sg, s, nents, i)
 		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+
+	__dma_barrier(dir);
 }
 EXPORT_SYMBOL(dma_unmap_sg);
 
@@ -588,6 +600,8 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		__dma_page_dev_to_cpu(sg_page(s), s->offset,
 				      s->length, dir);
 	}
+
+	__dma_barrier(dir);
 }
 EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 
@@ -612,5 +626,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		__dma_page_cpu_to_dev(sg_page(s), s->offset,
 				      s->length, dir);
 	}
+
+	__dma_barrier(dir);
 }
 EXPORT_SYMBOL(dma_sync_sg_for_device);
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index d278298..fea33c9 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -271,7 +271,6 @@ arm1020e_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -293,7 +292,6 @@ arm1020e_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -313,7 +311,6 @@ ENTRY(arm1020e_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -340,6 +337,12 @@ ENTRY(arm1020e_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm1020e_dma_unmap_area)
 
+ENTRY(arm1020e_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm1020e_dma_barrier)
+
 ENTRY(arm1020e_cache_fns)
 	.long	arm1020e_flush_kern_cache_all
 	.long	arm1020e_flush_user_cache_all
@@ -349,6 +352,7 @@ ENTRY(arm1020e_cache_fns)
 	.long	arm1020e_flush_kern_dcache_area
 	.long	arm1020e_dma_map_area
 	.long	arm1020e_dma_unmap_area
+	.long	arm1020e_dma_barrier
 	.long	arm1020e_dma_flush_range
 
 	.align	5
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index ce13e4a..ba1a7df 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -260,7 +260,6 @@ arm1022_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -282,7 +281,6 @@ arm1022_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -302,7 +300,6 @@ ENTRY(arm1022_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -329,6 +326,12 @@ ENTRY(arm1022_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm1022_dma_unmap_area)
 
+ENTRY(arm1022_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm1022_dma_barrier)
+
 ENTRY(arm1022_cache_fns)
 	.long	arm1022_flush_kern_cache_all
 	.long	arm1022_flush_user_cache_all
@@ -338,6 +341,7 @@ ENTRY(arm1022_cache_fns)
 	.long	arm1022_flush_kern_dcache_area
 	.long	arm1022_dma_map_area
 	.long	arm1022_dma_unmap_area
+	.long	arm1022_dma_barrier
 	.long	arm1022_dma_flush_range
 
 	.align	5
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 636672a..de648f1 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -254,7 +254,6 @@ arm1026_dma_inv_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -276,7 +275,6 @@ arm1026_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -296,7 +294,6 @@ ENTRY(arm1026_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -323,6 +320,12 @@ ENTRY(arm1026_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm1026_dma_unmap_area)
 
+ENTRY(arm1026_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm1026_dma_barrier)
+
 ENTRY(arm1026_cache_fns)
 	.long	arm1026_flush_kern_cache_all
 	.long	arm1026_flush_user_cache_all
@@ -332,6 +335,7 @@ ENTRY(arm1026_cache_fns)
 	.long	arm1026_flush_kern_dcache_area
 	.long	arm1026_dma_map_area
 	.long	arm1026_dma_unmap_area
+	.long	arm1026_dma_barrier
 	.long	arm1026_dma_flush_range
 
 	.align	5
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 8be8199..ec74093 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -249,7 +249,6 @@ arm920_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -268,7 +267,6 @@ arm920_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -285,7 +283,6 @@ ENTRY(arm920_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -312,6 +309,12 @@ ENTRY(arm920_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm920_dma_unmap_area)
 
+ENTRY(arm920_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm920_dma_barrier)
+
 ENTRY(arm920_cache_fns)
 	.long	arm920_flush_kern_cache_all
 	.long	arm920_flush_user_cache_all
@@ -321,6 +324,7 @@ ENTRY(arm920_cache_fns)
 	.long	arm920_flush_kern_dcache_area
 	.long	arm920_dma_map_area
 	.long	arm920_dma_unmap_area
+	.long	arm920_dma_barrier
 	.long	arm920_dma_flush_range
 
 #endif
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index c0ff8e4..474d4c6 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -251,7 +251,6 @@ arm922_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -270,7 +269,6 @@ arm922_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -287,7 +285,6 @@ ENTRY(arm922_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -314,6 +311,12 @@ ENTRY(arm922_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm922_dma_unmap_area)
 
+ENTRY(arm922_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm922_dma_barrier)
+
 ENTRY(arm922_cache_fns)
 	.long	arm922_flush_kern_cache_all
 	.long	arm922_flush_user_cache_all
@@ -323,6 +326,7 @@ ENTRY(arm922_cache_fns)
 	.long	arm922_flush_kern_dcache_area
 	.long	arm922_dma_map_area
 	.long	arm922_dma_unmap_area
+	.long	arm922_dma_barrier
 	.long	arm922_dma_flush_range
 
 #endif
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 3c6cffe..0336ae3 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -295,7 +295,6 @@ arm925_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -316,7 +315,6 @@ arm925_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -338,7 +336,6 @@ ENTRY(arm925_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -365,6 +362,12 @@ ENTRY(arm925_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm925_dma_unmap_area)
 
+ENTRY(arm925_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm925_dma_barrier)
+
 ENTRY(arm925_cache_fns)
 	.long	arm925_flush_kern_cache_all
 	.long	arm925_flush_user_cache_all
@@ -374,6 +377,7 @@ ENTRY(arm925_cache_fns)
 	.long	arm925_flush_kern_dcache_area
 	.long	arm925_dma_map_area
 	.long	arm925_dma_unmap_area
+	.long	arm925_dma_barrier
 	.long	arm925_dma_flush_range
 
 ENTRY(cpu_arm925_dcache_clean_area)
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 75b707c..473bbe6 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -258,7 +258,6 @@ arm926_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -279,7 +278,6 @@ arm926_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -301,7 +299,6 @@ ENTRY(arm926_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -328,6 +325,12 @@ ENTRY(arm926_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm926_dma_unmap_area)
 
+ENTRY(arm926_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm926_dma_barrier)
+
 ENTRY(arm926_cache_fns)
 	.long	arm926_flush_kern_cache_all
 	.long	arm926_flush_user_cache_all
@@ -337,6 +340,7 @@ ENTRY(arm926_cache_fns)
 	.long	arm926_flush_kern_dcache_area
 	.long	arm926_dma_map_area
 	.long	arm926_dma_unmap_area
+	.long	arm926_dma_barrier
 	.long	arm926_dma_flush_range
 
 ENTRY(cpu_arm926_dcache_clean_area)
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1af1657..c44c963 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -180,7 +180,6 @@ arm940_dma_inv_range:
 	bcs	2b				@ entries 63 to 0
 	subs	r1, r1, #1 << 4
 	bcs	1b				@ segments 7 to 0
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -204,7 +203,6 @@ ENTRY(cpu_arm940_dcache_clean_area)
 	subs	r1, r1, #1 << 4
 	bcs	1b				@ segments 7 to 0
 #endif
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -230,7 +228,6 @@ ENTRY(arm940_dma_flush_range)
 	bcs	2b				@ entries 63 to 0
 	subs	r1, r1, #1 << 4
 	bcs	1b				@ segments 7 to 0
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -257,6 +254,12 @@ ENTRY(arm940_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm940_dma_unmap_area)
 
+ENTRY(arm940_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm940_dma_barrier)
+
 ENTRY(arm940_cache_fns)
 	.long	arm940_flush_kern_cache_all
 	.long	arm940_flush_user_cache_all
@@ -266,6 +269,7 @@ ENTRY(arm940_cache_fns)
 	.long	arm940_flush_kern_dcache_area
 	.long	arm940_dma_map_area
 	.long	arm940_dma_unmap_area
+	.long	arm940_dma_barrier
 	.long	arm940_dma_flush_range
 
 	__INIT
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 1664b6a..11e9ad7 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -227,7 +227,6 @@ arm946_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -248,7 +247,6 @@ arm946_dma_clean_range:
 	cmp	r0, r1
 	blo	1b
 #endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -272,7 +270,6 @@ ENTRY(arm946_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -299,6 +296,12 @@ ENTRY(arm946_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(arm946_dma_unmap_area)
 
+ENTRY(arm946_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(arm946_dma_barrier)
+
 ENTRY(arm946_cache_fns)
 	.long	arm946_flush_kern_cache_all
 	.long	arm946_flush_user_cache_all
@@ -308,6 +311,7 @@ ENTRY(arm946_cache_fns)
 	.long	arm946_flush_kern_dcache_area
 	.long	arm946_dma_map_area
 	.long	arm946_dma_unmap_area
+	.long	arm946_dma_barrier
 	.long	arm946_dma_flush_range
 
 
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 53e6323..50a309e 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -284,7 +284,6 @@ feroceon_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 	.align	5
@@ -320,7 +319,6 @@ feroceon_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 	.align	5
@@ -333,7 +331,6 @@ feroceon_range_dma_clean_range:
 	mcr	p15, 5, r0, c15, c13, 0		@ D clean range start
 	mcr	p15, 5, r1, c15, c13, 1		@ D clean range top
 	msr	cpsr_c, r2			@ restore interrupts
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -351,7 +348,6 @@ ENTRY(feroceon_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 	.align	5
@@ -364,7 +360,6 @@ ENTRY(feroceon_range_dma_flush_range)
 	mcr	p15, 5, r0, c15, c15, 0		@ D clean/inv range start
 	mcr	p15, 5, r1, c15, c15, 1		@ D clean/inv range top
 	msr	cpsr_c, r2			@ restore interrupts
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -405,6 +400,12 @@ ENTRY(feroceon_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(feroceon_dma_unmap_area)
 
+ENTRY(feroceon_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(feroceon_dma_barrier)
+
 ENTRY(feroceon_cache_fns)
 	.long	feroceon_flush_kern_cache_all
 	.long	feroceon_flush_user_cache_all
@@ -414,6 +415,7 @@ ENTRY(feroceon_cache_fns)
 	.long	feroceon_flush_kern_dcache_area
 	.long	feroceon_dma_map_area
 	.long	feroceon_dma_unmap_area
+	.long	feroceon_dma_barrier
 	.long	feroceon_dma_flush_range
 
 ENTRY(feroceon_range_cache_fns)
@@ -425,6 +427,7 @@ ENTRY(feroceon_range_cache_fns)
 	.long	feroceon_range_flush_kern_dcache_area
 	.long	feroceon_range_dma_map_area
 	.long	feroceon_dma_unmap_area
+	.long	feroceon_dma_barrier
 	.long	feroceon_range_dma_flush_range
 
 	.align	5
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index caa3115..09e8883 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -228,7 +228,6 @@ mohawk_dma_inv_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -247,7 +246,6 @@ mohawk_dma_clean_range:
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -265,7 +263,6 @@ ENTRY(mohawk_dma_flush_range)
 	add	r0, r0, #CACHE_DLINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
 /*
@@ -292,6 +289,12 @@ ENTRY(mohawk_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(mohawk_dma_unmap_area)
 
+ENTRY(mohawk_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+ENDPROC(mohawk_dma_barrier)
+
 ENTRY(mohawk_cache_fns)
 	.long	mohawk_flush_kern_cache_all
 	.long	mohawk_flush_user_cache_all
@@ -301,6 +304,7 @@ ENTRY(mohawk_cache_fns)
 	.long	mohawk_flush_kern_dcache_area
 	.long	mohawk_dma_map_area
 	.long	mohawk_dma_unmap_area
+	.long	mohawk_dma_barrier
 	.long	mohawk_dma_flush_range
 
 ENTRY(cpu_mohawk_dcache_clean_area)
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 046b3d8..d033ed4 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -267,7 +267,6 @@ xsc3_dma_inv_range:
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
 	mov	pc, lr
 
 /*
@@ -284,7 +283,6 @@ xsc3_dma_clean_range:
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
 	mov	pc, lr
 
 /*
@@ -301,7 +299,6 @@ ENTRY(xsc3_dma_flush_range)
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
 	mov	pc, lr
 
 /*
@@ -328,6 +325,12 @@ ENTRY(xsc3_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(xsc3_dma_unmap_area)
 
+ENTRY(xsc3_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mov	pc, lr
+ENDPROC(xsc3_dma_barrier)
+
 ENTRY(xsc3_cache_fns)
 	.long	xsc3_flush_kern_cache_all
 	.long	xsc3_flush_user_cache_all
@@ -337,6 +340,7 @@ ENTRY(xsc3_cache_fns)
 	.long	xsc3_flush_kern_dcache_area
 	.long	xsc3_dma_map_area
 	.long	xsc3_dma_unmap_area
+	.long	xsc3_dma_barrier
 	.long	xsc3_dma_flush_range
 
 ENTRY(cpu_xsc3_dcache_clean_area)
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 63037e2..e390ae6 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -325,7 +325,6 @@ xscale_dma_inv_range:
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
 	mov	pc, lr
 
 /*
@@ -342,7 +341,6 @@ xscale_dma_clean_range:
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
 	mov	pc, lr
 
 /*
@@ -360,7 +358,6 @@ ENTRY(xscale_dma_flush_range)
 	add	r0, r0, #CACHELINESIZE
 	cmp	r0, r1
 	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
 	mov	pc, lr
 
 /*
@@ -400,6 +397,12 @@ ENTRY(xscale_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(xscale_dma_unmap_area)
 
+ENTRY(xscale_dma_barrier)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	mov	pc, lr
+ENDPROC(xscale_dma_barrier)
+
 ENTRY(xscale_cache_fns)
 	.long	xscale_flush_kern_cache_all
 	.long	xscale_flush_user_cache_all
@@ -409,6 +412,7 @@ ENTRY(xscale_cache_fns)
 	.long	xscale_flush_kern_dcache_area
 	.long	xscale_dma_map_area
 	.long	xscale_dma_unmap_area
+	.long	xscale_dma_barrier
 	.long	xscale_dma_flush_range
 
 /*



More information about the linux-arm-kernel mailing list