[RFC] ARM: kernel: io: Optimize memcpy_fromio function.

Pardeep Kumar Singla b45784 at freescale.com
Mon Sep 9 12:20:04 EDT 2013


Currently the memcpy_fromio function copies data one byte at a time.
By replacing the copy loop with inline assembly code, it now copies 32 bytes at a time
when both the source and destination are 4-byte aligned and at least 32 bytes are requested.
Two test cases were run (tested on an mx6qsabresd board), with the following results:

a) First test case, calling the memcpy_fromio function only once:
	1. With optimization it takes just 6 usec.
	2. Without optimization it takes 114 usec.
b) Second test case, calling the memcpy_fromio function 100000 times:
	1. With optimization it takes just 0.8 sec.
	2. Without optimization it takes 11 sec.

Signed-off-by: Pardeep Kumar Singla <b45784 at freescale.com>
---
 arch/arm/kernel/io.c |   37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c
index dcd5b4d..3eb8961 100644
--- a/arch/arm/kernel/io.c
+++ b/arch/arm/kernel/io.c
@@ -4,16 +4,39 @@
 
 /*
  * Copy data from IO memory space to "real" memory space.
- * This needs to be optimized.
  */
+void *asmcopy_8w(void *dst, void *src, int blocks);
+asm("						\n\
+	.align  2				\n\
+	.text					\n\
+	.global asmcopy_8w			\n\
+	.type asmcopy_8w, %function		\n\
+asmcopy_8w:					\n\
+	stmfd sp!, {r3-r10, lr}			\n\
+.loop:  ldmia r1!, {r3-r10}			\n\
+	stmia r0!, {r3-r10}			\n\
+	subs r2, r2, #1				\n\
+	bne .loop				\n\
+	ldmfd sp!, {r3-r10, pc}			\n\
+");
+
 void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
-	unsigned char *t = to;
-	while (count) {
-		count--;
-		*t = readb(from);
-		t++;
-		from++;
+	unsigned char *dst = (unsigned char *)to;
+	unsigned char *src = (unsigned char *)from;
+	if ((((int)src & 3) == 0) && (((int)dst & 3) == 0) && (count >= 32)) {
+		/* copy big chunks */
+		asmcopy_8w(dst, src, count >> 5);
+		dst += count & (~0x1f);
+		src += count & (~0x1f);
+		count &= 0x1f;
+	}
+
+	/* un-aligned or trailing accesses */
+	while (count--) {
+		*dst = readb(src);
+		dst++;
+		src++;
 	}
 }
 
-- 
1.7.9.5





More information about the linux-arm-kernel mailing list