[PATCH] arm64: clear_page: use stnp non-temporal instruction for performance optimizing
Guanghui Feng
guanghuifeng at linux.alibaba.com
Tue Nov 16 07:08:14 PST 2021
When clearing a page, there is no need to allocate cache lines for the
zero values being stored. copy_page.S already uses the stnp instruction as an optimization.
So I have rewritten clear_page.S to use stnp. In my testing, the stnp
version gives roughly twice the performance of the existing implementation.
Signed-off-by: Guanghui Feng <guanghuifeng at linux.alibaba.com>
---
arch/arm64/lib/clear_page.S | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index b84b179..e9dc2d6 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -15,13 +15,18 @@
* x0 - dest
*/
SYM_FUNC_START_PI(clear_page)
- mrs x1, dczid_el0
- and w1, w1, #0xf
- mov x2, #4
- lsl x1, x2, x1
-
-1: dc zva, x0
- add x0, x0, x1
+ mov x1, #0
+ mov x2, #0
+1:
+ stnp x1, x2, [x0]
+ stnp x1, x2, [x0, #16]
+ stnp x1, x2, [x0, #32]
+ stnp x1, x2, [x0, #48]
+ stnp x1, x2, [x0, #64]
+ stnp x1, x2, [x0, #80]
+ stnp x1, x2, [x0, #96]
+ stnp x1, x2, [x0, #112]
+ add x0, x0, #128
tst x0, #(PAGE_SIZE - 1)
b.ne 1b
ret
--
1.8.3.1
More information about the linux-arm-kernel
mailing list