[PATCH 4/5] arm64: add smp_vcond_load_relaxed()
Haris Okanovic
harisokn at amazon.com
Tue Nov 5 10:30:40 PST 2024
Implement smp_vcond_load_relaxed() atop __delay_until_ul() on arm64,
to reduce the number of busy loops while waiting for a value condition.
This implementation only supports unsigned long words. It can be extended
via the enclosed case structure in barrier.h as needed.
Signed-off-by: Haris Okanovic <harisokn at amazon.com>
---
arch/arm64/include/asm/barrier.h | 18 ++++++++++++++++++
arch/arm64/lib/delay.c | 16 ++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..188327e3ce72 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -203,6 +203,24 @@ do { \
(typeof(*ptr))VAL; \
})
+/*
+ * Out-of-line backend for smp_vcond_load_relaxed() on unsigned long sized
+ * words. Returns the last value it loaded from @addr.
+ */
+extern unsigned long __smp_vcond_load_relaxed_ul(
+	u64 nsecs, unsigned long *addr, unsigned long mask, unsigned long val);
+
+/*
+ * smp_vcond_load_relaxed() - wait, with a time limit, for a masked value
+ * @nsecs: time budget in nanoseconds
+ * @addr:  pointer to the word to watch
+ * @mask:  bits of *@addr that take part in the comparison
+ * @val:   value the masked word is compared against
+ *
+ * Each argument is evaluated exactly once. The operand is dispatched on
+ * sizeof(*@addr); only unsigned long sized words are supported for now,
+ * and further sizes can be added as extra cases.
+ *
+ * Evaluates to the last value loaded from @addr, typed as *@addr. The
+ * caller has to re-check it against @mask/@val to tell a match from a
+ * timeout.
+ */
+#define smp_vcond_load_relaxed(nsecs, addr, mask, val) ({		\
+	u64 __nsecs = (nsecs);						\
+	typeof(addr) __addr = (addr);					\
+	typeof(*__addr) __mask = (mask);				\
+	typeof(*__addr) __val = (val);					\
+	typeof(*__addr) __cur;						\
+	switch (sizeof(*__addr)) {					\
+	case sizeof(unsigned long):					\
+		/* Cast: any type of this size may use the _ul backend. */ \
+		__cur = __smp_vcond_load_relaxed_ul(			\
+			__nsecs, (unsigned long *)__addr,		\
+			__mask, __val);					\
+		break;							\
+	default:							\
+		/* Fail the build rather than return an unset __cur. */	\
+		compiletime_assert(0,					\
+			"smp_vcond_load_relaxed(): unsupported operand size"); \
+	}								\
+	(__cur);							\
+})
+
#define smp_cond_load_acquire(ptr, cond_expr) \
({ \
typeof(ptr) __PTR = (ptr); \
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index a7c3040af316..a61a13b04439 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/timex.h>
+#include <linux/sched/clock.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/readex.h>
@@ -97,3 +98,18 @@ void __ndelay(unsigned long nsecs)
__delay(NSECS_TO_CYCLES(nsecs));
}
EXPORT_SYMBOL(__ndelay);
+
+/*
+ * __smp_vcond_load_relaxed_ul() - wait until (*addr & mask) == val or timeout
+ * @nsecs: time budget in nanoseconds, measured with local_clock_noinstr()
+ * @addr:  word to watch
+ * @mask:  bits of *@addr that take part in the comparison
+ * @val:   value the masked word is compared against
+ *
+ * Repeatedly hands the wait to __delay_until_ul() until either the masked
+ * value matches or @nsecs have elapsed since entry. __delay_until_ul() is
+ * always called at least once, even when @nsecs is 0.
+ *
+ * NOTE(review): this assumes __delay_until_ul() may return before the
+ * condition holds and before its cycle budget is used up; confirm against
+ * its definition.
+ *
+ * Return: the last value obtained from __delay_until_ul(); the caller must
+ * re-check it to tell a match from a timeout.
+ */
+unsigned long __smp_vcond_load_relaxed_ul(
+	u64 nsecs, unsigned long *addr, unsigned long mask, unsigned long val)
+{
+	const u64 start = local_clock_noinstr();
+	u64 elapsed = 0;
+	unsigned long cur;
+
+	do {
+		/*
+		 * Wait only for what is left of the budget. Re-arming the
+		 * full timeout on every pass would let an early return just
+		 * before the deadline stretch the total wait to almost
+		 * twice @nsecs.
+		 */
+		const u64 remaining = nsecs - elapsed;
+		const u64 cycles = NSECS_TO_CYCLES(remaining);
+
+		cur = __delay_until_ul(cycles, addr, mask, val);
+		if ((cur & mask) == val)
+			break;
+
+		elapsed = local_clock_noinstr() - start;
+	} while (elapsed < nsecs);
+
+	return cur;
+}
+EXPORT_SYMBOL(__smp_vcond_load_relaxed_ul);
--
2.34.1
More information about the linux-arm-kernel
mailing list