[PATCH v4 2/2] arm64: use WFE for long delays

Julien Thierry julien.thierry at arm.com
Fri Oct 13 06:32:56 PDT 2017


The current delay implementation uses the yield instruction, which is a
hint that it is beneficial to schedule another thread. As this is a hint,
it may be implemented as a NOP, causing all delays to be busy loops. This
is the case for many existing CPUs.

Taking advantage of the generic timer sending periodic events to all
cores, we can use WFE during delays to reduce power consumption. This is
beneficial only for delays longer than the period of the timer event
stream.

If timer event stream is not enabled, delays will behave as yield/busy
loops.

Signed-off-by: Julien Thierry <julien.thierry at arm.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Will Deacon <will.deacon at arm.com>
Cc: Mark Rutland <mark.rutland at arm.com>
---
 arch/arm64/lib/delay.c               | 23 +++++++++++++++++++----
 include/clocksource/arm_arch_timer.h |  4 +++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index dad4ec9..e48ac40 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -24,10 +24,28 @@
 #include <linux/module.h>
 #include <linux/timex.h>

+#include <clocksource/arm_arch_timer.h>
+
+#define USECS_TO_CYCLES(time_usecs)			\
+	xloops_to_cycles((time_usecs) * 0x10C7UL)
+
+static inline unsigned long xloops_to_cycles(unsigned long xloops)
+{
+	return (xloops * loops_per_jiffy * HZ) >> 32;
+}
+
 void __delay(unsigned long cycles)
 {
 	cycles_t start = get_cycles();

+	if (arch_timer_evtstrm_available()) {
+		const cycles_t timer_evt_period =
+			USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
+
+		while ((get_cycles() - start + timer_evt_period) < cycles)
+			wfe();
+	}
+
 	while ((get_cycles() - start) < cycles)
 		cpu_relax();
 }
@@ -35,10 +53,7 @@ void __delay(unsigned long cycles)

 inline void __const_udelay(unsigned long xloops)
 {
-	unsigned long loops;
-
-	loops = xloops * loops_per_jiffy * HZ;
-	__delay(loops >> 32);
+	__delay(xloops_to_cycles(xloops));
 }
 EXPORT_SYMBOL(__const_udelay);

diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 4e28283..349e595 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -67,7 +67,9 @@ enum arch_timer_spi_nr {
 #define ARCH_TIMER_USR_VT_ACCESS_EN	(1 << 8) /* virtual timer registers */
 #define ARCH_TIMER_USR_PT_ACCESS_EN	(1 << 9) /* physical timer registers */

-#define ARCH_TIMER_EVT_STREAM_FREQ	10000	/* 100us */
+#define ARCH_TIMER_EVT_STREAM_PERIOD_US	100
+#define ARCH_TIMER_EVT_STREAM_FREQ				\
+	(USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)

 struct arch_timer_kvm_info {
 	struct timecounter timecounter;
--
1.9.1



More information about the linux-arm-kernel mailing list