Many embedded platforms use the clocksource which is used for
timekeeping as their sched_clock as well. They need to take care of
wraparounds though, as sched_clock() is expected to return
monotonically increasing nsecs.

The timekeeping code and the NOHZ core already take care of the
timekeeping clocksource wraparound, so we can simply reuse the
timekeeping clocksource where possible.

Clocksources which can be used by the clksrc based sched_clock must
have the CLOCK_SOURCE_SCHED_CLOCK flag set.

This works with the default (weak) sched_clock implementation, but the
clksrc based function is also provided for architectures where multi
platform kernels might override it.

If no clocksource with CLOCK_SOURCE_SCHED_CLOCK is available, the
function falls back to the jiffies based sched_clock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |    1 
 include/linux/sched.h       |    1 
 kernel/sched_clock.c        |   12 ----
 kernel/time/Kconfig         |    4 +
 kernel/time/timekeeping.c   |  125 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 131 insertions(+), 12 deletions(-)

Index: linux-2.6-tip/include/linux/clocksource.h
===================================================================
--- linux-2.6-tip.orig/include/linux/clocksource.h
+++ linux-2.6-tip/include/linux/clocksource.h
@@ -205,6 +205,7 @@ struct clocksource {
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
+#define CLOCK_SOURCE_SCHED_CLOCK		0x80
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
Index: linux-2.6-tip/include/linux/sched.h
===================================================================
--- linux-2.6-tip.orig/include/linux/sched.h
+++ linux-2.6-tip/include/linux/sched.h
@@ -1836,6 +1836,7 @@ static inline int set_cpus_allowed(struc
  * Please use one of the three interfaces below.
  */
 extern unsigned long long notrace sched_clock(void);
+extern unsigned long long notrace sched_clock_clksrc(void);
 /*
  * See the comment in kernel/sched_clock.c
  */
Index: linux-2.6-tip/kernel/sched_clock.c
===================================================================
--- linux-2.6-tip.orig/kernel/sched_clock.c
+++ linux-2.6-tip/kernel/sched_clock.c
@@ -67,18 +67,6 @@
 #include <linux/ktime.h>
 #include <linux/sched.h>
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
-					* (NSEC_PER_SEC / HZ);
-}
-EXPORT_SYMBOL_GPL(sched_clock);
-
 __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
Index: linux-2.6-tip/kernel/time/Kconfig
===================================================================
--- linux-2.6-tip.orig/kernel/time/Kconfig
+++ linux-2.6-tip/kernel/time/Kconfig
@@ -27,3 +27,7 @@ config GENERIC_CLOCKEVENTS_BUILD
 	default y
 	depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR
 
+# Selectable by architectures which want to reuse the clocksource as
+# sched_clock
+config HAVE_CLKSRC_SCHED_CLOCK
+	bool
\ No newline at end of file
Index: linux-2.6-tip/kernel/time/timekeeping.c
===================================================================
--- linux-2.6-tip.orig/kernel/time/timekeeping.c
+++ linux-2.6-tip/kernel/time/timekeeping.c
@@ -21,6 +21,9 @@
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
 
+static void notrace sched_clock_clksrc_install(struct clocksource *clock);
+static void notrace sched_clock_clksrc_update(void);
+
 /* Structure holding internal timekeeping values. */
 struct timekeeper {
 	/* Current clocksource used for timekeeping. */
@@ -66,6 +69,9 @@ static void timekeeper_setup_internals(s
 	cycle_t interval;
 	u64 tmp, ntpinterval;
 
+	if (clock->flags & CLOCK_SOURCE_SCHED_CLOCK)
+		sched_clock_clksrc_install(clock);
+
 	timekeeper.clock = clock;
 	clock->cycle_last = clock->read(clock);
 
@@ -1025,6 +1031,7 @@ void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
 	update_wall_time();
+	sched_clock_clksrc_update();
 	calc_global_load(ticks);
 }
 
@@ -1060,3 +1067,121 @@ void xtime_update(unsigned long ticks)
 	do_timer(ticks);
 	write_sequnlock(&xtime_lock);
 }
+
+/**
+ * struct sched_clksrc - clocksource based sched_clock
+ * @clock:		Pointer to the clocksource
+ * @nsecs:		Nanoseconds base value
+ * @seqcnt:		Sequence counter for sched_clock
+ * @last_update:	Counter value at last update
+ * @mult:		Multiplier for nsec conversion
+ * @shift:		Shift value (divisor) for nsec conversion
+ * @mask:		Mask for the delta
+ * @update_cycles:	Cycles after which we update nsecs and last_update
+ * @update_nsecs:	Nanoseconds value corresponding to @update_cycles
+ */
+struct sched_clksrc {
+	struct clocksource	*clock;
+	u64			nsecs;
+	struct seqcount		seqcnt;
+	u64			last_update;
+	u32			mult;
+	u32			shift;
+	u64			mask;
+	u64			update_cycles;
+	u64			update_nsecs;
+};
+
+static struct sched_clksrc sched_clksrc;
+
+/*
+ * Called from clocksource code when a clocksource usable for
+ * sched_clock is installed.
+ */
+static void notrace sched_clock_clksrc_install(struct clocksource *clock)
+{
+	u64 nsecs, cyc = clock->mask;
+
+	if (sched_clksrc.clock)
+		return;
+
+	/* Make sure we get the wraparounds */
+	cyc >>= 2;
+
+	/* Use the raw mult/shift values */
+	sched_clksrc.mult = clock->mult;
+	sched_clksrc.shift = clock->shift;
+	sched_clksrc.mask = clock->mask;
+	sched_clksrc.update_cycles = cyc;
+	nsecs = clocksource_cyc2ns(cyc, sched_clksrc.mult, sched_clksrc.shift);
+	sched_clksrc.update_nsecs = nsecs;
+	/* Establish the base line */
+	sched_clksrc.nsecs = (u64)(jiffies - INITIAL_JIFFIES) *
+		(NSEC_PER_SEC / HZ);
+	sched_clksrc.last_update = clock->read(clock) & sched_clksrc.mask;
+	sched_clksrc.clock = clock;
+}
+
+/*
+ * Called from timekeeping code with xtime lock held and interrupts
+ * disabled, so we have only one updater at a time. Note that readers
+ * of sched_clock are _NOT_ affected by xtime_lock. We have our own
+ * sequence counter for sched_clksrc.
+ */
+static void notrace sched_clock_clksrc_update(void)
+{
+	struct clocksource *clock = sched_clksrc.clock;
+	u64 delta;
+
+	if (!clock)
+		return;
+
+	delta = clock->read(clock) - sched_clksrc.last_update;
+	delta &= sched_clksrc.mask;
+	while (delta >= sched_clksrc.update_cycles) {
+		delta -= sched_clksrc.update_cycles;
+		write_seqcount_begin(&sched_clksrc.seqcnt);
+		sched_clksrc.last_update += sched_clksrc.update_cycles;
+		sched_clksrc.nsecs += sched_clksrc.update_nsecs;
+		write_seqcount_end(&sched_clksrc.seqcnt);
+	}
+}
+
+/*
+ * Clocksource based scheduler clock - returns current time in nanosec units.
+ *
+ * Can be called from the default implementation below or from
+ * architecture code if it overrides the default implementation.
+ */
+unsigned long long notrace sched_clock_clksrc(void)
+{
+	struct clocksource *clock = sched_clksrc.clock;
+	unsigned int seq;
+	u64 nsecs, last, delta;
+
+	if (!sched_clksrc.clock)
+		return (unsigned long long)(jiffies - INITIAL_JIFFIES) *
+			(NSEC_PER_SEC / HZ);
+
+	do {
+		seq = read_seqcount_begin(&sched_clksrc.seqcnt);
+		last = sched_clksrc.last_update;
+		nsecs = sched_clksrc.nsecs;
+	} while (read_seqcount_retry(&sched_clksrc.seqcnt, seq));
+
+	delta = (clock->read(clock) - last) & sched_clksrc.mask;
+
+	return nsecs + clocksource_cyc2ns(delta, sched_clksrc.mult,
+					  sched_clksrc.shift);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is the default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+	return sched_clock_clksrc();
+}
+EXPORT_SYMBOL_GPL(sched_clock);
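
As a usage sketch (illustration only, not part of the patch): a SoC timer
driver opts in by setting CLOCK_SOURCE_SCHED_CLOCK on its clocksource, and
an architecture whose multi platform kernel overrides the weak sched_clock()
can still forward to sched_clock_clksrc(). The driver name, register offset,
iomem base and the 24MHz rate below are made up; such an architecture would
additionally select the new HAVE_CLKSRC_SCHED_CLOCK symbol in its Kconfig.

/* Hypothetical example - names and hardware details are invented */
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/sched.h>

#define MY_TIMER_COUNT	0x04			/* made-up counter register offset */

static void __iomem *my_timer_base;		/* mapped elsewhere in the driver */

static cycle_t my_timer_read(struct clocksource *cs)
{
	/* Free running 32 bit upward counter */
	return readl(my_timer_base + MY_TIMER_COUNT);
}

static struct clocksource my_timer_clocksource = {
	.name	= "my-timer",
	.rating	= 300,
	.read	= my_timer_read,
	.mask	= CLOCKSOURCE_MASK(32),
	/* Opt in: usable for sched_clock once selected for timekeeping */
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SCHED_CLOCK,
};

static int __init my_timer_init(void)
{
	/*
	 * When the timekeeping core switches to this clocksource,
	 * timekeeper_setup_internals() installs it for sched_clock as well.
	 */
	return clocksource_register_hz(&my_timer_clocksource, 24000000);
}

/*
 * A multi platform kernel which overrides the weak default can still
 * forward to the clocksource based implementation.
 */
unsigned long long notrace sched_clock(void)
{
	return sched_clock_clksrc();
}

Note that the flag only takes effect once the timekeeping core actually
selects the clocksource; until then sched_clock_clksrc() returns the jiffies
based value, as described in the changelog.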