[RFC] Shrink sched_clock some more
Marc Zyngier
marc.zyngier at arm.com
Fri Sep 23 04:52:50 EDT 2011
On 22/09/11 16:36, Russell King - ARM Linux wrote:
> ... by getting rid of the fixed-constant optimization, and moving the
> update code into arch/arm/kernel/sched_clock.c.
>
> Platforms now only have to supply a function to read the sched_clock
> register, and some basic information such as the number of significant
> bits and the tick rate.
This looks similar to a patch I posted a while ago:
http://patchwork.ozlabs.org/patch/112318/
> Signed-off-by: Russell King <rmk+kernel at arm.linux.org.uk>
> ---
> arch/arm/include/asm/sched_clock.h | 98 +--------------------------------
> arch/arm/kernel/sched_clock.c | 91 ++++++++++++++++++++++++++++--
> arch/arm/mach-ixp4xx/common.c | 15 +----
> arch/arm/mach-mmp/time.c | 15 +----
> arch/arm/mach-omap1/time.c | 27 +--------
> arch/arm/mach-omap2/timer.c | 21 +------
> arch/arm/mach-pxa/time.c | 23 +-------
> arch/arm/mach-sa1100/time.c | 27 +--------
> arch/arm/mach-tegra/timer.c | 23 +-------
> arch/arm/mach-u300/timer.c | 22 +------
> arch/arm/plat-iop/time.c | 15 +----
> arch/arm/plat-mxc/time.c | 15 +----
> arch/arm/plat-nomadik/timer.c | 25 +-------
> arch/arm/plat-omap/counter_32k.c | 39 +------------
> arch/arm/plat-orion/time.c | 16 +----
> arch/arm/plat-s5p/s5p-time.c | 29 +---------
> arch/arm/plat-versatile/sched-clock.c | 26 +--------
> 17 files changed, 131 insertions(+), 396 deletions(-)
[...]
> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
> index 9a46370..dfee812 100644
> --- a/arch/arm/kernel/sched_clock.c
> +++ b/arch/arm/kernel/sched_clock.c
> @@ -14,28 +14,107 @@
>
> #include <asm/sched_clock.h>
>
> +struct clock_data {
> + u64 epoch_ns;
> + u32 epoch_cyc;
> + u32 epoch_cyc_copy;
> + u32 mult;
> + u32 shift;
> + u32 mask;
> +};
> +
> static void sched_clock_poll(unsigned long wrap_ticks);
> static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
> -static void (*sched_clock_update_fn)(void);
> +static u32 (*sched_clock_read_fn)(void);
> +static struct clock_data sched_clock_data;
> +
> +static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
> +{
> + return (cyc * mult) >> shift;
> +}
> +
> +/*
> + * Atomically update the sched_clock epoch. Your update callback will
> + * be called from a timer before the counter wraps - read the current
> + * counter value, and call this function to safely move the epochs
> + * forward. Only use this from the update callback.
> + */
> +static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
> +{
> + unsigned long flags;
> + u64 ns = cd->epoch_ns +
> + cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
> +
> + /*
> + * Write epoch_cyc and epoch_ns in a way that the update is
> + * detectable in cyc_to_sched_clock().
> + */
> + raw_local_irq_save(flags);
> + cd->epoch_cyc = cyc;
> + smp_wmb();
> + cd->epoch_ns = ns;
> + smp_wmb();
> + cd->epoch_cyc_copy = cyc;
> + raw_local_irq_restore(flags);
> +}
> +
> +static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
> + u32 cyc, u32 mask)
> +{
> + u64 epoch_ns;
> + u32 epoch_cyc;
> +
> + /*
> + * Load the epoch_cyc and epoch_ns atomically. We do this by
> + * ensuring that we always write epoch_cyc, epoch_ns and
> + * epoch_cyc_copy in strict order, and read them in strict order.
> + * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
> + * the middle of an update, and we should repeat the load.
> + */
> + do {
> + epoch_cyc = cd->epoch_cyc;
> + smp_rmb();
> + epoch_ns = cd->epoch_ns;
> + smp_rmb();
> + } while (epoch_cyc != cd->epoch_cyc_copy);
> +
> + return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask,
> + cd->mult, cd->shift);
> +}
>
> static void sched_clock_poll(unsigned long wrap_ticks)
> {
> + struct clock_data *cd = &sched_clock_data;
> mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
> - sched_clock_update_fn();
> + update_sched_clock(cd, sched_clock_read_fn(), cd->mask);
> }
>
> -void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
> +unsigned long long notrace sched_clock(void)
> +{
> + struct clock_data *cd = &sched_clock_data;
> + u32 cyc = 0;
> +
> + if (sched_clock_read_fn)
> + cyc = sched_clock_read_fn();
In my patch, I tried to avoid having to test the validity of
sched_clock_read_fn by providing a default jiffy-based read function (as
suggested by Nicolas). Could we do something similar here?
It otherwise looks good to me.
M.
--
Jazz is not dead. It just smells funny...
More information about the linux-arm-kernel
mailing list