[PATCH 1/2] sched: Fix the irqtime code to deal with u64 wraps

Venkatesh Pallipadi venki at google.com
Wed Dec 15 13:16:25 EST 2010


Peter,

This looks like something that happened while splitting this into two
patches. I needed a trivial change like below before I could apply
these two patches on linus-git.

Thanks,
Venki
---
@@ -641,17 +641,18 @@ static void sched_irq_time_avg_update(struct rq
*rq, u64 irq_time);

 inline void update_rq_clock(struct rq *rq)
 {
-	if (!rq->skip_clock_update) {
-		int cpu = cpu_of(rq);
-		u64 irq_time;
+	int cpu = cpu_of(rq);
+	u64 irq_time;

-		rq->clock = sched_clock_cpu(cpu);
-		irq_time = irq_time_cpu(cpu);
-		if (rq->clock - irq_time > rq->clock_task)
-			rq->clock_task = rq->clock - irq_time;
+	if (rq->skip_clock_update)
+		return;

-		sched_irq_time_avg_update(rq, irq_time);
-	}
+	rq->clock = sched_clock_cpu(cpu);
+	irq_time = irq_time_cpu(cpu);
+	if (rq->clock - irq_time > rq->clock_task)
+		rq->clock_task = rq->clock - irq_time;
+
+	sched_irq_time_avg_update(rq, irq_time);
 }

 /*

---

On Mon, Dec 13, 2010 at 4:13 AM, Peter Zijlstra <peterz at infradead.org> wrote:
> Subject: sched: Fix the irqtime code to deal with u64 wraps
> From: Peter Zijlstra <a.p.zijlstra at chello.nl>
> Date: Thu Dec 09 14:15:34 CET 2010
>
> Some ARM systems have a short sched_clock() [ which needs to be fixed
> too ], but this exposed a bug in the irq_time code as well, it doesn't
> deal with wraps at all.
>
> Fix the irq_time code to deal with u64 wraps by re-writing the code to
> only use delta increments, which avoids the whole issue.
>
> Reviewed-by: Venkatesh Pallipadi <venki at google.com>
> Reported-by: Mikael Pettersson <mikpe at it.uu.se>
> Tested-by: Mikael Pettersson <mikpe at it.uu.se>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
> LKML-Reference: <new-submission>
> ---
>  kernel/sched.c |   83 ++++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 50 insertions(+), 33 deletions(-)
>
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -636,23 +636,18 @@ static inline struct task_group *task_gr
>
>  #endif /* CONFIG_CGROUP_SCHED */
>
> -static u64 irq_time_cpu(int cpu);
> -static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
> +static void update_rq_clock_task(struct rq *rq, s64 delta);
>
> -inline void update_rq_clock(struct rq *rq)
> +static void update_rq_clock(struct rq *rq)
>  {
> -       int cpu = cpu_of(rq);
> -       u64 irq_time;
> +       s64 delta;
>
>        if (rq->skip_clock_update)
>                return;
>
> -       rq->clock = sched_clock_cpu(cpu);
> -       irq_time = irq_time_cpu(cpu);
> -       if (rq->clock - irq_time > rq->clock_task)
> -               rq->clock_task = rq->clock - irq_time;
> -
> -       sched_irq_time_avg_update(rq, irq_time);
> +       delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
> +       rq->clock += delta;
> +       update_rq_clock_task(rq, delta);
>  }
>
>  /*
> @@ -1946,19 +1941,20 @@ void disable_sched_clock_irqtime(void)
>        sched_clock_irqtime = 0;
>  }
>
> -static u64 irq_time_cpu(int cpu)
> +static inline u64 irq_time_cpu(int cpu)
>  {
> -       if (!sched_clock_irqtime)
> -               return 0;
> -
>        return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
>  }
>
> +/*
> + * Called before incrementing preempt_count on {soft,}irq_enter
> + * and before decrementing preempt_count on {soft,}irq_exit.
> + */
>  void account_system_vtime(struct task_struct *curr)
>  {
>        unsigned long flags;
> +       s64 delta;
>        int cpu;
> -       u64 now, delta;
>
>        if (!sched_clock_irqtime)
>                return;
> @@ -1966,9 +1962,9 @@ void account_system_vtime(struct task_st
>        local_irq_save(flags);
>
>        cpu = smp_processor_id();
> -       now = sched_clock_cpu(cpu);
> -       delta = now - per_cpu(irq_start_time, cpu);
> -       per_cpu(irq_start_time, cpu) = now;
> +       delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
> +       __this_cpu_add(irq_start_time, delta);
> +
>        /*
>         * We do not account for softirq time from ksoftirqd here.
>         * We want to continue accounting softirq time to ksoftirqd thread
> @@ -1976,33 +1972,54 @@ void account_system_vtime(struct task_st
>         * that do not consume any time, but still wants to run.
>         */
>        if (hardirq_count())
> -               per_cpu(cpu_hardirq_time, cpu) += delta;
> +               __this_cpu_add(cpu_hardirq_time, delta);
>        else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
> -               per_cpu(cpu_softirq_time, cpu) += delta;
> +               __this_cpu_add(cpu_softirq_time, delta);
>
>        local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(account_system_vtime);
>
> -static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
> +static void update_rq_clock_task(struct rq *rq, s64 delta)
>  {
> -       if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
> -               u64 delta_irq = curr_irq_time - rq->prev_irq_time;
> -               rq->prev_irq_time = curr_irq_time;
> -               sched_rt_avg_update(rq, delta_irq);
> -       }
> +       s64 irq_delta;
> +
> +       irq_delta = irq_time_cpu(cpu_of(rq)) - rq->prev_irq_time;
> +
> +       /*
> +        * Since irq_time is only updated on {soft,}irq_exit, we might run into
> +        * this case when a previous update_rq_clock() happened inside a
> +        * {soft,}irq region.
> +        *
> +        * When this happens, we stop ->clock_task and only update the
> +        * prev_irq_time stamp to account for the part that fit, so that a next
> +        * update will consume the rest. This ensures ->clock_task is
> +        * monotonic.
> +        *
> +        * It does however cause some slight miss-attribution of {soft,}irq
> +        * time, a more accurate solution would be to update the irq_time using
> +        * the current rq->clock timestamp, except that would require using
> +        * atomic ops.
> +        */
> +       if (irq_delta > delta)
> +               irq_delta = delta;
> +
> +       rq->prev_irq_time += irq_delta;
> +       delta -= irq_delta;
> +       rq->clock_task += delta;
> +
> +       if (irq_delta && sched_feat(NONIRQ_POWER))
> +               sched_rt_avg_update(rq, irq_delta);
>  }
>
> -#else
> +#else /* CONFIG_IRQ_TIME_ACCOUNTING */
>
> -static u64 irq_time_cpu(int cpu)
> +static void update_rq_clock_task(struct rq *rq, s64 delta)
>  {
> -       return 0;
> +       rq->clock_task += delta;
>  }
>
> -static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
> -
> -#endif
> +#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
>
>  #include "sched_idletask.c"
>  #include "sched_fair.c"
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>



More information about the linux-arm-kernel mailing list