[PATCH] irqbalance: fix irq_info->load miscalculation for cache domain and others

Neil Horman nhorman at tuxdriver.com
Tue Aug 4 07:05:48 PDT 2015


On Tue, Aug 04, 2015 at 07:25:52PM +0900, Seiichi Ikarashi wrote:
> I found miscalculation of irq_info->load for any irqs assigned to
> topo_obj other than CPUs. With the debug option, you can see it as
> follows:
> 
> --
> Package 0:  numa_node is 0 cpu mask is 0000003f (load 1770000000)
> (snip)
>                   Interrupt 102 node_num is -1 (ethernet/459999226:11117) 
>           Interrupt 33 node_num is -1 (legacy/1:0) 
>   Interrupt 0 node_num is -1 (other/1770000000:1) 
>   Interrupt 64 node_num is -1 (other/1:0) 
> --
> 
> Though IRQ0 had just one interruption during the period, its load value
> was wrongly calculated to be 1,770,000,000 nsec.
> This is because compute_irq_branch_load_share() does not take into
> account any irq counts of children and grandchildren, and does mis-
> understand that only IRQ0 has consumed all the (irq + softirq) CPU time.
> 
> To fix the problem, I introduce following two changes:
> 
>  - Add topo_obj->irq_count in order to accumulate interrupts effectively,
>  - Change topo_obj->load from the average of children's load to the sum
>    of them. I believe it would make sense from the topological point of
>    view.
> 
> With the modification, the debug log shows:
> 
> --
> Package 0:  numa_node is 0 cpu mask is 0000003f (load 7340266498)
> (snip)
>                   Interrupt 102 node_num is -1 (ethernet/2459985766:61406) 
>           Interrupt 33 node_num is -1 (legacy/1:0) 
>   Interrupt 64 node_num is -1 (other/1:0) 
>   Interrupt 62 node_num is -1 (other/1:0) 
>   Interrupt 60 node_num is -1 (other/1:0) 
>   Interrupt 58 node_num is -1 (other/1:0) 
>   Interrupt 9 node_num is -1 (other/1:0) 
>   Interrupt 4 node_num is -1 (other/1:0) 
>   Interrupt 0 node_num is -1 (other/20815:1) 
> --
> 
> The load of IRQ0 is now calculated to be 20,815 nsec. It looks more
> accurate.
> 
> Signed-off-by: Seiichi Ikarashi <s.ikarashi at jp.fujitsu.com>
> 
> ---
>  procinterrupts.c |   70 ++++++++++++++++++++++++++++++++++++++---------------
>  types.h          |    1 +
>  2 files changed, 51 insertions(+), 20 deletions(-)
> 
> diff --git a/procinterrupts.c b/procinterrupts.c
> index f4dcdc7..b7fb8ee 100644
> --- a/procinterrupts.c
> +++ b/procinterrupts.c
> @@ -211,13 +211,6 @@ void parse_proc_interrupts(void)
>  }
>  
>  
> -static void accumulate_irq_count(struct irq_info *info, void *data)
> -{
> -	uint64_t *acc = data;
> -
> -	*acc += (info->irq_count - info->last_irq_count);
> -}
> -
>  static void assign_load_slice(struct irq_info *info, void *data)
>  {
>  	uint64_t *load_slice = data;
> @@ -243,36 +236,68 @@ static uint64_t get_parent_branch_irq_count_share(struct topo_obj *d)
>  		total_irq_count /= g_list_length((d->parent)->children);
>  	}
>  
> -	if (g_list_length(d->interrupts) > 0)
> -		for_each_irq(d->interrupts, accumulate_irq_count, &total_irq_count);
> +	total_irq_count += d->irq_count;
>  
>  	return total_irq_count;
>  }
>  
> +static void get_children_branch_irq_count(struct topo_obj *d, void *data)
> +{
> +	uint64_t *total_irq_count = data;
> +
> +	if (g_list_length(d->children) > 0)
> +		for_each_object(d->children, get_children_branch_irq_count, total_irq_count);
> +
> +	*total_irq_count += d->irq_count;
> +}
> +
>  static void compute_irq_branch_load_share(struct topo_obj *d, void *data __attribute__((unused)))
>  {
>  	uint64_t local_irq_counts = 0;
>  	uint64_t load_slice;
> -	int	load_divisor = g_list_length(d->children);
> -
> -	d->load /= (load_divisor ? load_divisor : 1);
>  
>  	if (g_list_length(d->interrupts) > 0) {
>  		local_irq_counts = get_parent_branch_irq_count_share(d);
> +		if (g_list_length(d->children) > 0)
> +			for_each_object(d->children, get_children_branch_irq_count, &local_irq_counts);
>  		load_slice = local_irq_counts ? (d->load / local_irq_counts) : 1;
>  		for_each_irq(d->interrupts, assign_load_slice, &load_slice);
>  	}
>  
> -	if (d->parent)
> -		d->parent->load += d->load;
>  }
>  
> -static void reset_load(struct topo_obj *d, void *data __attribute__((unused)))
> +static void accumulate_irq_count(struct irq_info *info, void *data)
> +{
> +	uint64_t *acc = data;
> +
> +	*acc += (info->irq_count - info->last_irq_count);
> +}
> +
> +static void accumulate_interrupts(struct topo_obj *d, void *data __attribute__((unused)))
> +{
> +	if (g_list_length(d->children) > 0) {
> +		for_each_object(d->children, accumulate_interrupts, NULL);
> +	}
> +
> +	d->irq_count = 0;
> +	if (g_list_length(d->interrupts) > 0)
> +		for_each_irq(d->interrupts, accumulate_irq_count, &(d->irq_count));
> +}
> +
> +static void accumulate_load(struct topo_obj *d, void *data)
>  {
> -	if (d->parent)
> -		reset_load(d->parent, NULL);
> +	uint64_t *load = data;
> +
> +	*load += d->load;
> +}
>  
> -	d->load = 0;
> +static void set_load(struct topo_obj *d, void *data __attribute__((unused)))
> +{
> +	if (g_list_length(d->children) > 0) {
> +		for_each_object(d->children, set_load, NULL);
> +		d->load = 0;
> +		for_each_object(d->children, accumulate_load, &(d->load));
> +	}
>  }
>  
>  void parse_proc_stat(void)
> @@ -347,9 +372,14 @@ void parse_proc_stat(void)
>  	}
>  
>  	/*
> - 	 * Reset the load values for all objects above cpus
> + 	 * Set the load values for all objects above cpus
> + 	 */
> +	for_each_object(numa_nodes, set_load, NULL);
> +
> +	/*
> + 	 * Collect local irq_count on each object
>   	 */
> -	for_each_object(cache_domains, reset_load, NULL);
> +	for_each_object(numa_nodes, accumulate_interrupts, NULL);
>  
>  	/*
>   	 * Now that we have load for each cpu attribute a fair share of the load
> diff --git a/types.h b/types.h
> index 71ce6a2..d022154 100644
> --- a/types.h
> +++ b/types.h
> @@ -46,6 +46,7 @@ enum obj_type_e {
>  struct topo_obj {
>  	uint64_t load;
>  	uint64_t last_load;
> +	uint64_t irq_count;
>  	enum obj_type_e obj_type;
>  	int number;
>  	int powersave_mode;
> 
Applied, thanks
Neil




More information about the irqbalance mailing list