[PATCH] irqbalance: fix irq_info->load miscalculation for cache domain and others
Seiichi Ikarashi
s.ikarashi at jp.fujitsu.com
Tue Aug 4 03:25:52 PDT 2015
I found a miscalculation of irq_info->load for any IRQs assigned to a
topo_obj other than CPUs. With the debug option enabled, you can see it as
follows:
--
Package 0: numa_node is 0 cpu mask is 0000003f (load 1770000000)
(snip)
Interrupt 102 node_num is -1 (ethernet/459999226:11117)
Interrupt 33 node_num is -1 (legacy/1:0)
Interrupt 0 node_num is -1 (other/1770000000:1)
Interrupt 64 node_num is -1 (other/1:0)
--
Though IRQ0 had just one interrupt during the period, its load value
was wrongly calculated to be 1,770,000,000 nsec.
This is because compute_irq_branch_load_share() does not take into
account any IRQ counts of children and grandchildren, and so it
misunderstands that IRQ0 alone has consumed all the (irq + softirq) CPU time.
To fix the problem, I introduce the following two changes:
- Add topo_obj->irq_count in order to accumulate interrupts effectively,
- Change topo_obj->load from the average of the children's load to the sum
of them. I believe this makes sense from the topological point of
view.
With the modification, the debug log shows:
--
Package 0: numa_node is 0 cpu mask is 0000003f (load 7340266498)
(snip)
Interrupt 102 node_num is -1 (ethernet/2459985766:61406)
Interrupt 33 node_num is -1 (legacy/1:0)
Interrupt 64 node_num is -1 (other/1:0)
Interrupt 62 node_num is -1 (other/1:0)
Interrupt 60 node_num is -1 (other/1:0)
Interrupt 58 node_num is -1 (other/1:0)
Interrupt 9 node_num is -1 (other/1:0)
Interrupt 4 node_num is -1 (other/1:0)
Interrupt 0 node_num is -1 (other/20815:1)
--
The load of IRQ0 is now calculated to be 20,815 nsec, which looks much
more accurate.
Signed-off-by: Seiichi Ikarashi <s.ikarashi at jp.fujitsu.com>
---
procinterrupts.c | 70 ++++++++++++++++++++++++++++++++++++++---------------
types.h | 1 +
2 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/procinterrupts.c b/procinterrupts.c
index f4dcdc7..b7fb8ee 100644
--- a/procinterrupts.c
+++ b/procinterrupts.c
@@ -211,13 +211,6 @@ void parse_proc_interrupts(void)
}
-static void accumulate_irq_count(struct irq_info *info, void *data)
-{
- uint64_t *acc = data;
-
- *acc += (info->irq_count - info->last_irq_count);
-}
-
static void assign_load_slice(struct irq_info *info, void *data)
{
uint64_t *load_slice = data;
@@ -243,36 +236,68 @@ static uint64_t get_parent_branch_irq_count_share(struct topo_obj *d)
total_irq_count /= g_list_length((d->parent)->children);
}
- if (g_list_length(d->interrupts) > 0)
- for_each_irq(d->interrupts, accumulate_irq_count, &total_irq_count);
+ total_irq_count += d->irq_count;
return total_irq_count;
}
+static void get_children_branch_irq_count(struct topo_obj *d, void *data)
+{
+ uint64_t *total_irq_count = data;
+
+ if (g_list_length(d->children) > 0)
+ for_each_object(d->children, get_children_branch_irq_count, total_irq_count);
+
+ *total_irq_count += d->irq_count;
+}
+
static void compute_irq_branch_load_share(struct topo_obj *d, void *data __attribute__((unused)))
{
uint64_t local_irq_counts = 0;
uint64_t load_slice;
- int load_divisor = g_list_length(d->children);
-
- d->load /= (load_divisor ? load_divisor : 1);
if (g_list_length(d->interrupts) > 0) {
local_irq_counts = get_parent_branch_irq_count_share(d);
+ if (g_list_length(d->children) > 0)
+ for_each_object(d->children, get_children_branch_irq_count, &local_irq_counts);
load_slice = local_irq_counts ? (d->load / local_irq_counts) : 1;
for_each_irq(d->interrupts, assign_load_slice, &load_slice);
}
- if (d->parent)
- d->parent->load += d->load;
}
-static void reset_load(struct topo_obj *d, void *data __attribute__((unused)))
+static void accumulate_irq_count(struct irq_info *info, void *data)
+{
+ uint64_t *acc = data;
+
+ *acc += (info->irq_count - info->last_irq_count);
+}
+
+static void accumulate_interrupts(struct topo_obj *d, void *data __attribute__((unused)))
+{
+ if (g_list_length(d->children) > 0) {
+ for_each_object(d->children, accumulate_interrupts, NULL);
+ }
+
+ d->irq_count = 0;
+ if (g_list_length(d->interrupts) > 0)
+ for_each_irq(d->interrupts, accumulate_irq_count, &(d->irq_count));
+}
+
+static void accumulate_load(struct topo_obj *d, void *data)
{
- if (d->parent)
- reset_load(d->parent, NULL);
+ uint64_t *load = data;
+
+ *load += d->load;
+}
- d->load = 0;
+static void set_load(struct topo_obj *d, void *data __attribute__((unused)))
+{
+ if (g_list_length(d->children) > 0) {
+ for_each_object(d->children, set_load, NULL);
+ d->load = 0;
+ for_each_object(d->children, accumulate_load, &(d->load));
+ }
}
void parse_proc_stat(void)
@@ -347,9 +372,14 @@ void parse_proc_stat(void)
}
/*
- * Reset the load values for all objects above cpus
+ * Set the load values for all objects above cpus
+ */
+ for_each_object(numa_nodes, set_load, NULL);
+
+ /*
+ * Collect local irq_count on each object
*/
- for_each_object(cache_domains, reset_load, NULL);
+ for_each_object(numa_nodes, accumulate_interrupts, NULL);
/*
* Now that we have load for each cpu attribute a fair share of the load
diff --git a/types.h b/types.h
index 71ce6a2..d022154 100644
--- a/types.h
+++ b/types.h
@@ -46,6 +46,7 @@ enum obj_type_e {
struct topo_obj {
uint64_t load;
uint64_t last_load;
+ uint64_t irq_count;
enum obj_type_e obj_type;
int number;
int powersave_mode;
More information about the irqbalance
mailing list