arm64 torture test hotplug failures (offlining causes -EBUSY)

Joel Fernandes joel at joelfernandes.org
Thu Jan 19 12:25:22 PST 2023


On Thu, Jan 19, 2023 at 02:57:59PM +0100, Frederic Weisbecker wrote:
> On Wed, Jan 18, 2023 at 10:37:08PM +0000, Joel Fernandes wrote:
> > 
> > That's a great idea. I found a way to do that without having to do the
> > EXPORT_SYMBOL (like in Zhouyi's patch).
> > 
> > Would the following be acceptable (only build-tested)?
> > 
> > I can run more tests and submit a patch:
> > 
> > diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
> > index 55405ebf23ab..f73bc520b70e 100644
> > --- a/drivers/base/cpu.c
> > +++ b/drivers/base/cpu.c
> > @@ -487,7 +487,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
> >  bool cpu_is_hotpluggable(unsigned int cpu)
> >  {
> >  	struct device *dev = get_cpu_device(cpu);
> > -	return dev && container_of(dev, struct cpu, dev)->hotpluggable;
> > +	return dev && container_of(dev, struct cpu, dev)->hotpluggable
> > +		&& !tick_nohz_cpu_hotpluggable(cpu);
> >  }
> >  EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
> >  
> > diff --git a/include/linux/tick.h b/include/linux/tick.h
> > index bfd571f18cfd..9459fef5b857 100644
> > --- a/include/linux/tick.h
> > +++ b/include/linux/tick.h
> > @@ -216,6 +216,7 @@ extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
> >  				     enum tick_dep_bits bit);
> >  extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
> >  				       enum tick_dep_bits bit);
> > +extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu);
> >  
> >  /*
> >   * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
> > @@ -280,6 +281,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
> >  
> >  static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
> >  static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
> > +static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; }
> >  
> >  static inline void tick_dep_set(enum tick_dep_bits bit) { }
> >  static inline void tick_dep_clear(enum tick_dep_bits bit) { }
> > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> > index 9c6f661fb436..d1cc7525240e 100644
> > --- a/kernel/time/tick-sched.c
> > +++ b/kernel/time/tick-sched.c
> > @@ -522,6 +522,11 @@ static int tick_nohz_cpu_down(unsigned int cpu)
> >  	return 0;
> >  }
> >  
> > +bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
> > +{
> > +	return tick_nohz_cpu_down(cpu) == 0;
> > +}
> > +
> 
> Can you make it the opposite? Have tick_nohz_cpu_down() call
> tick_nohz_cpu_hotpluggable()? To avoid future accidents.
> 
> Thanks.

You mean move the logic of tick_nohz_cpu_down() into
tick_nohz_cpu_hotpluggable()? That won't work because
tick_nohz_cpu_hotpluggable() returns a boolean, while tick_nohz_cpu_down(cpu)
returns an integer.

I could do something like the following and that should prevent the accident
you mentioned, which I think is that someone accidentally adds some code with
side-effects to tick_nohz_cpu_down() and ends up changing the behavior of
tick_nohz_cpu_hotpluggable(). Or was there a different accident you were
referring to?

I will submit a patch like the following, then. Thanks.

---8<-----------------------

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 4c98849577d4..7af8e33735a3 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -487,7 +487,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
 bool cpu_is_hotpluggable(unsigned int cpu)
 {
 	struct device *dev = get_cpu_device(cpu);
-	return dev && container_of(dev, struct cpu, dev)->hotpluggable;
+	return dev && container_of(dev, struct cpu, dev)->hotpluggable
+		&& tick_nohz_cpu_hotpluggable(cpu);
 }
 EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index bfd571f18cfd..9459fef5b857 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -216,6 +216,7 @@ extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
 				     enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
 				       enum tick_dep_bits bit);
+extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu);
 
 /*
  * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
@@ -280,6 +281,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
 
 static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
 static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; }
 
 static inline void tick_dep_set(enum tick_dep_bits bit) { }
 static inline void tick_dep_clear(enum tick_dep_bits bit) { }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index ba2ac1469d47..6a2e52d5f0d0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -532,7 +532,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 	tick_nohz_full_running = true;
 }
 
-static int tick_nohz_cpu_down(unsigned int cpu)
+static int tick_nohz_cpu_hotplug_ret(unsigned int cpu)
 {
 	/*
 	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
@@ -544,6 +544,16 @@ static int tick_nohz_cpu_down(unsigned int cpu)
 	return 0;
 }
 
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+	return tick_nohz_cpu_hotplug_ret(cpu);
+}
+
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
+{
+	return tick_nohz_cpu_hotplug_ret(cpu) == 0;
+}
+
 void __init tick_nohz_init(void)
 {
 	int cpu, ret;
-- 
2.39.0.246.g2a6d74b583-goog




More information about the linux-arm-kernel mailing list