[PATCH v3 1/2] nmi_backtrace: Allow excluding an arbitrary CPU

Michal Hocko mhocko at suse.com
Fri Aug 4 00:50:32 PDT 2023


On Thu 03-08-23 16:07:57, Douglas Anderson wrote:
> The APIs that allow backtracing across CPUs have always had a way to
> exclude the current CPU. This convenience means callers didn't need to
> find a place to allocate a CPU mask just to handle the common case.
> 
> Let's extend the API to take a CPU ID to exclude instead of just a
> boolean. This isn't any more complex for the API to handle and allows
> the hardlockup detector to exclude a different CPU (the one it already
> did a trace for) without needing to find space for a CPU mask.
> 
> Arguably, this new API also encourages safer behavior. Specifically if
> the caller wants to avoid tracing the current CPU (maybe because they
> already traced the current CPU) this makes it more obvious to the
> caller that they need to make sure that the current CPU ID can't
> change.

Yes, this looks like the best way forward.

It would have been slightly safer to modify arch_trigger_cpumask_backtrace
by switching arguments so that some leftovers are captured easier.

You also have this leftover
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 00982b133dc1..9f1743ee2b28 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -190,10 +190,6 @@ static inline bool trigger_all_cpu_backtrace(void)
 {
 	return false;
 }
-static inline bool trigger_allbutself_cpu_backtrace(void)
-{
-	return false;
-}
 static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
 {
 	return false;

> Signed-off-by: Douglas Anderson <dianders at chromium.org>

Anyway
Acked-by: Michal Hocko <mhocko at suse.com>

> ---
> 
> Changes in v3:
> - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3.
> 
>  arch/arm/include/asm/irq.h       |  2 +-
>  arch/arm/kernel/smp.c            |  4 ++--
>  arch/loongarch/include/asm/irq.h |  2 +-
>  arch/loongarch/kernel/process.c  |  4 ++--
>  arch/mips/include/asm/irq.h      |  2 +-
>  arch/mips/kernel/process.c       |  4 ++--
>  arch/powerpc/include/asm/irq.h   |  2 +-
>  arch/powerpc/kernel/stacktrace.c |  4 ++--
>  arch/powerpc/kernel/watchdog.c   |  4 ++--
>  arch/sparc/include/asm/irq_64.h  |  2 +-
>  arch/sparc/kernel/process_64.c   |  6 +++---
>  arch/x86/include/asm/irq.h       |  2 +-
>  arch/x86/kernel/apic/hw_nmi.c    |  4 ++--
>  include/linux/nmi.h              | 12 ++++++------
>  kernel/watchdog.c                |  2 +-
>  lib/nmi_backtrace.c              |  6 +++---
>  16 files changed, 31 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
> index 18605f1b3580..26c1d2ced4ce 100644
> --- a/arch/arm/include/asm/irq.h
> +++ b/arch/arm/include/asm/irq.h
> @@ -32,7 +32,7 @@ void handle_IRQ(unsigned int, struct pt_regs *);
>  #include <linux/cpumask.h>
>  
>  extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> -					   bool exclude_self);
> +					   int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
>  
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 6756203e45f3..3431c0553f45 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -846,7 +846,7 @@ static void raise_nmi(cpumask_t *mask)
>  	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi);
>  }
> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
> index a115e8999c69..218b4da0ea90 100644
> --- a/arch/loongarch/include/asm/irq.h
> +++ b/arch/loongarch/include/asm/irq.h
> @@ -40,7 +40,7 @@ void spurious_interrupt(void);
>  #define NR_IRQS_LEGACY 16
>  
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self);
> +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);
>  
>  #define MAX_IO_PICS 2
>  #define NR_IRQS	(64 + (256 * MAX_IO_PICS))
> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> index 2e04eb07abb6..778e8d09953e 100644
> --- a/arch/loongarch/kernel/process.c
> +++ b/arch/loongarch/kernel/process.c
> @@ -345,9 +345,9 @@ static void raise_backtrace(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
>  }
>  
>  #ifdef CONFIG_64BIT
> diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
> index 75abfa834ab7..3a848e7e69f7 100644
> --- a/arch/mips/include/asm/irq.h
> +++ b/arch/mips/include/asm/irq.h
> @@ -77,7 +77,7 @@ extern int cp0_fdc_irq;
>  extern int get_c0_fdc_int(void);
>  
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  
>  #endif /* _ASM_IRQ_H */
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index a3225912c862..5387ed0a5186 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -750,9 +750,9 @@ static void raise_backtrace(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
>  }
>  
>  int mips_get_process_fp_mode(struct task_struct *task)
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index f257cacb49a9..ba1a5974e714 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -55,7 +55,7 @@ int irq_choose_cpu(const struct cpumask *mask);
>  
>  #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
>  extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> -					   bool exclude_self);
> +					   int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
>  
> diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
> index 5de8597eaab8..b15f15dcacb5 100644
> --- a/arch/powerpc/kernel/stacktrace.c
> +++ b/arch/powerpc/kernel/stacktrace.c
> @@ -221,8 +221,8 @@ static void raise_backtrace_ipi(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
>  }
>  #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
> index edb2dd1f53eb..8c464a5d8246 100644
> --- a/arch/powerpc/kernel/watchdog.c
> +++ b/arch/powerpc/kernel/watchdog.c
> @@ -245,7 +245,7 @@ static void watchdog_smp_panic(int cpu)
>  			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
>  		}
>  	} else {
> -		trigger_allbutself_cpu_backtrace();
> +		trigger_allbutcpu_cpu_backtrace(cpu);
>  		cpumask_clear(&wd_smp_cpus_ipi);
>  	}
>  
> @@ -416,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
>  		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
>  
>  		if (sysctl_hardlockup_all_cpu_backtrace)
> -			trigger_allbutself_cpu_backtrace();
> +			trigger_allbutcpu_cpu_backtrace(cpu);
>  
>  		if (hardlockup_panic)
>  			nmi_panic(regs, "Hard LOCKUP");
> diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
> index b436029f1ced..8c4c0c87f998 100644
> --- a/arch/sparc/include/asm/irq_64.h
> +++ b/arch/sparc/include/asm/irq_64.h
> @@ -87,7 +87,7 @@ static inline unsigned long get_softint(void)
>  }
>  
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  
>  extern void *hardirq_stack[NR_CPUS];
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index b51d8fb0ecdc..1ea3f37fa985 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -236,7 +236,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
>  	struct thread_info *tp = current_thread_info();
>  	struct pt_regs *regs = get_irq_regs();
> @@ -252,7 +252,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
>  
>  	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
>  
> -	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
> +	if (cpumask_test_cpu(this_cpu, mask) && this_cpu != exclude_cpu)
>  		__global_reg_self(tp, regs, this_cpu);
>  
>  	smp_fetch_global_regs();
> @@ -260,7 +260,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
>  	for_each_cpu(cpu, mask) {
>  		struct global_reg_snapshot *gp;
>  
> -		if (exclude_self && cpu == this_cpu)
> +		if (cpu == exclude_cpu)
>  			continue;
>  
>  		gp = &global_cpu_snapshot[cpu].reg;
> diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
> index 29e083b92813..836c170d3087 100644
> --- a/arch/x86/include/asm/irq.h
> +++ b/arch/x86/include/asm/irq.h
> @@ -42,7 +42,7 @@ extern void init_ISA_irqs(void);
>  
>  #ifdef CONFIG_X86_LOCAL_APIC
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
> diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
> index 34a992e275ef..d6e01f924299 100644
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -34,9 +34,9 @@ static void nmi_raise_cpu_backtrace(cpumask_t *mask)
>  	apic->send_IPI_mask(mask, NMI_VECTOR);
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self,
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
>  				      nmi_raise_cpu_backtrace);
>  }
>  
> diff --git a/include/linux/nmi.h b/include/linux/nmi.h
> index e3e6a64b98e0..00982b133dc1 100644
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void)
>  #ifdef arch_trigger_cpumask_backtrace
>  static inline bool trigger_all_cpu_backtrace(void)
>  {
> -	arch_trigger_cpumask_backtrace(cpu_online_mask, false);
> +	arch_trigger_cpumask_backtrace(cpu_online_mask, -1);
>  	return true;
>  }
>  
> -static inline bool trigger_allbutself_cpu_backtrace(void)
> +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
>  {
> -	arch_trigger_cpumask_backtrace(cpu_online_mask, true);
> +	arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu);
>  	return true;
>  }
>  
>  static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
>  {
> -	arch_trigger_cpumask_backtrace(mask, false);
> +	arch_trigger_cpumask_backtrace(mask, -1);
>  	return true;
>  }
>  
>  static inline bool trigger_single_cpu_backtrace(int cpu)
>  {
> -	arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
> +	arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1);
>  	return true;
>  }
>  
>  /* generic implementation */
>  void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> -				   bool exclude_self,
> +				   int exclude_cpu,
>  				   void (*raise)(cpumask_t *mask));
>  bool nmi_cpu_backtrace(struct pt_regs *regs);
>  
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index be38276a365f..085d7a78f62f 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -523,7 +523,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  			dump_stack();
>  
>  		if (softlockup_all_cpu_backtrace) {
> -			trigger_allbutself_cpu_backtrace();
> +			trigger_allbutcpu_cpu_backtrace(smp_processor_id());
>  			clear_bit_unlock(0, &soft_lockup_nmi_warn);
>  		}
>  
> diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
> index 5274bbb026d7..33c154264bfe 100644
> --- a/lib/nmi_backtrace.c
> +++ b/lib/nmi_backtrace.c
> @@ -34,7 +34,7 @@ static unsigned long backtrace_flag;
>   * they are passed being updated as a side effect of this call.
>   */
>  void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> -				   bool exclude_self,
> +				   int exclude_cpu,
>  				   void (*raise)(cpumask_t *mask))
>  {
>  	int i, this_cpu = get_cpu();
> @@ -49,8 +49,8 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
>  	}
>  
>  	cpumask_copy(to_cpumask(backtrace_mask), mask);
> -	if (exclude_self)
> -		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
> +	if (exclude_cpu != -1)
> +		cpumask_clear_cpu(exclude_cpu, to_cpumask(backtrace_mask));
>  
>  	/*
>  	 * Don't try to send an NMI to this cpu; it may work on some
> -- 
> 2.41.0.585.gd2178a4bd4-goog

-- 
Michal Hocko
SUSE Labs



More information about the linux-arm-kernel mailing list