[PATCH v1 1/1] arm_pmu: acpi: Pre-allocate pmu structures

Mark Rutland mark.rutland at arm.com
Wed Sep 28 08:47:21 PDT 2022


Hi Pierre,

Thanks for this, and sorry for the delayed reply.

On Mon, Sep 12, 2022 at 05:51:04PM +0200, Pierre Gondois wrote:
> Running a preempt_rt kernel based on v5.19-rc3-rt4 on an Ampere Altra
> triggers:
> [   12.642389] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
> [   12.642402] in_atomic(): 0, irqs_disabled(): 128, non_block: 0, pid: 24, name: cpuhp/0
> [   12.642406] preempt_count: 0, expected: 0
> [   12.642409] RCU nest depth: 0, expected: 0
> [   12.642411] 3 locks held by cpuhp/0/24:
> [   12.642414] #0: ffffd8a22c8870d0 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun (linux/kernel/cpu.c:754)
> [   12.642429] #1: ffffd8a22c887120 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun (linux/kernel/cpu.c:754)
> [   12.642436] #2: ffff083e7f0d97b8 ((&c->lock)){+.+.}-{3:3}, at: ___slab_alloc (linux/mm/slub.c:2954)
> [   12.642458] irq event stamp: 42
> [   12.642460] hardirqs last enabled at (41): finish_task_switch (linux/./arch/arm64/include/asm/irqflags.h:35)
> [   12.642471] hardirqs last disabled at (42): cpuhp_thread_fun (linux/kernel/cpu.c:776 (discriminator 1))
> [   12.642476] softirqs last enabled at (0): copy_process (linux/./include/linux/lockdep.h:191)
> [   12.642484] softirqs last disabled at (0): 0x0
> [   12.642495] CPU: 0 PID: 24 Comm: cpuhp/0 Tainted: G        W         5.19.0-rc3-rt4-custom-piegon01-rt_0 #142
> [   12.642500] Hardware name: WIWYNN Mt.Jade Server System B81.03001.0005/Mt.Jade Motherboard, BIOS 1.08.20220218 (SCP: 1.08.20220218) 2022/02/18
> [   12.642506] Call trace:
> [   12.642508] dump_backtrace (linux/arch/arm64/kernel/stacktrace.c:200)
> [   12.642514] show_stack (linux/arch/arm64/kernel/stacktrace.c:207)
> [   12.642517] dump_stack_lvl (linux/lib/dump_stack.c:107)
> [   12.642523] dump_stack (linux/lib/dump_stack.c:114)
> [   12.642527] __might_resched (linux/kernel/sched/core.c:9929)
> [   12.642531] rt_spin_lock (linux/kernel/locking/rtmutex.c:1732 (discriminator 4))
> [   12.642536] ___slab_alloc (linux/mm/slub.c:2954)
> [   12.642539] __slab_alloc.isra.0 (linux/mm/slub.c:3116)
> [   12.642543] kmem_cache_alloc_trace (linux/mm/slub.c:3207)
> [   12.642549] __armpmu_alloc (linux/./include/linux/slab.h:600)
> [   12.642558] armpmu_alloc_atomic (linux/drivers/perf/arm_pmu.c:927)
> [   12.642562] arm_pmu_acpi_cpu_starting (linux/drivers/perf/arm_pmu_acpi.c:204)
> [   12.642568] cpuhp_invoke_callback (linux/kernel/cpu.c:192)
> [   12.642571] cpuhp_thread_fun (linux/kernel/cpu.c:777 (discriminator 3))
> [   12.642573] smpboot_thread_fn (linux/kernel/smpboot.c:164 (discriminator 3))
> [   12.642580] kthread (linux/kernel/kthread.c:376)
> [   12.642584] ret_from_fork (linux/arch/arm64/kernel/entry.S:868)
> 
> arm_pmu_acpi_cpu_starting() is called in the STARTING hotplug section,
> which runs with interrupts disabled. To avoid allocating memory and
> sleeping in this function, the pmu structures must be pre-allocated.
> 
> On ACPI systems, the count of PMUs is unknown until CPUs are
> hotplugged, cf:
> commit 0dc1a1851af1 ("arm_pmu: add armpmu_alloc_atomic()")
> 
> At most #PMU_IRQs pmu structures will be used and thus need to be
> pre-allocated.
> In arm_pmu_acpi_cpu_starting() subcalls, after checking the cpuid,
> decide to use or re-use a pre-allocated pmu structure. Thus the
> pre-allocated pmu struct can be seen as a pool.
> When probing, search and free unused pmu structures.

I think in retrospect I was trying to be too clever by having
arm_pmu_acpi_cpu_starting() handle both boot-time CPUs and late hotplug; we
can make this simpler by handling the boot-time probing synchronously within
arm_pmu_acpi_probe(), removing a bunch of state.

I had a go at that, and in testing (in a QEMU TCG VM) atop arm64/for-next/core,
that seems to work (even with a faked-up heterogeneous config). I've pushed that
to my `arm_pmu/acpi/rework` branch at:

  https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/log/?h=arm_pmu/acpi/rework
  git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git arm_pmu/acpi/rework

... does that work for you?

Thanks,
Mark.

> 
> Platforms used to test this patch:
> - Juno-r2 (2 clusters: 2 big and 4 little CPUs) with 2 PMUs and
>   one interrupt for each CPU
> - Ampere Altra with 1 PMU and one interrupt for the 160 CPUs
> 
> Link: https://lore.kernel.org/all/20210810134127.1394269-2-valentin.schneider@arm.com
> Reported-by: Valentin Schneider <vschneid at redhat.com>
> Signed-off-by: Pierre Gondois <pierre.gondois at arm.com>
> ---
>  drivers/perf/arm_pmu.c       |  17 +-----
>  drivers/perf/arm_pmu_acpi.c  | 114 ++++++++++++++++++++++++++++++-----
>  include/linux/perf/arm_pmu.h |   1 -
>  3 files changed, 103 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
> index 59d3980b8ca2..731e793dfef5 100644
> --- a/drivers/perf/arm_pmu.c
> +++ b/drivers/perf/arm_pmu.c
> @@ -861,16 +861,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
>  					    &cpu_pmu->node);
>  }
>  
> -static struct arm_pmu *__armpmu_alloc(gfp_t flags)
> +struct arm_pmu *armpmu_alloc(void)
>  {
>  	struct arm_pmu *pmu;
>  	int cpu;
>  
> -	pmu = kzalloc(sizeof(*pmu), flags);
> +	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
>  	if (!pmu)
>  		goto out;
>  
> -	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
> +	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
>  	if (!pmu->hw_events) {
>  		pr_info("failed to allocate per-cpu PMU data.\n");
>  		goto out_free_pmu;
> @@ -916,17 +916,6 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
>  	return NULL;
>  }
>  
> -struct arm_pmu *armpmu_alloc(void)
> -{
> -	return __armpmu_alloc(GFP_KERNEL);
> -}
> -
> -struct arm_pmu *armpmu_alloc_atomic(void)
> -{
> -	return __armpmu_alloc(GFP_ATOMIC);
> -}
> -
> -
>  void armpmu_free(struct arm_pmu *pmu)
>  {
>  	free_percpu(pmu->hw_events);
> diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
> index 96ffadd654ff..599d8be78950 100644
> --- a/drivers/perf/arm_pmu_acpi.c
> +++ b/drivers/perf/arm_pmu_acpi.c
> @@ -17,6 +17,8 @@
>  
>  static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
>  static DEFINE_PER_CPU(int, pmu_irqs);
> +static unsigned int preallocated_pmus_count;
> +static struct arm_pmu **preallocated_pmus;
>  
>  static int arm_pmu_acpi_register_irq(int cpu)
>  {
> @@ -187,30 +189,108 @@ static int arm_pmu_acpi_parse_irqs(void)
>  	return err;
>  }
>  
> +/* Count the number of different PMU IRQs for the input PMU. */
> +static int count_pmu_irqs(struct arm_pmu *pmu)
> +{
> +	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
> +	unsigned int num_irqs = 0;
> +	int cpu, probed_cpu, irq;
> +
> +	for_each_cpu(cpu, &pmu->supported_cpus) {
> +		irq = per_cpu(hw_events->irq, cpu);
> +		for_each_cpu(probed_cpu, &pmu->supported_cpus) {
> +			if (irq == per_cpu(hw_events->irq, probed_cpu)) {
> +				if (probed_cpu == cpu)
> +					num_irqs++;
> +				break;
> +			}
> +		}
> +	}
> +
> +	return num_irqs;
> +}
> +
> +/*
> + * Count the number of different PMU IRQs across all the PMUs of the system
> + * to get an upper bound of the number of struct arm_pmu to pre-allocate.
> + */
> +static int count_all_pmu_irqs(void)
> +{
> +	unsigned int num_irqs = 0;
> +	int cpu, probed_cpu, irq;
> +
> +	for_each_possible_cpu(cpu) {
> +		irq = per_cpu(pmu_irqs, cpu);
> +		for_each_possible_cpu(probed_cpu) {
> +			if (irq == per_cpu(pmu_irqs, probed_cpu)) {
> +				if (probed_cpu == cpu)
> +					num_irqs++;
> +				break;
> +			}
> +		}
> +	}
> +
> +	return num_irqs;
> +}
> +
> +static unsigned int pmu_preallocate(void)
> +{
> +	unsigned int i, num_irqs = count_all_pmu_irqs();
> +
> +	preallocated_pmus = kcalloc(num_irqs, sizeof(*preallocated_pmus),
> +					GFP_KERNEL);
> +	if (!preallocated_pmus) {
> +		pr_err("Failed to pre-allocate %d pmu struct\n", num_irqs);
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < num_irqs; i++) {
> +		preallocated_pmus[i] = armpmu_alloc();
> +		if (!preallocated_pmus[i])
> +			return -ENOMEM;
> +	}
> +
> +	preallocated_pmus_count = num_irqs;
> +	return 0;
> +}
> +
>  static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
>  {
>  	unsigned long cpuid = read_cpuid_id();
>  	struct arm_pmu *pmu;
> -	int cpu;
> +	unsigned int i;
>  
> -	for_each_possible_cpu(cpu) {
> -		pmu = per_cpu(probed_pmus, cpu);
> -		if (!pmu || pmu->acpi_cpuid != cpuid)
> -			continue;
> +	for (i = 0; i < preallocated_pmus_count; i++) {
> +		pmu = preallocated_pmus[i];
>  
> -		return pmu;
> +		if (!pmu->acpi_cpuid) {
> +			pmu->acpi_cpuid = cpuid;
> +			return pmu;
> +		} else if (pmu->acpi_cpuid == cpuid)
> +			return pmu;
>  	}
>  
> -	pmu = armpmu_alloc_atomic();
> -	if (!pmu) {
> -		pr_warn("Unable to allocate PMU for CPU%d\n",
> -			smp_processor_id());
> -		return NULL;
> -	}
> +	pr_err("Unable to find pre-allocated PMU for CPU%d\n",
> +		smp_processor_id());
> +	return NULL;
> +}
>  
> -	pmu->acpi_cpuid = cpuid;
> +static void pmu_free_unused_preallocated(struct arm_pmu *pmu)
> +{
> +	int i, unused_num_irqs = count_pmu_irqs(pmu);
>  
> -	return pmu;
> +	if (unused_num_irqs <= 1)
> +		return;
> +	else if (unused_num_irqs >= preallocated_pmus_count) {
> +		pr_err("Trying to free %d pmu struct when %d are allocated\n",
> +				unused_num_irqs, preallocated_pmus_count);
> +		return;
> +	}
> +
> +	unused_num_irqs--;
> +	for (i = 0; i < unused_num_irqs; i++)
> +		armpmu_free(preallocated_pmus[preallocated_pmus_count - i - 1]);
> +	preallocated_pmus_count -= unused_num_irqs;
>  }
>  
>  /*
> @@ -311,6 +391,8 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
>  		if (!pmu || pmu->name)
>  			continue;
>  
> +		pmu_free_unused_preallocated(pmu);
> +
>  		ret = init_fn(pmu);
>  		if (ret == -ENODEV) {
>  			/* PMU not handled by this driver, or not present */
> @@ -351,6 +433,10 @@ static int arm_pmu_acpi_init(void)
>  	if (ret)
>  		return ret;
>  
> +	ret = pmu_preallocate();
> +	if (ret)
> +		return ret;
> +
>  	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
>  				"perf/arm/pmu_acpi:starting",
>  				arm_pmu_acpi_cpu_starting, NULL);
> diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> index 0407a38b470a..049908af3595 100644
> --- a/include/linux/perf/arm_pmu.h
> +++ b/include/linux/perf/arm_pmu.h
> @@ -173,7 +173,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
>  
>  /* Internal functions only for core arm_pmu code */
>  struct arm_pmu *armpmu_alloc(void);
> -struct arm_pmu *armpmu_alloc_atomic(void);
>  void armpmu_free(struct arm_pmu *pmu);
>  int armpmu_register(struct arm_pmu *pmu);
>  int armpmu_request_irq(int irq, int cpu);
> -- 
> 2.25.1
> 



More information about the linux-arm-kernel mailing list