[PATCH] perf: Rewrite core context handling

Peter Zijlstra peterz at infradead.org
Mon Oct 10 03:23:12 PDT 2022


On Tue, Sep 06, 2022 at 11:20:53AM +0530, Ravi Bangoria wrote:

> This one was simple enough so I prepared a patch for this. Let
> me know if you see any issues with below diff.

I've extracted this as a separate patch, since it's not strictly required
for correctness and the main patch is already quite large.
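
To illustrate what the new iteration buys us -- this is a user-space
sketch, not kernel code; the types and the groups_first()/groups_next()
helpers below are made up to mirror perf_event_groups_first() and
perf_event_groups_next() as used in the diff. Because the group trees
are kept sorted by {cpu, pmu} (key details elided), the walk can jump
straight to the first event for this CPU and PMU and stop as soon as
the key changes, instead of scanning the whole event list and filtering:

#include <stdio.h>
#include <stddef.h>

struct pmu { const char *name; };

struct event {
	int cpu;
	const struct pmu *pmu;
	const char *label;
};

static const struct pmu tracepoint_pmu = { "tracepoint" };
static const struct pmu swevent_pmu   = { "software" };

/* Sorted by {cpu, pmu}, as the kernel's rb-tree would keep them. */
static const struct event events[] = {
	{ 0, &swevent_pmu,    "sw event, cpu 0"   },
	{ 0, &tracepoint_pmu, "tp event, cpu 0 a" },
	{ 0, &tracepoint_pmu, "tp event, cpu 0 b" },
	{ 1, &tracepoint_pmu, "tp event, cpu 1"   },
};

#define NR_EVENTS (sizeof(events) / sizeof(events[0]))

/* Analogue of perf_event_groups_first(): first entry matching the key. */
static const struct event *groups_first(int cpu, const struct pmu *pmu)
{
	size_t i;

	for (i = 0; i < NR_EVENTS; i++) {
		if (events[i].cpu == cpu && events[i].pmu == pmu)
			return &events[i];
	}
	return NULL;
}

/* Analogue of perf_event_groups_next(): next entry with the same key. */
static const struct event *groups_next(const struct event *ev,
				       const struct pmu *pmu)
{
	if (ev + 1 < events + NR_EVENTS &&
	    ev[1].cpu == ev->cpu && ev[1].pmu == pmu)
		return ev + 1;
	return NULL;
}

int main(void)
{
	const struct event *ev;

	/* Visit only the tracepoint events for "this" CPU. */
	for (ev = groups_first(0, &tracepoint_pmu); ev;
	     ev = groups_next(ev, &tracepoint_pmu))
		printf("deliver to: %s\n", ev->label);

	return 0;
}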

> ---
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 820c56c66b26..e0232e0bb74e 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -9807,6 +9807,44 @@ static struct pmu perf_swevent = {
>  
>  #ifdef CONFIG_EVENT_TRACING
>  
> +static void tp_perf_event_destroy(struct perf_event *event)
> +{
> +	perf_trace_destroy(event);
> +}
> +
> +static int perf_tp_event_init(struct perf_event *event)
> +{
> +	int err;
> +
> +	if (event->attr.type != PERF_TYPE_TRACEPOINT)
> +		return -ENOENT;
> +
> +	/*
> +	 * no branch sampling for tracepoint events
> +	 */
> +	if (has_branch_stack(event))
> +		return -EOPNOTSUPP;
> +
> +	err = perf_trace_init(event);
> +	if (err)
> +		return err;
> +
> +	event->destroy = tp_perf_event_destroy;
> +
> +	return 0;
> +}
> +
> +static struct pmu perf_tracepoint = {
> +	.task_ctx_nr	= perf_sw_context,
> +
> +	.event_init	= perf_tp_event_init,
> +	.add		= perf_trace_add,
> +	.del		= perf_trace_del,
> +	.start		= perf_swevent_start,
> +	.stop		= perf_swevent_stop,
> +	.read		= perf_swevent_read,
> +};
> +
>  static int perf_tp_filter_match(struct perf_event *event,
>  				struct perf_sample_data *data)
>  {
> @@ -9856,6 +9894,49 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>  }
>  EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
>  
> +
> +static void __perf_tp_event_target_task(u64 count, void *record,
> +					struct pt_regs *regs,
> +					struct perf_sample_data *data,
> +					struct perf_event *event)
> +{
> +	struct trace_entry *entry = record;
> +
> +	if (event->attr.config != entry->type)
> +		return;
> +	/* Cannot deliver synchronous signal to other task. */
> +	if (event->attr.sigtrap)
> +		return;
> +	if (perf_tp_event_match(event, data, regs))
> +		perf_swevent_event(event, count, data, regs);
> +}
> +
> +static void perf_tp_event_target_task(u64 count, void *record,
> +				      struct pt_regs *regs,
> +				      struct perf_sample_data *data,
> +				      struct perf_event_context *ctx)
> +{
> +	struct perf_event *event, *sibling;
> +
> +	event = perf_event_groups_first(&ctx->pinned_groups, smp_processor_id(),
> +					&perf_tracepoint, NULL);
> +	for (; event; event = perf_event_groups_next(event, &perf_tracepoint)) {
> +		__perf_tp_event_target_task(count, record, regs, data, event);
> +		for_each_sibling_event(sibling, event) {
> +			__perf_tp_event_target_task(count, record, regs, data, sibling);
> +		}
> +	}
> +
> +	event = perf_event_groups_first(&ctx->flexible_groups, smp_processor_id(),
> +					&perf_tracepoint, NULL);
> +	for (; event; event = perf_event_groups_next(event, &perf_tracepoint)) {
> +		__perf_tp_event_target_task(count, record, regs, data, event);
> +		for_each_sibling_event(sibling, event) {
> +			__perf_tp_event_target_task(count, record, regs, data, sibling);
> +		}
> +	}
> +}
> +
>  void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
>  		   struct pt_regs *regs, struct hlist_head *head, int rctx,
>  		   struct task_struct *task)
> @@ -9886,29 +9967,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
>  	 */
>  	if (task && task != current) {
>  		struct perf_event_context *ctx;
> -		struct trace_entry *entry = record;
>  
>  		rcu_read_lock();
>  		ctx = rcu_dereference(task->perf_event_ctxp);
>  		if (!ctx)
>  			goto unlock;
>  
> -		// XXX iterate groups instead, we should be able to
> -		// find the subtree for the perf_tracepoint pmu and CPU.
> -
> -		list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
> -			if (event->cpu != smp_processor_id())
> -				continue;
> -			if (event->attr.type != PERF_TYPE_TRACEPOINT)
> -				continue;
> -			if (event->attr.config != entry->type)
> -				continue;
> -			/* Cannot deliver synchronous signal to other task. */
> -			if (event->attr.sigtrap)
> -				continue;
> -			if (perf_tp_event_match(event, &data, regs))
> -				perf_swevent_event(event, count, &data, regs);
> -		}
> +		raw_spin_lock(&ctx->lock);
> +		perf_tp_event_target_task(count, record, regs, &data, ctx);
> +		raw_spin_unlock(&ctx->lock);
>  unlock:
>  		rcu_read_unlock();
>  	}
> @@ -9917,44 +9984,6 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
>  }
>  EXPORT_SYMBOL_GPL(perf_tp_event);
>  
> -static void tp_perf_event_destroy(struct perf_event *event)
> -{
> -	perf_trace_destroy(event);
> -}
> -
> -static int perf_tp_event_init(struct perf_event *event)
> -{
> -	int err;
> -
> -	if (event->attr.type != PERF_TYPE_TRACEPOINT)
> -		return -ENOENT;
> -
> -	/*
> -	 * no branch sampling for tracepoint events
> -	 */
> -	if (has_branch_stack(event))
> -		return -EOPNOTSUPP;
> -
> -	err = perf_trace_init(event);
> -	if (err)
> -		return err;
> -
> -	event->destroy = tp_perf_event_destroy;
> -
> -	return 0;
> -}
> -
> -static struct pmu perf_tracepoint = {
> -	.task_ctx_nr	= perf_sw_context,
> -
> -	.event_init	= perf_tp_event_init,
> -	.add		= perf_trace_add,
> -	.del		= perf_trace_del,
> -	.start		= perf_swevent_start,
> -	.stop		= perf_swevent_stop,
> -	.read		= perf_swevent_read,
> -};
> -
>  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
>  /*
>   * Flags in config, used by dynamic PMU kprobe and uprobe
> 
> ---
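
On the locking: the list_for_each_entry_rcu() walk this replaces was
safe under rcu_read_lock() alone, but the pinned/flexible group trees
are plain rb-trees with no RCU protection, hence the new
raw_spin_lock(&ctx->lock) around the walk. Abbreviated shape of the
resulting path, restating the hunk above (comments mine):

	rcu_read_lock();			/* keeps ctx itself alive */
	ctx = rcu_dereference(task->perf_event_ctxp);
	if (ctx) {
		raw_spin_lock(&ctx->lock);	/* group trees aren't RCU-safe */
		perf_tp_event_target_task(count, record, regs, &data, ctx);
		raw_spin_unlock(&ctx->lock);
	}
	rcu_read_unlock();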
