[PATCH] perf: Rewrite core context handling
Peter Zijlstra
peterz at infradead.org
Thu Sep 1 04:29:46 PDT 2022
On Thu, Sep 01, 2022 at 04:05:53PM +0530, Ravi Bangoria wrote:
> On 29-Aug-22 8:10 PM, Peter Zijlstra wrote:
> > On Mon, Aug 29, 2022 at 02:04:33PM +0200, Peter Zijlstra wrote:
> >> On Mon, Aug 29, 2022 at 05:03:47PM +0530, Ravi Bangoria wrote:
> >>> @@ -12598,6 +12590,7 @@ EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
> >>>
> >>> void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
> >>> {
> >>> +#if 0 // XXX buggered - cpu hotplug, who cares
> >>> struct perf_event_context *src_ctx;
> >>> struct perf_event_context *dst_ctx;
> >>> struct perf_event *event, *tmp;
> >>> @@ -12658,6 +12651,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
> >>> }
> >>> mutex_unlock(&dst_ctx->mutex);
> >>> mutex_unlock(&src_ctx->mutex);
> >>> +#endif
> >>> }
> >>> EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
> >>>
> >>
> >> Note to self; fix this :-) I'll see if I have time for that later today.
> >
> > Urgh, while going through that it appears the whole refcounting thing
> > isn't fully done either.
>
> Not sure I follow. Can you please elaborate.
Sure, and sorry, I've not been feeling too well the past few days; I
meant to post something, but I just couldn't find it in me to finish it
let alone make it work.
So the basic issue I mentioned is that:
/*
* ,------------------------[1:n]---------------------.
* V V
* perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
* ^ ^ | |
* `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
*
*
* XXX destroy epc when empty
* refcount, !rcu
*
* XXX epc locking
*
* event->pmu_ctx ctx->mutex && inactive
* ctx->pmu_ctx_list ctx->mutex && ctx->lock
*
*/
struct perf_event_pmu_context {
...
atomic_t refcount; /* event <-> epc */
...
}
But that then also suggests that:
struct perf_event_context {
...
refcount_t refcount;
...
}
should be: ctx <-> epc, and that is not so, the ctx::refcount still
counts the number of events.
Now this might not be bad, but it is confusing.
Anywya; below is what I have, but it is entirely unfinished untested
might eat your ganny etc..
---
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -883,7 +883,7 @@ struct perf_event_context {
int nr_freq;
int rotate_disable;
- refcount_t refcount;
+ refcount_t refcount; /* ctx <-> epc */
struct task_struct *task;
/*
Index: linux-2.6/kernel/events/core.c
===================================================================
--- linux-2.6.orig/kernel/events/core.c
+++ linux-2.6/kernel/events/core.c
@@ -2750,7 +2750,7 @@ perf_install_in_context(struct perf_even
WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
if (event->cpu != -1)
- event->cpu = cpu;
+ WARN_ON_ONCE(event->cpu != cpu);
/*
* Ensures that if we can observe event->ctx, both the event and ctx
@@ -4764,6 +4764,7 @@ find_get_pmu_context(struct pmu *pmu, st
list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
epc->ctx = ctx;
raw_spin_unlock_irq(&ctx->lock);
+ // XXX get_ctx(ctx);
} else {
WARN_ON_ONCE(epc->ctx != ctx);
atomic_inc(&epc->refcount);
@@ -4800,6 +4801,7 @@ find_get_pmu_context(struct pmu *pmu, st
list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
epc->ctx = ctx;
+ // XXX get_ctx(ctx)
found_epc:
if (task_ctx_data && !epc->task_ctx_data) {
@@ -4837,6 +4839,8 @@ static void put_pmu_ctx(struct perf_even
list_del_init(&epc->pmu_ctx_entry);
epc->ctx = NULL;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+ // XXX put_ctx(ctx)
}
WARN_ON_ONCE(!list_empty(&epc->pinned_active));
@@ -12444,6 +12448,8 @@ SYSCALL_DEFINE5(perf_event_open,
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
+
+ // XXX put_ctx(ctx);
if (task) {
up_read(&task->signal->exec_update_lock);
@@ -12588,34 +12594,44 @@ err:
}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
-void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+static void __perf_pmu_remove(struct perf_event_context *ctx,
+ int cpu, struct pmu *pmu,
+ struct perf_event_groups *groups,
+ struct list_head *events)
{
-#if 0 // XXX buggered - cpu hotplug, who cares
- struct perf_event_context *src_ctx;
- struct perf_event_context *dst_ctx;
- struct perf_event *event, *tmp;
- LIST_HEAD(events);
+ struct perf_event *event;
- src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
- dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+ for (event = perf_event_groups_first(groups, cpu, pmu, NULL);
+ event;
+ event = perf_event_groups_next(event, pmu)) {
- /*
- * See perf_event_ctx_lock() for comments on the details
- * of swizzling perf_event::ctx.
- */
- mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
- list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
- event_entry) {
perf_remove_from_context(event, 0);
- unaccount_event_cpu(event, src_cpu);
- put_ctx(src_ctx);
- list_add(&event->migrate_entry, &events);
+ unaccount_event_cpu(event, cpu);
+ put_pmu_ctx(event->pmu_ctx);
+ list_add(&event->migrate_event, events);
}
+}
- /*
- * Wait for the events to quiesce before re-instating them.
- */
- synchronize_rcu();
+static void __perf_pmu_install_event(struct pmu *pmu,
+ struct perf_event_context *ctx,
+ int cpu, struct perf_event *event)
+{
+ struct perf_event_pmu_context *epc;
+
+ event->cpu = cpu;
+ epc = find_get_pmu_context(pmu, ctx, event);
+ event->pmu_ctx = epc;
+
+ if (event->state >= PERF_EVENT_STATE_OFF)
+ event->state = PERF_EVENT_STATE_INACTIVE;
+ account_event_cpu(event, cpu);
+ perf_install_in_context(ctx, event, cpu);
+}
+
+static void __perf_pmu_install(struct perf_event_context *ctx,
+ int cpu, struct pmu *pmu, struct list_head *events)
+{
+ struct perf_event *event, *tmp;
/*
* Re-instate events in 2 passes.
@@ -12625,33 +12641,50 @@ void perf_pmu_migrate_context(struct pmu
* leader will enable its siblings, even if those are still on the old
* context.
*/
- list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ list_for_each_entry_safe(event, tmp, events, migrate_entry) {
if (event->group_leader == event)
continue;
list_del(&event->migrate_entry);
- if (event->state >= PERF_EVENT_STATE_OFF)
- event->state = PERF_EVENT_STATE_INACTIVE;
- account_event_cpu(event, dst_cpu);
- perf_install_in_context(dst_ctx, event, dst_cpu);
- get_ctx(dst_ctx);
+ __perf_pmu_install_event(pmu, ctx, cpu, event);
}
/*
* Once all the siblings are setup properly, install the group leaders
* to make it go.
*/
- list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ list_for_each_entry_safe(event, tmp, events, migrate_entry) {
list_del(&event->migrate_entry);
- if (event->state >= PERF_EVENT_STATE_OFF)
- event->state = PERF_EVENT_STATE_INACTIVE;
- account_event_cpu(event, dst_cpu);
- perf_install_in_context(dst_ctx, event, dst_cpu);
- get_ctx(dst_ctx);
+ __perf_pmu_install_event(pmu, ctx, cpu, event);
}
+}
+
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+ struct perf_event_context *src_ctx, *dst_ctx;
+ LIST_HEAD(events);
+
+ src_ctx = &per_cpu_ptr(&cpu_context, src_cpu)->ctx;
+ dst_ctx = &per_cpu_ptr(&cpu_context, dst_cpu)->ctx;
+
+ /*
+ * See perf_event_ctx_lock() for comments on the details
+ * of swizzling perf_event::ctx.
+ */
+ mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
+
+ __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_src->pinned_groups, &events);
+ __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_src->flexible_groups, &events);
+
+ /*
+ * Wait for the events to quiesce before re-instating them.
+ */
+ synchronize_rcu();
+
+ __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
+
mutex_unlock(&dst_ctx->mutex);
mutex_unlock(&src_ctx->mutex);
-#endif
}
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
More information about the linux-arm-kernel
mailing list