[PATCH 4/4] coresight: Add ETR-PERF polling.

Denis Nikitin denik at chromium.org
Wed May 5 00:21:58 PDT 2021


On Wed, Apr 21, 2021 at 02:04:13PM +0200, Daniel Kiss wrote:
> The ETR might fill up its buffer before an event causes perf to trigger
> the synchronisation, especially in system-wide tracing. Polling runs
> periodically to sync the ETR buffer. The period is configurable via
> sysfs; polling is disabled by default.
> 
> Signed-off-by: Daniel Kiss <daniel.kiss at arm.com>
> Signed-off-by: Branislav Rankov <Branislav.Rankov at arm.com>

Tested-by: Denis Nikitin <denik at chromium.org>

Thanks,
Denis

> ---
>  .../testing/sysfs-bus-coresight-devices-tmc   |   8 +
>  drivers/hwtracing/coresight/Makefile          |   2 +-
>  .../hwtracing/coresight/coresight-etm-perf.c  |   8 +
>  .../coresight/coresight-etr-perf-polling.c    | 316 ++++++++++++++++++
>  .../coresight/coresight-etr-perf-polling.h    |  42 +++
>  .../hwtracing/coresight/coresight-tmc-core.c  |   2 +
>  .../hwtracing/coresight/coresight-tmc-etr.c   |   9 +
>  7 files changed, 386 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/hwtracing/coresight/coresight-etr-perf-polling.c
>  create mode 100644 drivers/hwtracing/coresight/coresight-etr-perf-polling.h
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
> index 6aa527296c710..4ca7af22a3686 100644
> --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
> +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
> @@ -91,3 +91,11 @@ Contact:	Mathieu Poirier <mathieu.poirier at linaro.org>
>  Description:	(RW) Size of the trace buffer for TMC-ETR when used in SYSFS
>  		mode. Writable only for TMC-ETR configurations. The value
>  		should be aligned to the kernel pagesize.
> +
> +What:		/sys/bus/coresight/devices/<memory_map>.tmc/polling/period
> +Date:		April 2021
> +KernelVersion:	5.13
> +Contact:	Daniel Kiss <daniel.kiss at arm.com>
> +Description:	(RW) Period in milliseconds at which the TMC-ETR is synced.
> +		The default value is 0, which means the feature is disabled.
> +		Writable only for TMC-ETR configurations.
> diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
> index d60816509755c..4df90b71d98cd 100644
> --- a/drivers/hwtracing/coresight/Makefile
> +++ b/drivers/hwtracing/coresight/Makefile
> @@ -4,7 +4,7 @@
>  #
>  obj-$(CONFIG_CORESIGHT) += coresight.o
>  coresight-y := coresight-core.o  coresight-etm-perf.o coresight-platform.o \
> -		coresight-sysfs.o
> +		coresight-sysfs.o  coresight-etr-perf-polling.o
>  obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o
>  coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \
>  		      coresight-tmc-etr.o
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index 78a55fc2bcab5..910a99944eea8 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -19,6 +19,7 @@
>  #include <linux/workqueue.h>
>  
>  #include "coresight-etm-perf.h"
> +#include "coresight-etr-perf-polling.h"
>  #include "coresight-priv.h"
>  
>  static struct pmu etm_pmu;
> @@ -438,6 +439,8 @@ static void etm_event_start(struct perf_event *event, int flags)
>  	/* Tell the perf core the event is alive */
>  	event->hw.state = 0;
>  
> +	etr_perf_polling_event_start(event, event_data, handle);
> +
>  	/* Finally enable the tracer */
>  	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
>  		goto fail_disable_path;
> @@ -497,6 +500,8 @@ static void etm_event_stop(struct perf_event *event, int mode)
>  	if (!sink)
>  		return;
>  
> +	etr_perf_polling_event_stop(event, event_data);
> +
>  	/* stop tracer */
>  	source_ops(csdev)->disable(csdev, event);
>  
> @@ -741,6 +746,8 @@ int __init etm_perf_init(void)
>  	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
>  	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;
>  
> +	etr_perf_polling_init();
> +
>  	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
>  	if (ret == 0)
>  		etm_perf_up = true;
> @@ -750,5 +757,6 @@ int __init etm_perf_init(void)
>  
>  void __exit etm_perf_exit(void)
>  {
> +	etr_perf_polling_exit();
>  	perf_pmu_unregister(&etm_pmu);
>  }
> diff --git a/drivers/hwtracing/coresight/coresight-etr-perf-polling.c b/drivers/hwtracing/coresight/coresight-etr-perf-polling.c
> new file mode 100644
> index 0000000000000..aa0352908873a
> --- /dev/null
> +++ b/drivers/hwtracing/coresight/coresight-etr-perf-polling.c
> @@ -0,0 +1,316 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright(C) 2021 Arm Limited. All rights reserved.
> + * Author: Daniel Kiss <daniel.kiss at arm.com>
> + */
> +
> +#include <linux/coresight.h>
> +#include <linux/coresight-pmu.h>
> +#include <linux/cpumask.h>
> +#include <linux/device.h>
> +#include <linux/init.h>
> +#include <linux/list.h>
> +#include <linux/percpu-defs.h>
> +#include <linux/perf_event.h>
> +#include <linux/slab.h>
> +#include <linux/stringhash.h>
> +#include <linux/types.h>
> +#include <linux/workqueue.h>
> +
> +#include "coresight-etr-perf-polling.h"
> +#include "coresight-priv.h"
> +#include "coresight-tmc.h"
> +
> +#if defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) || \
> +    defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC_MODULE)
> +
> +struct polling_event_list {
> +	struct perf_event *perf_event;
> +	struct etm_event_data *etm_event_data;
> +	struct perf_output_handle *ctx_handle;
> +	void (*tmc_etr_reset_hw)(struct tmc_drvdata *);
> +	struct list_head list;
> +};
> +
> +struct polling {
> +	int cpu;
> +	struct list_head polled_events;
> +	struct delayed_work delayed_work;
> +};
> +
> +static atomic_t period;
> +static spinlock_t spinlock_re;
> +static struct list_head registered_events;
> +
> +static DEFINE_PER_CPU(struct polling, polling);
> +
> +static ssize_t period_show(struct device *dev, struct device_attribute *attr,
> +			   char *buf)
> +{
> +	int temp;
> +	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
> +
> +	if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
> +		return -EPERM;
> +
> +	temp = atomic_read(&period);
> +	return sprintf(buf, "%i\n", temp);
> +}
> +
> +static ssize_t period_store(struct device *dev, struct device_attribute *attr,
> +			    const char *buf, size_t count)
> +{
> +	int temp = 0;
> +	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
> +
> +	if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
> +		return -EPERM;
> +
> +	if ((1 == sscanf(buf, "%i", &temp)) && (temp >= 0))
> +		atomic_set(&period, temp);
> +	return count;
> +}
> +
> +static DEVICE_ATTR_RW(period);
> +
> +static struct attribute *coresight_tmc_polling_attrs[] = {
> +	&dev_attr_period.attr,
> +	NULL,
> +};
> +const struct attribute_group coresight_tmc_polling_group = {
> +	.attrs = coresight_tmc_polling_attrs,
> +	.name = "polling",
> +};
> +EXPORT_SYMBOL_GPL(coresight_tmc_polling_group);
> +
> +static inline void polling_sched_worker(struct polling *p)
> +{
> +	int tickrate = atomic_read(&period);
> +	if (!list_empty(&p->polled_events) && (tickrate > 0))
> +		schedule_delayed_work_on(p->cpu, &p->delayed_work,
> +					 msecs_to_jiffies(tickrate));
> +}
> +
> +static inline bool is_etr_related(struct etm_event_data *etm_event_data, int cpu)
> +{
> +	struct list_head *path;
> +	struct coresight_device *sink;
> +	struct tmc_drvdata *drvdata;
> +	path = etm_event_cpu_path(etm_event_data, cpu);
> +	if (WARN_ON(!path))
> +		return false;
> +	sink = coresight_get_sink(path);
> +	if (WARN_ON(!sink))
> +		return false;
> +	drvdata = dev_get_drvdata(sink->dev.parent);
> +	if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
> +		return false;
> +	return true;
> +}
> +
> +/*
> + * Adds the event to the polled events list.
> + */
> +void etr_perf_polling_event_start(struct perf_event *event,
> +				  struct etm_event_data *etm_event_data,
> +				  struct perf_output_handle *ctx_handle)
> +{
> +	int cpu = smp_processor_id();
> +	struct polling *p = per_cpu_ptr(&polling, cpu);
> +	struct polling_event_list *element;
> +	struct list_head *i, *tmp;
> +
> +	if (!is_etr_related(etm_event_data, cpu))
> +		return;
> +
> +	spin_lock(&spinlock_re);
> +	list_for_each_safe (i, tmp, &registered_events) {
> +		element = list_entry(i, struct polling_event_list, list);
> +		if (element->ctx_handle == ctx_handle) {
> +			element->perf_event = event;
> +			element->etm_event_data = etm_event_data;
> +			list_del(&element->list);
> +			spin_unlock(&spinlock_re);
> +			list_add(&element->list, &p->polled_events);
> +			polling_sched_worker(p);
> +			return;
> +		}
> +	}
> +	spin_unlock(&spinlock_re);
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_event_start);
> +
> +/*
> + * Removes the event from the list of events to be polled.
> + */
> +void etr_perf_polling_event_stop(struct perf_event *event,
> +				 struct etm_event_data *etm_event_data)
> +{
> +	int cpu = smp_processor_id();
> +	struct list_head *i, *tmp;
> +	struct polling *p = per_cpu_ptr(&polling, cpu);
> +
> +	if (!is_etr_related(etm_event_data, cpu))
> +		return;
> +
> +	list_for_each_safe (i, tmp, &p->polled_events) {
> +		struct polling_event_list *element =
> +			list_entry(i, struct polling_event_list, list);
> +		if (element->perf_event == event) {
> +			list_del(&element->list);
> +			element->perf_event = NULL;
> +			element->etm_event_data = NULL;
> +			spin_lock(&spinlock_re);
> +			list_add(&element->list, &registered_events);
> +			spin_unlock(&spinlock_re);
> +			if (list_empty(&p->polled_events)) {
> +				cancel_delayed_work(&p->delayed_work);
> +			}
> +			return;
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_event_stop);
> +
> +/*
> + * The polling worker is a workqueue job which is periodically
> + * woken up to update the perf AUX buffer from the ETR sink.
> + */
> +static void etr_perf_polling_worker(struct work_struct *work)
> +{
> +	unsigned long flags;
> +	int cpu = smp_processor_id();
> +	struct polling *p = per_cpu_ptr(&polling, cpu);
> +	struct list_head *i, *tmp;
> +
> +	if (!atomic_read(&period))
> +		return;
> +
> +	/*
> +	 * Scheduling would do the same from the perf hooks,
> +	 * this should be done in one go.
> +	 */
> +	local_irq_save(flags);
> +	preempt_disable();
> +	/* Perf requires rcu lock. */
> +	rcu_read_lock();
> +
> +	polling_sched_worker(p);
> +
> +	list_for_each_safe (i, tmp, &p->polled_events) {
> +		struct list_head *path;
> +		struct coresight_device *sink;
> +		struct polling_event_list *element =
> +			list_entry(i, struct polling_event_list, list);
> +
> +		path = etm_event_cpu_path(element->etm_event_data, cpu);
> +		if (WARN_ON(!path))
> +			continue;
> +		sink = coresight_get_sink(path);
> +		if (WARN_ON(!sink))
> +			continue;
> +		if (sink_ops(sink)->update_buffer) {
> +			int size, refcnt;
> +			struct tmc_drvdata *drvdata = dev_get_drvdata(sink->dev.parent);
> +
> +			/*
> +			 * Act as if we were the only user of the sink. Due to the locks
> +			 * we are safe.
> +			 */
> +			refcnt = atomic_xchg(sink->refcnt, 1);
> +			size = sink_ops(sink)->update_buffer(
> +				sink, element->ctx_handle,
> +				element->etm_event_data->snk_config);
> +			refcnt = atomic_xchg(sink->refcnt, refcnt);
> +			/*
> +			 * Restart the trace.
> +			 */
> +			if (element->tmc_etr_reset_hw)
> +				element->tmc_etr_reset_hw(drvdata);
> +
> +			WARN_ON(size < 0);
> +			if (size > 0) {
> +				struct etm_event_data *new_event_data;
> +
> +				perf_aux_output_end(element->ctx_handle, size);
> +				new_event_data = perf_aux_output_begin(
> +					element->ctx_handle,
> +					element->perf_event);
> +				if (WARN_ON(new_event_data == NULL))
> +					continue;
> +				element->etm_event_data = new_event_data;
> +				WARN_ON(new_event_data->snk_config !=
> +					element->etm_event_data->snk_config);
> +			}
> +		}
> +	}
> +
> +	rcu_read_unlock();
> +	preempt_enable();
> +	local_irq_restore(flags);
> +}
> +
> +void etr_perf_polling_handle_register(struct perf_output_handle *handle,
> +				      void (*tmc_etr_reset_hw)(struct tmc_drvdata *drvdata))
> +{
> +	struct polling_event_list *element;
> +
> +	element = kmalloc(sizeof(*element), GFP_KERNEL);
> +	if (WARN_ON(!element))
> +		return;
> +	memset(element, 0, sizeof(*element));
> +	element->ctx_handle = handle;
> +	element->tmc_etr_reset_hw = tmc_etr_reset_hw;
> +	spin_lock(&spinlock_re);
> +	list_add(&element->list, &registered_events);
> +	spin_unlock(&spinlock_re);
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_handle_register);
> +
> +void etr_perf_polling_handle_deregister(struct perf_output_handle *handle)
> +{
> +	struct list_head *i, *tmp;
> +
> +	spin_lock(&spinlock_re);
> +	list_for_each_safe (i, tmp, &registered_events) {
> +		struct polling_event_list *element =
> +			list_entry(i, struct polling_event_list, list);
> +		if (element->ctx_handle == handle) {
> +			list_del(&element->list);
> +			spin_unlock(&spinlock_re);
> +			kfree(element);
> +			return;
> +		}
> +	}
> +	spin_unlock(&spinlock_re);
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_handle_deregister);
> +
> +void etr_perf_polling_init(void)
> +{
> +	int cpu;
> +	spin_lock_init(&spinlock_re);
> +	INIT_LIST_HEAD(&registered_events);
> +	atomic_set(&period, 0);
> +	for_each_possible_cpu (cpu) {
> +		struct polling *p = per_cpu_ptr(&polling, cpu);
> +		p->cpu = cpu;
> +		INIT_LIST_HEAD(&p->polled_events);
> +		INIT_DELAYED_WORK(&p->delayed_work, etr_perf_polling_worker);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_init);
> +
> +void etr_perf_polling_exit(void)
> +{
> +	int cpu;
> +	for_each_possible_cpu (cpu) {
> +		struct polling *p = per_cpu_ptr(&polling, cpu);
> +		cancel_delayed_work_sync(&p->delayed_work);
> +		WARN_ON(!list_empty(&p->polled_events));
> +	}
> +	WARN_ON(!list_empty(&registered_events));
> +}
> +EXPORT_SYMBOL_GPL(etr_perf_polling_exit);
> +
> +#endif
> diff --git a/drivers/hwtracing/coresight/coresight-etr-perf-polling.h b/drivers/hwtracing/coresight/coresight-etr-perf-polling.h
> new file mode 100644
> index 0000000000000..5917e1fa408bb
> --- /dev/null
> +++ b/drivers/hwtracing/coresight/coresight-etr-perf-polling.h
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright(C) 2021 Arm Limited. All rights reserved.
> + * Author: Daniel Kiss <daniel.kiss at arm.com>
> + */
> +
> +#ifndef _CORESIGHT_ETM_PERF_POLLING_H
> +#define _CORESIGHT_ETM_PERF_POLLING_H
> +
> +#include <linux/coresight.h>
> +#include <linux/perf_event.h>
> +#include "coresight-etm-perf.h"
> +#include "coresight-tmc.h"
> +
> +#if defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) || \
> +    defined(CONFIG_CORESIGHT_LINK_AND_SINK_TMC_MODULE)
> +
> +void etr_perf_polling_init(void);
> +void etr_perf_polling_exit(void);
> +void etr_perf_polling_handle_register(struct perf_output_handle *handle,
> +				      void (*tmc_etr_reset_hw)(struct tmc_drvdata *drvdata));
> +void etr_perf_polling_handle_deregister(struct perf_output_handle *handle);
> +void etr_perf_polling_event_start(struct perf_event *event,
> +				  struct etm_event_data *etm_event_data,
> +				  struct perf_output_handle *ctx_handle);
> +void etr_perf_polling_event_stop(struct perf_event *event,
> +				 struct etm_event_data *etm_event_data);
> +
> +extern const struct attribute_group coresight_tmc_polling_group;
> +#define CORESIGHT_TMP_POLLING_GROUP &coresight_tmc_polling_group,
> +
> +#else /* !CONFIG_CORESIGHT_LINK_AND_SINK_TMC */
> +#define etr_perf_polling_init()
> +#define etr_perf_polling_exit()
> +#define etr_perf_polling_handle_register(...)
> +#define etr_perf_polling_handle_deregister(...)
> +#define etr_perf_polling_event_start(...)
> +#define etr_perf_polling_event_stop(...)
> +#define CORESIGHT_TMP_POLLING_GROUP
> +#endif
> +
> +#endif
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
> index 74c6323d4d6ab..51e705ef3ffa3 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-core.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
> @@ -26,6 +26,7 @@
>  
>  #include "coresight-priv.h"
>  #include "coresight-tmc.h"
> +#include "coresight-etr-perf-polling.h"
>  
>  DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb");
>  DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf");
> @@ -365,6 +366,7 @@ static const struct attribute_group coresight_tmc_mgmt_group = {
>  static const struct attribute_group *coresight_tmc_groups[] = {
>  	&coresight_tmc_group,
>  	&coresight_tmc_mgmt_group,
> +	CORESIGHT_TMP_POLLING_GROUP
>  	NULL,
>  };
>  
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index bf9f6311d8663..021b594e38e71 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -16,6 +16,7 @@
>  #include <linux/vmalloc.h>
>  #include "coresight-catu.h"
>  #include "coresight-etm-perf.h"
> +#include "coresight-etr-perf-polling.h"
>  #include "coresight-priv.h"
>  #include "coresight-tmc.h"
>  
> @@ -1139,6 +1140,12 @@ void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>  	drvdata->etr_buf = NULL;
>  }
>  
> +static void tmc_etr_reset_hw(struct tmc_drvdata *drvdata)
> +{
> +	__tmc_etr_disable_hw(drvdata);
> +	__tmc_etr_enable_hw(drvdata);
> +}
> +
>  static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
>  {
>  	int ret = 0;
> @@ -1630,6 +1637,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
>  		drvdata->mode = CS_MODE_PERF;
>  		drvdata->perf_buf = etr_perf->etr_buf;
>  		drvdata->perf_handle = handle;
> +		etr_perf_polling_handle_register(handle, tmc_etr_reset_hw);
>  		atomic_inc(csdev->refcnt);
>  	}
>  
> @@ -1677,6 +1685,7 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev)
>  	drvdata->mode = CS_MODE_DISABLED;
>  	/* Reset perf specific data */
>  	drvdata->perf_buf = NULL;
> +	etr_perf_polling_handle_deregister(drvdata->perf_handle);
>  	drvdata->perf_handle = NULL;
>  
>  	spin_unlock_irqrestore(&drvdata->spinlock, flags);
> -- 
> 2.25.1
> 
> _______________________________________________
> CoreSight mailing list
> CoreSight at lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/coresight



More information about the linux-arm-kernel mailing list