[PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue
Mike Leach
mike.leach at linaro.org
Thu Jul 18 06:24:10 PDT 2024
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark at linaro.org> wrote:
>
> From: James Clark <james.clark at arm.com>
>
> The global list won't work for per-sink trace ID allocations, so put a
> list in each queue where the IDs will be unique to that queue.
>
> To keep the same behavior as before, for version 0 of the HW_ID packets,
> copy all the HW_ID mappings into all queues.
>
> This change doesn't effect the decoders, only trace ID lookups on the
> Perf side. The decoders are still created with global mappings which
> will be fixed in a later commit.
>
> Signed-off-by: James Clark <james.clark at arm.com>
> Signed-off-by: James Clark <james.clark at linaro.org>
> ---
> .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 28 ++-
> tools/perf/util/cs-etm.c | 215 +++++++++++-------
> tools/perf/util/cs-etm.h | 2 +-
> 3 files changed, 147 insertions(+), 98 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index e917985bbbe6..0c9c48cedbf1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
> +cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *packet_queue,
> const u8 trace_chan_id,
> enum cs_etm_sample_type sample_type)
> {
> @@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
> if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
> return OCSD_RESP_FATAL_SYS_ERR;
>
> - if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
> + if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
> return OCSD_RESP_FATAL_SYS_ERR;
>
> et = packet_queue->tail;
> @@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
> int ret = 0;
> struct cs_etm_packet *packet;
>
> - ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
> + ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
> CS_ETM_RANGE);
> if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
> return ret;
> @@ -496,7 +497,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const uint8_t trace_chan_id)
> {
> /*
> @@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
> * reset time statistics.
> */
> cs_etm_decoder__reset_timestamp(queue);
> - return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_DISCONTINUITY);
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const ocsd_generic_trace_elem *elem,
> const uint8_t trace_chan_id)
> { int ret = 0;
> struct cs_etm_packet *packet;
>
> - ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_EXCEPTION);
> if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
> return ret;
> @@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const uint8_t trace_chan_id)
> {
> - return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_EXCEPTION_RET);
> }
>
> @@ -599,7 +603,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
> case OCSD_GEN_TRC_ELEM_EO_TRACE:
> case OCSD_GEN_TRC_ELEM_NO_SYNC:
> case OCSD_GEN_TRC_ELEM_TRACE_ON:
> - resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
> + resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
> @@ -607,11 +611,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_EXCEPTION:
> - resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
> + resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
> - resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
> + resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_TIMESTAMP:
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 1a95c4bb898f..0cd7d3843411 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -116,16 +116,18 @@ struct cs_etm_queue {
> /* Conversion between traceID and index in traceid_queues array */
> struct intlist *traceid_queues_list;
> struct cs_etm_traceid_queue **traceid_queues;
> + /* Conversion between traceID and metadata pointers */
> + struct intlist *traceid_list;
> };
>
> -/* RB tree for quick conversion between traceID and metadata pointers */
> -static struct intlist *traceid_list;
> -
> static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
> static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
> pid_t tid);
> static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
> static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
> +static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
> +static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
> +static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
>
> /* PTMs ETMIDR [11:8] set to b0011 */
> #define ETMIDR_PTM_VERSION 0x00000300
> @@ -151,12 +153,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
> return CS_ETM_PROTO_ETMV3;
> }
>
> -static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
> +static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
> {
> struct int_node *inode;
> u64 *metadata;
>
> - inode = intlist__find(traceid_list, trace_chan_id);
> + inode = intlist__find(etmq->traceid_list, trace_chan_id);
> if (!inode)
> return -EINVAL;
>
> @@ -165,12 +167,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
> return 0;
> }
>
> -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
> +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
> {
> struct int_node *inode;
> u64 *metadata;
>
> - inode = intlist__find(traceid_list, trace_chan_id);
> + inode = intlist__find(etmq->traceid_list, trace_chan_id);
> if (!inode)
> return -EINVAL;
>
> @@ -222,30 +224,108 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
> return etmq->etm->pid_fmt;
> }
>
> -static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
> +static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
> + u8 trace_chan_id, u64 *cpu_metadata)
> {
> - struct int_node *inode;
> -
> /* Get an RB node for this CPU */
> - inode = intlist__findnew(traceid_list, trace_chan_id);
> + struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
>
> /* Something went wrong, no need to continue */
> if (!inode)
> return -ENOMEM;
>
> + /* Disallow re-mapping a different traceID to metadata pair. */
> + if (inode->priv) {
> + u64 *curr_cpu_data = inode->priv;
> + u8 curr_chan_id;
> + int err;
> +
> + if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
> + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> + return -EINVAL;
> + }
> +
> + /* check that the mapped ID matches */
> + err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
> + if (err)
> + return err;
> +
> + if (curr_chan_id != trace_chan_id) {
> + pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
> + return -EINVAL;
> + }
> +
> + /* Skip re-adding the same mappings if everything matched */
> + return 0;
> + }
> +
> + /* Not one we've seen before, associate the traceID with the metadata pointer */
> + inode->priv = cpu_metadata;
> +
> + return 0;
> +}
> +
> +static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
> +{
> + if (etm->per_thread_decoding)
> + return etm->queues.queue_array[0].priv;
> + else
> + return etm->queues.queue_array[cpu].priv;
> +}
> +
> +static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
> + u64 *cpu_metadata)
> +{
> + struct cs_etm_queue *etmq;
> +
> /*
> - * The node for that CPU should not be taken.
> - * Back out if that's the case.
> + * If the queue is unformatted then only save one mapping in the
> + * queue associated with that CPU so only one decoder is made.
> */
> - if (inode->priv)
> - return -EINVAL;
> + etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
> + if (etmq->format == UNFORMATTED)
> + return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
> + cpu_metadata);
>
> - /* All good, associate the traceID with the metadata pointer */
> - inode->priv = cpu_metadata;
> + /*
> + * Otherwise, version 0 trace IDs are global so save them into every
> + * queue.
> + */
> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
> + int ret;
> +
> + etmq = etm->queues.queue_array[i].priv;
> + ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
> + cpu_metadata);
> + if (ret)
> + return ret;
> + }
>
> return 0;
> }
>
> +static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
> + u64 hw_id)
> +{
> + int err;
> + u64 *cpu_data;
> + u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
> +
> + cpu_data = get_cpu_data(etm, cpu);
> + if (cpu_data == NULL)
> + return -EINVAL;
> +
> + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
> + if (err)
> + return err;
> +
> + /*
> + * if we are picking up the association from the packet, need to plug
> + * the correct trace ID into the metadata for setting up decoders later.
> + */
> + return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> +}
> +
> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
> {
> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
> @@ -329,17 +409,13 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
> {
> struct cs_etm_auxtrace *etm;
> struct perf_sample sample;
> - struct int_node *inode;
> struct evsel *evsel;
> - u64 *cpu_data;
> u64 hw_id;
> int cpu, version, err;
> - u8 trace_chan_id, curr_chan_id;
>
> /* extract and parse the HW ID */
> hw_id = event->aux_output_hw_id.hw_id;
> version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
> - trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
>
> /* check that we can handle this version */
> if (version > CS_AUX_HW_ID_CURR_VERSION) {
> @@ -367,43 +443,7 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
> return -EINVAL;
> }
>
> - /* See if the ID is mapped to a CPU, and it matches the current CPU */
> - inode = intlist__find(traceid_list, trace_chan_id);
> - if (inode) {
> - cpu_data = inode->priv;
> - if ((int)cpu_data[CS_ETM_CPU] != cpu) {
> - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> - return -EINVAL;
> - }
> -
> - /* check that the mapped ID matches */
> - err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
> - if (err)
> - return err;
> - if (curr_chan_id != trace_chan_id) {
> - pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
> - return -EINVAL;
> - }
> -
> - /* mapped and matched - return OK */
> - return 0;
> - }
> -
> - cpu_data = get_cpu_data(etm, cpu);
> - if (cpu_data == NULL)
> - return err;
> -
> - /* not one we've seen before - lets map it */
> - err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
> - if (err)
> - return err;
> -
> - /*
> - * if we are picking up the association from the packet, need to plug
> - * the correct trace ID into the metadata for setting up decoders later.
> - */
> - err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> - return err;
> + return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> }
>
> void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
> @@ -856,6 +896,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>
> static void cs_etm__free_queue(void *priv)
> {
> + struct int_node *inode, *tmp;
> struct cs_etm_queue *etmq = priv;
>
> if (!etmq)
> @@ -863,6 +904,14 @@ static void cs_etm__free_queue(void *priv)
>
> cs_etm_decoder__free(etmq->decoder);
> cs_etm__free_traceid_queues(etmq);
> +
> + /* First remove all traceID/metadata nodes for the RB tree */
> + intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
> + intlist__remove(etmq->traceid_list, inode);
> +
> + /* Then the RB tree itself */
> + intlist__delete(etmq->traceid_list);
> +
> free(etmq);
> }
>
> @@ -885,19 +934,12 @@ static void cs_etm__free_events(struct perf_session *session)
> static void cs_etm__free(struct perf_session *session)
> {
> int i;
> - struct int_node *inode, *tmp;
> struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
> struct cs_etm_auxtrace,
> auxtrace);
> cs_etm__free_events(session);
> session->auxtrace = NULL;
>
> - /* First remove all traceID/metadata nodes for the RB tree */
> - intlist__for_each_entry_safe(inode, tmp, traceid_list)
> - intlist__remove(traceid_list, inode);
> - /* Then the RB tree itself */
> - intlist__delete(traceid_list);
> -
> for (i = 0; i < aux->num_cpu; i++)
> zfree(&aux->metadata[i]);
>
> @@ -1055,9 +1097,24 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
>
> etmq->traceid_queues_list = intlist__new(NULL);
> if (!etmq->traceid_queues_list)
> - free(etmq);
> + goto out_free;
> +
> + /*
> + * Create an RB tree for traceID-metadata tuple. Since the conversion
> + * has to be made for each packet that gets decoded, optimizing access
> + * in anything other than a sequential array is worth doing.
> + */
> + etmq->traceid_list = intlist__new(NULL);
> + if (!etmq->traceid_list)
> + goto out_free;
>
> return etmq;
> +
> +out_free:
> + intlist__delete(etmq->traceid_queues_list);
> + free(etmq);
> +
> + return NULL;
> }
>
> static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> @@ -2207,7 +2264,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
> PERF_IP_FLAG_TRACE_END;
> break;
> case CS_ETM_EXCEPTION:
> - ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
> + ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
> if (ret)
> return ret;
>
> @@ -3124,7 +3181,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
> }
>
> /* map trace ids to correct metadata block, from information in metadata */
> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
> + u64 **metadata)
> {
> u64 cs_etm_magic;
> u8 trace_chan_id;
> @@ -3146,7 +3204,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> /* unknown magic number */
> return -EINVAL;
> }
> - err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
> + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
> if (err)
> return err;
> }
> @@ -3277,23 +3335,12 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> u64 *ptr = NULL;
> u64 **metadata = NULL;
>
> - /*
> - * Create an RB tree for traceID-metadata tuple. Since the conversion
> - * has to be made for each packet that gets decoded, optimizing access
> - * in anything other than a sequential array is worth doing.
> - */
> - traceid_list = intlist__new(NULL);
> - if (!traceid_list)
> - return -ENOMEM;
> -
> /* First the global part */
> ptr = (u64 *) auxtrace_info->priv;
> num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
> metadata = zalloc(sizeof(*metadata) * num_cpu);
> - if (!metadata) {
> - err = -ENOMEM;
> - goto err_free_traceid_list;
> - }
> + if (!metadata)
> + return -ENOMEM;
>
> /* Start parsing after the common part of the header */
> i = CS_HEADER_VERSION_MAX;
> @@ -3472,7 +3519,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
> /* otherwise, this is a file with metadata values only, map from metadata */
> else
> - err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
> + err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
>
> if (err)
> goto err_free_queues;
> @@ -3494,7 +3541,5 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> for (int j = 0; j < num_cpu; j++)
> zfree(&metadata[j]);
> zfree(&metadata);
> -err_free_traceid_list:
> - intlist__delete(traceid_list);
> return err;
> }
> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> index 4696267a32f0..f4f69f7cc0f3 100644
> --- a/tools/perf/util/cs-etm.h
> +++ b/tools/perf/util/cs-etm.h
> @@ -252,7 +252,7 @@ enum cs_etm_pid_fmt {
>
> #ifdef HAVE_CSTRACE_SUPPORT
> #include <opencsd/ocsd_if_types.h>
> -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
> +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
> enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
> int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
> u8 trace_chan_id, ocsd_ex_level el);
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach at linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
More information about the linux-arm-kernel
mailing list