[PATCH 2/7] perf cs-etm: Only search timestamp in current sample's queue.

James Clark james.clark at arm.com
Thu May 6 03:45:59 PDT 2021



On 12/02/2021 16:45, James Clark wrote:
> Change initial timestamp search to only operate on the queue
> related to the current event. In a later change the bounds
> of the aux record will also be used to reset the decoder and
> the record is only relevant to a single queue.
> 
> This change makes some files that had coresight data
> but didn't syntesise any events start working and generating
> events. I'm not sure of the reason for that. I'd expect this
> change to only affect the ordering of events.

I've gotten to the bottom of this mystery of why decoding starts working because of this change.
Currently:

 * _All_ decoding happens on the first AUX record
 * Decoding depends on binary data (so also depends on MMAP records)
 * Ordering of AUX records and MMAP records is timing sensitive

So there are two scenarios:
 1) The perf.data file contains MMAPs followed by AUX records. Everything works
 2) The perf.data file contains an AUX record, followed by MMAPS, then further AUX records. Decoding never worked.

Per-thread mode (timeless) always worked because we wait for EXIT rather than AUX to start the decode, which is after MMAPS.
Per-cpu mode was always at the mercy of the ordering of events. So it's not a regression that this patchset changes the behaviour
here and it's doing more of 'the right thing' now.

As a separate change I will add a warning to cs_etm__mem_access() when it fails to find the right binary as this is a current sore point.
 
James

> 
> Signed-off-by: James Clark <james.clark at arm.com>
> ---
>  tools/perf/util/cs-etm.c | 30 ++++++++++++++----------------
>  1 file changed, 14 insertions(+), 16 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 27894facae5e..8f8b448632fb 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -97,7 +97,7 @@ struct cs_etm_queue {
>  /* RB tree for quick conversion between traceID and metadata pointers */
>  static struct intlist *traceid_list;
>  
> -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
> +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu);
>  static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
>  static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
>  					   pid_t tid);
> @@ -524,7 +524,6 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
>  static int cs_etm__flush_events(struct perf_session *session,
>  				struct perf_tool *tool)
>  {
> -	int ret;
>  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
>  						   struct cs_etm_auxtrace,
>  						   auxtrace);
> @@ -534,11 +533,6 @@ static int cs_etm__flush_events(struct perf_session *session,
>  	if (!tool->ordered_events)
>  		return -EINVAL;
>  
> -	ret = cs_etm__update_queues(etm);
> -
> -	if (ret < 0)
> -		return ret;
> -
>  	if (etm->timeless_decoding)
>  		return cs_etm__process_timeless_queues(etm, -1);
>  
> @@ -851,10 +845,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>  	etmq->queue_nr = queue_nr;
>  	etmq->offset = 0;
>  
> -	if (etm->timeless_decoding)
> -		return 0;
> -	else
> -		return cs_etm__search_first_timestamp(etmq);
> +	return 0;
>  }
>  
>  static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
> @@ -874,14 +865,20 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
>  	return 0;
>  }
>  
> -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
> +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm, int cpu)
>  {
> +	int ret;
>  	if (etm->queues.new_data) {
>  		etm->queues.new_data = false;
> -		return cs_etm__setup_queues(etm);
> +		ret = cs_etm__setup_queues(etm);
> +		if (ret)
> +			return ret;
>  	}
>  
> -	return 0;
> +	if (!etm->timeless_decoding)
> +		return cs_etm__search_first_timestamp(etm->queues.queue_array[cpu].priv);
> +	else
> +		return 0;
>  }
>  
>  static inline
> @@ -2358,8 +2355,9 @@ static int cs_etm__process_event(struct perf_session *session,
>  	else
>  		timestamp = 0;
>  
> -	if (timestamp || etm->timeless_decoding) {
> -		err = cs_etm__update_queues(etm);
> +	if ((timestamp || etm->timeless_decoding)
> +			&& event->header.type == PERF_RECORD_AUX) {
> +		err = cs_etm__update_queues(etm, sample->cpu);
>  		if (err)
>  			return err;
>  	}
> 



More information about the linux-arm-kernel mailing list