[PATCH 3/7] perf cs-etm: Save aux records in each etm queue

Leo Yan leo.yan at linaro.org
Tue Mar 2 12:03:25 GMT 2021


On Mon, Mar 01, 2021 at 05:43:43PM +0200, James Clark wrote:

[...]

> > I'd like to propose adding a new field "cs_etm_queue::buf_rec_len";
> > it stands for the record length based on the RECORD_AUX event.  In
> > theory, this value should always be less than "cs_etm_queue::buf_len".
> > 
> > Every time a "PERF_RECORD_AUX" event comes in, we find the
> > corresponding queue (so this can be applied to the "1:1" or "N:1"
> > models for source and sink) and accumulate
> > "perf_record_aux::aux_size" into "cs_etm_queue::buf_rec_len".
> > 
> > On the decoder side, it decreases "etmq->buf_rec_len" until it
> > reaches zero for the current round of decoding (see
> > cs_etm__decode_data_block()).  Since all "PERF_RECORD_AUX" events are
> > processed before the "PERF_RECORD_EXIT" event, we don't need to worry
> > about the tail trace data being ignored.
> > 
> > The main reason for this suggestion is that it doesn't require
> > changing any significant logic in the current code.  I will try to
> > experiment with this idea and share the result back.
> > 
> > James, if you think I have missed anything, please correct me as
> > needed.  Thanks!
> > 
> 
> This is an interesting idea. I think we could push decoded packets into the
> min heap as the aux records are received, and not do anything with them until
> the end of the data is reached. That way, instead of saving aux records, we'd
> save the result of the decode for each aux record.
> 
> Currently each cs_etm_queue has a cs_etm_traceid_queue/cs_etm_packet_queue for each
> stream, but that would have to be changed to have multiple ones because multiple
> packets could be decoded to get through the whole aux record.
> 
> It would be a similarly sized change, and could also have a bigger impact on
> memory. So I'm not sure if it would help to reduce the changes, but it is possible.

The change below is still very coarse and I have only done very basic
testing on it, so it doesn't cover all cases; please take it simply as a
demonstration of the basic idea.

Before a PERF_RECORD_AUX event arrives, we don't decode any trace
data.  Once a PERF_RECORD_AUX event comes in, its aux buffer size is
accumulated into the queue, and the trace data for that queue is then
decoded based on the accumulated buffer length.
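
To make the bookkeeping easier to follow, here is a tiny standalone
model of the same idea.  The structure and names below are simplified
stand-ins for illustration only, not the real perf code: nothing is
consumed until an aux record has announced its size, and one round of
decoding stops once the announced bytes are used up.

/*
 * Toy model of the aux_buf_len bookkeeping -- illustration only; the
 * names are made up and this is not the actual cs-etm code.
 */
#include <stdio.h>

struct toy_etm_queue {
	long aux_buf_len;	/* bytes announced by PERF_RECORD_AUX */
	long buf_len;		/* bytes sitting in the auxtrace buffer */
};

/* Called when a PERF_RECORD_AUX event is delivered for this queue */
static void toy_update_aux_buf_len(struct toy_etm_queue *etmq, long aux_size)
{
	etmq->aux_buf_len += aux_size;
}

/* One round of decoding: consume at most what the aux records announced */
static void toy_run_decoder(struct toy_etm_queue *etmq)
{
	while (etmq->aux_buf_len > 0 && etmq->buf_len > 0) {
		long processed = 16;	/* pretend the decoder ate 16 bytes */

		if (processed > etmq->buf_len)
			processed = etmq->buf_len;

		etmq->buf_len -= processed;
		etmq->aux_buf_len -= processed;
	}
}

int main(void)
{
	struct toy_etm_queue etmq = { .aux_buf_len = 0, .buf_len = 128 };

	/* No PERF_RECORD_AUX yet: the decoder consumes nothing */
	toy_run_decoder(&etmq);
	printf("before aux: buf_len=%ld\n", etmq.buf_len);

	/* An aux record announces 64 bytes; only that much is consumed */
	toy_update_aux_buf_len(&etmq, 64);
	toy_run_decoder(&etmq);
	printf("after aux:  buf_len=%ld aux_buf_len=%ld\n",
	       etmq.buf_len, etmq.aux_buf_len);

	return 0;
}

In the real patch below, the accumulation happens in
cs_etm__update_aux_buf_len() and the consumption in
cs_etm__decode_data_block() / cs_etm__run_decoder().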

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b9c1d329a7f1..3bd5609b6de4 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -89,7 +89,7 @@ struct cs_etm_queue {
 	u8 pending_timestamp;
 	u64 offset;
 	const unsigned char *buf;
-	size_t buf_len, buf_used;
+	size_t aux_buf_len, buf_len, buf_used;
 	/* Conversion between traceID and index in traceid_queues array */
 	struct intlist *traceid_queues_list;
 	struct cs_etm_traceid_queue **traceid_queues;
@@ -1085,6 +1085,7 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
 		if (old_buffer)
 			auxtrace_buffer__drop_data(old_buffer);
 		etmq->buf_len = 0;
+		etmq->aux_buf_len = 0;
 		return 0;
 	}
 
@@ -2052,6 +2053,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
 	etmq->offset += processed;
 	etmq->buf_used += processed;
 	etmq->buf_len -= processed;
+	etmq->aux_buf_len -= processed;
 
 out:
 	return ret;
@@ -2177,7 +2179,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
 			 */
 			err = cs_etm__process_traceid_queue(etmq, tidq);
 
-		} while (etmq->buf_len);
+		} while (etmq->aux_buf_len > 0);
 
 		if (err == 0)
 			/* Flush any remaining branch stack entries */
@@ -2216,6 +2218,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 	return 0;
 }
 
+static void cs_etm__update_aux_buf_len(struct cs_etm_auxtrace *etm,
+				      struct perf_record_aux *aux)
+{
+	unsigned int cs_queue_nr, queue_nr;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+
+	if (!etm->heap.heap_cnt)
+		return;
+
+	/* Take the entry at the top of the min heap */
+	cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+	queue_nr = TO_QUEUE_NR(cs_queue_nr);
+	queue = &etm->queues.queue_array[queue_nr];
+	etmq = queue->priv;
+
+	etmq->aux_buf_len += aux->aux_size;
+	fprintf(stderr, "%s: aux_buf_len=%ld\n", __func__, etmq->aux_buf_len);
+	return;
+}
+
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 {
 	int ret = 0;
@@ -2272,6 +2295,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 		if (ret < 0)
 			goto out;
 
+		if (etmq->aux_buf_len <= 0)
+			goto out;
+
 		/*
 		 * No more auxtrace_buffers to process in this etmq, simply
 		 * move on to another entry in the auxtrace_heap.
@@ -2414,9 +2440,15 @@ static int cs_etm__process_event(struct perf_session *session,
 	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		return cs_etm__process_switch_cpu_wide(etm, event);
 
+	fprintf(stderr, "%s: event->header.type=%d\n", __func__, event->header.type);
+
 	if (!etm->timeless_decoding &&
-	    event->header.type == PERF_RECORD_AUX)
+	    event->header.type == PERF_RECORD_AUX) {
+
+		fprintf(stderr, "%s: aux_size=%lld\n", __func__, event->aux.aux_size);
+		cs_etm__update_aux_buf_len(etm, &event->aux);
 		return cs_etm__process_queues(etm);
+	}
 
 	return 0;
 }
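
One detail still to sort out in the diff above: "aux_buf_len" is
declared as size_t, so the new "> 0" and "<= 0" checks can only
distinguish zero from non-zero, and if the decoder ever processes a few
bytes beyond what the aux records announced, the subtraction wraps
around instead of going negative.  A minimal example of the difference,
just to illustrate the point:

#include <stdio.h>

int main(void)
{
	size_t unsigned_len = 8;
	long signed_len = 8;

	/* the decoder "consumed" 16 bytes while only 8 were announced */
	unsigned_len -= 16;	/* wraps to a huge positive value */
	signed_len -= 16;	/* becomes -8, which "<= 0" can catch */

	printf("size_t: %zu  signed: %ld\n", unsigned_len, signed_len);
	return 0;
}

So a less coarse version of this change should probably make
aux_buf_len a signed type.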


