[PATCH 3/7] perf cs-etm: Save aux records in each etm queue
Leo Yan
leo.yan at linaro.org
Tue Mar 2 12:03:25 GMT 2021
On Mon, Mar 01, 2021 at 05:43:43PM +0200, James Clark wrote:
[...]
> > I'd like to propose to add a new field "cs_etm_queue::buf_rec_len", it
> > stands for the record length based on the RECORD_AUX event. In
> > theory, this value should always be less than "cs_etm_queue::buf_len".
> >
> > Every time a "PERF_RECORD_AUX" event comes in, we find the
> > corresponding queue (so this can be applied to the "1:1" or "N:1" models
> > for source and sink), and accumulate "perf_record_aux::aux_size" into
> > "cs_etm_queue::buf_rec_len".
> >
> > At the decoder side, it decreases "etmq->buf_rec_len" down to zero for
> > the current round of decoding (see cs_etm__decode_data_block()). Since
> > all the "PERF_RECORD_AUX" events will be processed before the
> > "PERF_RECORD_EXIT" event, we don't need to worry that the tail trace
> > data will be ignored.
> >
> > The main reason for this suggestion is that it doesn't require changing
> > significant logic in the current code. I will try to run an experiment
> > for this idea and share the results.
> >
> > James, if you think I miss anything, please correct me as needed.
> > Thanks!
> >
>
> This is an interesting idea, I think we could push decoded packets into the
> min heap as the aux records are received, and not do anything with them until
> the end of the data is reached. That way instead of saving aux records, we'd
> save the result of the decode for each aux record.
>
> Currently each cs_etm_queue has a cs_etm_traceid_queue/cs_etm_packet_queue for each
> stream, but that would have to be changed to have multiple ones because multiple
> packets could be decoded to get through the whole aux record.
>
> It would be a similarly sized change, and could also have a bigger impact on
> memory. So I'm not sure if it would help to reduce the changes, but it is possible.
The change below is still very coarse and I have only done basic testing
on it, so it doesn't cover all cases; please treat it simply as a
demonstration of the basic idea.
Before the event PERF_RECORD_AUX arrives, we don't decode any trace
data. After PERF_RECORD_AUX arrives, the aux buffer size is accumulated
into the queue, and the trace data for the queue is decoded based on the
accumulated buffer length.
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b9c1d329a7f1..3bd5609b6de4 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -89,7 +89,7 @@ struct cs_etm_queue {
u8 pending_timestamp;
u64 offset;
const unsigned char *buf;
- size_t buf_len, buf_used;
+ size_t aux_buf_len, buf_len, buf_used;
/* Conversion between traceID and index in traceid_queues array */
struct intlist *traceid_queues_list;
struct cs_etm_traceid_queue **traceid_queues;
@@ -1085,6 +1085,7 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
etmq->buf_len = 0;
+ etmq->aux_buf_len = 0;
return 0;
}
@@ -2052,6 +2053,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
etmq->offset += processed;
etmq->buf_used += processed;
etmq->buf_len -= processed;
+ etmq->aux_buf_len -= processed;
out:
return ret;
@@ -2177,7 +2179,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
*/
err = cs_etm__process_traceid_queue(etmq, tidq);
- } while (etmq->buf_len);
+ } while (etmq->aux_buf_len > 0);
if (err == 0)
/* Flush any remaining branch stack entries */
@@ -2216,6 +2218,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
return 0;
}
+static void cs_etm__update_aux_buf_len(struct cs_etm_auxtrace *etm,
+				       struct perf_record_aux *aux)
+{
+	unsigned int cs_queue_nr, queue_nr;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+
+	if (!etm->heap.heap_cnt)
+		return;
+
+	/* Credit this AUX record to the queue at the top of the min heap */
+	cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+	queue_nr = TO_QUEUE_NR(cs_queue_nr);
+	queue = &etm->queues.queue_array[queue_nr];
+	etmq = queue->priv;
+
+	etmq->aux_buf_len += aux->aux_size;
+	/* aux_buf_len is size_t: use %zu; %ld is UB where long != size_t */
+	fprintf(stderr, "%s: aux_buf_len=%zu\n", __func__, etmq->aux_buf_len);
+}
+
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
{
int ret = 0;
@@ -2272,6 +2295,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
if (ret < 0)
goto out;
+ if (etmq->aux_buf_len <= 0)
+ goto out;
+
/*
* No more auxtrace_buffers to process in this etmq, simply
* move on to another entry in the auxtrace_heap.
@@ -2414,9 +2440,15 @@ static int cs_etm__process_event(struct perf_session *session,
else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
return cs_etm__process_switch_cpu_wide(etm, event);
+ fprintf(stderr, "%s: event->header.type=%d\n", __func__, event->header.type);
+
if (!etm->timeless_decoding &&
- event->header.type == PERF_RECORD_AUX)
+ event->header.type == PERF_RECORD_AUX) {
+
+ fprintf(stderr, "%s: aux_size=%lld\n", __func__, event->aux.aux_size);
+ cs_etm__update_aux_buf_len(etm, &event->aux);
return cs_etm__process_queues(etm);
+ }
return 0;
}
More information about the linux-arm-kernel
mailing list