[PATCH v1 00/20] Reference count checking for thread

Ian Rogers irogers at google.com
Wed Jun 7 00:07:21 PDT 2023


On Tue, Jun 6, 2023 at 6:44 PM Ian Rogers <irogers at google.com> wrote:
>
> Add reference count checking to thread after first refactoring bits of
> the code, such as making the thread red-black tree non-invasive (so
> the thread it references is easier to reference count, rather than
> having 3 potential references). Part of this refactoring also removes
> the dead thread list because if we held a reference here the threads
> would never die and anything else has questionable
> correctness. addr_location is made into its own C/header file to
> capture the init, exit and copy code.
>
> Fix additional outstanding memory leak and reference count issues to
> the point that "perf test" compiled with address sanitizer but without
> libtraceevent passes all but one test - libtraceevent reports leaks
> within its own code, most likely as it isn't compiled with
> sanitizers. The remaining failing test is "68: Test dwarf unwind" and
> that has address sanitizer issues as it uses memcpy to access the
> stack within the process - we likely want to skip parts of the test
> with sanitizers enabled.

So I tried a bit harder to break things and ran into crashes/leaks
around callchains fixed by the patch below - I used  'perf top -g' and
I'll merge the patch into a v2 for the series. The largest remaining
leak is caused by map/maps retaining dsos retaining symbols. The map
and maps are being retained by the TLS call chain cursors [1]
allocated in callchain_cursor_append. For perf report there are
similar retention issues via "struct hists", but as this maybe merged
with evsels it is messy to cleanup.

Thanks,
Ian

```
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 437325d19ad3..909f62b3b266 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -590,6 +590,7 @@ fill_node(struct callchain_node *node, struct
callchain_cursor *cursor)
               call->ip = cursor_node->ip;
               call->ms = cursor_node->ms;
               call->ms.map = map__get(call->ms.map);
+               call->ms.maps = maps__get(call->ms.maps);
               call->srcline = cursor_node->srcline;

               if (cursor_node->branch) {
@@ -649,6 +650,7 @@ add_child(struct callchain_node *parent,
               list_for_each_entry_safe(call, tmp, &new->val, list) {
                       list_del_init(&call->list);
                       map__zput(call->ms.map);
+                       maps__zput(call->ms.maps);
                       free(call);
               }
               free(new);
@@ -1010,10 +1012,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
       int err = 0;

       list_for_each_entry_safe(list, next_list, &src->val, list) {
-               callchain_cursor_append(cursor, list->ip, &list->ms,
-                                       false, NULL, 0, 0, 0, list->srcline);
+               struct map_symbol ms = {
+                       .maps = maps__get(list->ms.maps),
+                       .map = map__get(list->ms.map),
+               };
+               callchain_cursor_append(cursor, list->ip, &ms, false,
NULL, 0, 0, 0, list->srcline);
               list_del_init(&list->list);
+               map__zput(ms.map);
+               maps__zput(ms.maps);
               map__zput(list->ms.map);
+               maps__zput(list->ms.maps);
               free(list);
       }

@@ -1068,8 +1076,8 @@ int callchain_cursor_append(struct
callchain_cursor *cursor,
       maps__zput(node->ms.maps);
       map__zput(node->ms.map);
       node->ms = *ms;
-       node->ms.maps = maps__get(node->ms.maps);
-       node->ms.map = map__get(node->ms.map);
+       node->ms.maps = maps__get(ms->maps);
+       node->ms.map = map__get(ms->map);
       node->branch = branch;
       node->nr_loop_iter = nr_loop_iter;
       node->iter_cycles = iter_cycles;
@@ -1463,12 +1471,14 @@ static void free_callchain_node(struct
callchain_node *node)
       list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
               list_del_init(&list->list);
               map__zput(list->ms.map);
+               maps__zput(list->ms.maps);
               free(list);
       }

       list_for_each_entry_safe(list, tmp, &node->val, list) {
               list_del_init(&list->list);
               map__zput(list->ms.map);
+               maps__zput(list->ms.maps);
               free(list);
       }

@@ -1554,6 +1564,7 @@ int callchain_node__make_parent_list(struct
callchain_node *node)
       list_for_each_entry_safe(chain, new, &head, list) {
               list_del_init(&chain->list);
               map__zput(chain->ms.map);
+               maps__zput(chain->ms.maps);
               free(chain);
       }
       return -ENOMEM;
@@ -1599,8 +1610,10 @@ void callchain_cursor_reset(struct
callchain_cursor *cursor)
       cursor->nr = 0;
       cursor->last = &cursor->first;

-       for (node = cursor->first; node != NULL; node = node->next)
+       for (node = cursor->first; node != NULL; node = node->next) {
               map__zput(node->ms.map);
+               maps__zput(node->ms.maps);
+       }
}

void callchain_param_setup(u64 sample_type, const char *arch)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 16dc49876e87..bdad4b8bf77d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2321,7 +2321,7 @@ static int add_callchain_ip(struct thread *thread,
                           struct iterations *iter,
                           u64 branch_from)
{
-       struct map_symbol ms;
+       struct map_symbol ms = {};
       struct addr_location al;
       int nr_loop_iter = 0, err = 0;
       u64 iter_cycles = 0;
@@ -3091,6 +3091,7 @@ static int append_inlines(struct
callchain_cursor *cursor, struct map_symbol *
ms
       struct dso *dso;
       u64 addr;
       int ret = 1;
+       struct map_symbol ilist_ms;

       if (!symbol_conf.inline_name || !map || !sym)
               return ret;
@@ -3107,18 +3108,20 @@ static int append_inlines(struct
callchain_cursor *cursor, struct map_symbol
*ms
               inlines__tree_insert(&dso->inlined_nodes, inline_node);
       }

+       ilist_ms = (struct map_symbol) {
+               .maps = maps__get(ms->maps),
+               .map = map__get(map),
+       };
       list_for_each_entry(ilist, &inline_node->val, list) {
-               struct map_symbol ilist_ms = {
-                       .maps = ms->maps,
-                       .map = map,
-                       .sym = ilist->symbol,
-               };
+               ilist_ms.sym = ilist->symbol;
               ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
                                             NULL, 0, 0, 0, ilist->srcline);

               if (ret != 0)
                       return ret;
       }
+       map__put(ilist_ms.map);
+       maps__put(ilist_ms.maps);

       return ret;
}
```

[1] http://lkml.kernel.org/r/1338443007-24857-1-git-send-email-namhyung.kim@lge.com

> Ian Rogers (20):
>   perf thread: Remove notion of dead threads
>   perf thread: Make threads rbtree non-invasive
>   perf thread: Add accessor functions for thread
>   perf maps: Make delete static, always use put
>   perf addr_location: Move to its own header
>   perf addr_location: Add init/exit/copy functions
>   perf thread: Add reference count checking
>   perf machine: Make delete_threads part of machine__exit
>   perf report: Avoid thread leak
>   perf header: Ensure bitmaps are freed
>   perf stat: Avoid evlist leak
>   perf intel-pt: Fix missed put and leak
>   perf evlist: Free stats in all evlist destruction
>   perf python: Avoid 2 leak sanitizer issues
>   perf jit: Fix two thread leaks
>   perf symbol-elf: Correct holding a reference
>   perf maps: Fix overlapping memory leak
>   perf machine: Fix leak of kernel dso
>   perf machine: Don't leak module maps
>   perf map/maps/thread: Changes to reference counting
>
>  tools/perf/arch/arm/tests/dwarf-unwind.c      |   2 +-
>  tools/perf/arch/arm64/tests/dwarf-unwind.c    |   2 +-
>  tools/perf/arch/powerpc/tests/dwarf-unwind.c  |   2 +-
>  tools/perf/arch/x86/tests/dwarf-unwind.c      |   2 +-
>  tools/perf/builtin-annotate.c                 |  28 +-
>  tools/perf/builtin-c2c.c                      |  18 +-
>  tools/perf/builtin-diff.c                     |  16 +-
>  tools/perf/builtin-inject.c                   |   4 +-
>  tools/perf/builtin-kmem.c                     |  13 +-
>  tools/perf/builtin-kwork.c                    |  15 +-
>  tools/perf/builtin-mem.c                      |   4 +-
>  tools/perf/builtin-report.c                   |  21 +-
>  tools/perf/builtin-sched.c                    |  80 +++--
>  tools/perf/builtin-script.c                   |  97 +++---
>  tools/perf/builtin-stat.c                     |   1 +
>  tools/perf/builtin-timechart.c                |  11 +-
>  tools/perf/builtin-top.c                      |   8 +-
>  tools/perf/builtin-trace.c                    |  38 ++-
>  .../scripts/python/Perf-Trace-Util/Context.c  |   4 +-
>  tools/perf/tests/code-reading.c               |   6 +-
>  tools/perf/tests/dwarf-unwind.c               |   1 -
>  tools/perf/tests/hists_common.c               |   2 +-
>  tools/perf/tests/hists_cumulate.c             |  18 +-
>  tools/perf/tests/hists_filter.c               |  11 +-
>  tools/perf/tests/hists_link.c                 |  20 +-
>  tools/perf/tests/hists_output.c               |  12 +-
>  tools/perf/tests/maps.c                       |   2 +-
>  tools/perf/tests/mmap-thread-lookup.c         |   5 +-
>  tools/perf/tests/perf-targz-src-pkg           |   5 +-
>  tools/perf/tests/symbols.c                    |   1 -
>  tools/perf/tests/thread-maps-share.c          |  13 +-
>  tools/perf/trace/beauty/pid.c                 |   4 +-
>  tools/perf/ui/browsers/hists.c                |  19 +-
>  tools/perf/ui/hist.c                          |   5 +-
>  tools/perf/ui/stdio/hist.c                    |   2 +-
>  tools/perf/util/Build                         |   1 +
>  tools/perf/util/addr_location.c               |  44 +++
>  tools/perf/util/addr_location.h               |  31 ++
>  tools/perf/util/arm-spe.c                     |   4 +-
>  tools/perf/util/build-id.c                    |   2 +
>  tools/perf/util/callchain.c                   |   7 +-
>  tools/perf/util/cs-etm.c                      |  28 +-
>  tools/perf/util/data-convert-json.c           |  16 +-
>  tools/perf/util/db-export.c                   |  20 +-
>  tools/perf/util/dlfilter.c                    |  17 +-
>  tools/perf/util/event.c                       |  37 +--
>  tools/perf/util/evlist.c                      |   2 +
>  tools/perf/util/evsel_fprintf.c               |   8 +-
>  tools/perf/util/header.c                      |  12 +-
>  tools/perf/util/hist.c                        |  22 +-
>  tools/perf/util/intel-bts.c                   |   2 +-
>  tools/perf/util/intel-pt.c                    |  88 +++---
>  tools/perf/util/jitdump.c                     |  12 +-
>  tools/perf/util/machine.c                     | 277 +++++++++---------
>  tools/perf/util/map.c                         |   2 +-
>  tools/perf/util/maps.c                        |   5 +-
>  tools/perf/util/maps.h                        |   9 +-
>  tools/perf/util/python.c                      |   4 +
>  .../scripting-engines/trace-event-python.c    |  28 +-
>  tools/perf/util/session.c                     |   8 +-
>  tools/perf/util/sort.c                        |  12 +-
>  tools/perf/util/symbol-elf.c                  |   4 +-
>  tools/perf/util/symbol.h                      |  17 +-
>  tools/perf/util/thread-stack.c                |  25 +-
>  tools/perf/util/thread.c                      | 218 +++++++-------
>  tools/perf/util/thread.h                      | 210 +++++++++++--
>  tools/perf/util/unwind-libdw.c                |  27 +-
>  tools/perf/util/unwind-libunwind-local.c      |  19 +-
>  tools/perf/util/unwind-libunwind.c            |   2 +-
>  tools/perf/util/vdso.c                        |   2 +-
>  70 files changed, 1059 insertions(+), 655 deletions(-)
>  create mode 100644 tools/perf/util/addr_location.c
>  create mode 100644 tools/perf/util/addr_location.h
>
> --
> 2.41.0.rc0.172.g3f132b7071-goog
>



More information about the linux-arm-kernel mailing list