[PATCH V4 1/3] tracing: add a possibility of exporting function trace to other places instead of ring buffer only

Chunyan Zhang zhang.chunyan at linaro.org
Wed Aug 17 05:48:07 PDT 2016


Hello Steve,

Please correct me if I'm missing something in my following reply.

On 15 August 2016 at 22:28, Steven Rostedt <rostedt at goodmis.org> wrote:
> On Mon, 15 Aug 2016 19:50:01 +0800
> Chunyan Zhang <zhang.chunyan at linaro.org> wrote:
>
>> +int register_trace_export(struct trace_export *export);
>> +int unregister_trace_export(struct trace_export *export);
>> +
>> +#endif       /* CONFIG_TRACING */
>> +
>> +#endif       /* _LINUX_TRACE_H */
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index dade4c9..0247ac2 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -40,6 +40,7 @@
>>  #include <linux/poll.h>
>>  #include <linux/nmi.h>
>>  #include <linux/fs.h>
>> +#include <linux/trace.h>
>>  #include <linux/sched/rt.h>
>>
>>  #include "trace.h"
>> @@ -2128,6 +2129,127 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
>>       ftrace_trace_userstack(buffer, flags, pc);
>>  }
>>
>> +static inline void
>> +trace_generic_commit(struct trace_array *tr,
>> +            struct ring_buffer_event *event)
>
> Why is this marked inline? It is never called directly here.
>
>> +{
>> +     struct trace_entry *entry;
>> +     struct trace_export *export = tr->export;
>> +     unsigned int size = 0;
>> +
>> +     entry = ring_buffer_event_data(event);
>> +
>> +     trace_entry_size(size, entry->type);
>> +     if (!size)
>> +             return;
>> +
>> +     if (export->write)
>> +             export->write((char *)entry, size);
>> +}
>> +
>> +static inline void
>
> Same here.
>
>> +trace_rb_commit(struct trace_array *tr,
>> +            struct ring_buffer_event *event)
>> +{
>> +     __buffer_unlock_commit(tr->trace_buffer.buffer, event);
>> +}
>> +
>> +static DEFINE_MUTEX(trace_export_lock);
>> +
>> +static struct trace_export trace_export_rb __read_mostly = {
>> +     .name           = "rb",
>> +     .commit = trace_rb_commit,
>
> Make sure equal signs are lined up.
>
>> +     .next           = NULL,
>> +};
>> +static struct trace_export *trace_exports_list __read_mostly = &trace_export_rb;
>
> trace_exports_list needs to be annotated as rcu, if you are going to
> dereference it via rcu_dereference. That was the kbuild warning you
> received.

Ok, will do.

But I guess 'ftrace_ops_list' in ftrace.c [1] also should be annotated as rcu?

[1] http://lxr.free-electrons.com/source/kernel/trace/ftrace.c#L460

>
>> +
>> +inline void
>> +trace_exports(struct trace_array *tr, struct ring_buffer_event *event)
>> +{
>> +     struct trace_export *export;
>> +
>> +     preempt_disable_notrace();
>> +
>> +     for (export = rcu_dereference_raw_notrace(trace_exports_list);
>> +          export && export->commit;
>> +          export = rcu_dereference_raw_notrace(export->next)) {
>> +             tr->export = export;
>> +             export->commit(tr, event);
>> +     }
>> +
>> +     preempt_enable_notrace();
>> +}
>
> I'm not too happy about the added overhead to normal function tracing
> (it will be noticeable), but I can fix that later.

I think I will try to revise here in the next revision.

>
>> +
>> +static void
>> +add_trace_export(struct trace_export **list, struct trace_export *export)
>> +{
>> +     export->next = *list;
>
> As export->next will most likely need to be marked __rcu as well, this
> may need an rcu_assign_pointer() too.
>
>> +     /*
>> +      * We are entering export into the list but another
>> +      * CPU might be walking that list. We need to make sure
>> +      * the export->next pointer is valid before another CPU sees
>> +      * the export pointer included into the list.
>> +      */
>> +     rcu_assign_pointer(*list, export);
>> +
>> +}
>> +
>> +static int
>> +rm_trace_export(struct trace_export **list, struct trace_export *export)
>> +{
>> +     struct trace_export **p;
>> +
>> +     for (p = list; *p != &trace_export_rb; p = &(*p)->next)
>> +             if (*p == export)
>> +                     break;
>> +
>> +     if (*p != export)
>> +             return -1;
>> +
>> +     *p = (*p)->next;
>
> I believe this requires an rcu_assign_pointer() as well.
>
>> +
>> +     return 0;
>> +}
>> +
>> +int register_trace_export(struct trace_export *export)
>> +{
>> +     if (!export->write) {
>> +             pr_warn("trace_export must have the write() call back.\n");
>
> Probably should be a "WARN_ON_ONCE()".

Yes, will revise.

>
>> +             return -1;
>> +     }
>> +
>> +     if (!export->name) {
>> +             pr_warn("trace_export must have a name.\n");
>> +             return -1;
>> +     }
>
> If name isn't used don't add it till it is. That is, this patch should
> not have "name" in the structure. It's confusing, because I don't see a
> purpose for it.

Ok will remove that.

>
>> +
>> +     mutex_lock(&trace_export_lock);
>> +
>> +     export->tr = trace_exports_list->tr;
>
> I don't see where tr is ever assigned.
>
>> +     export->commit = trace_generic_commit;
>
> Shouldn't the caller pass in the commit function too?

The trace_export::write() callback is for caller, commit function
mainly deal with traces, is it better to keep 'trace_generic_commit'
in trace.c, i.e don't expose it to callers?

>
>> +
>> +     add_trace_export(&trace_exports_list, export);
>> +
>> +     mutex_unlock(&trace_export_lock);
>> +
>> +     return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(register_trace_export);
>> +
>> +int unregister_trace_export(struct trace_export *export)
>> +{
>> +     int ret;
>> +
>> +     mutex_lock(&trace_export_lock);
>> +
>> +     ret = rm_trace_export(&trace_exports_list, export);
>> +
>> +     mutex_unlock(&trace_export_lock);
>> +
>> +     return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(unregister_trace_export);
>> +
>>  void
>>  trace_function(struct trace_array *tr,
>>              unsigned long ip, unsigned long parent_ip, unsigned long flags,
>> @@ -2147,7 +2269,7 @@ trace_function(struct trace_array *tr,
>>       entry->parent_ip                = parent_ip;
>>
>>       if (!call_filter_check_discard(call, entry, buffer, event))
>
> How do you handle the discard? If the entry doesn't match the filter,
> it will try to discard the event. I don't see how the "trace_exports"
> handle that.

I directly used the entries which had already been filtered.
Humm.. sorry, actually you lost me here.

>
>
>> -             __buffer_unlock_commit(buffer, event);
>> +             trace_exports(tr, event);
>>  }
>>
>>  #ifdef CONFIG_STACKTRACE
>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>> index f783df4..a40f07c 100644
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -260,6 +260,7 @@ struct trace_array {
>>       /* function tracing enabled */
>>       int                     function_enabled;
>>  #endif
>> +     struct trace_export     *export;
>>  };
>>
>>  enum {
>> @@ -301,6 +302,13 @@ static inline struct trace_array *top_trace_array(void)
>>               break;                                  \
>>       }
>>
>> +#undef IF_SIZE
>> +#define IF_SIZE(size, var, etype, id)                \
>> +             if (var == id) {                \
>> +                     size = (sizeof(etype)); \
>> +                     break;                  \
>> +             }
>> +
>>  /* Will cause compile errors if type is not found. */
>>  extern void __ftrace_bad_type(void);
>>
>> @@ -339,6 +347,29 @@ extern void __ftrace_bad_type(void);
>>       } while (0)
>>
>>  /*
>> + * The trace_entry_size return the size of specific trace type
>> + *
>> + * IF_SIZE(size, var);
>> + *
>> + * Where "var" is just the given trace type.
>> + */
>> +#define trace_entry_size(size, var)                                  \
>> +     do {                                                            \
>> +             IF_SIZE(size, var, struct ftrace_entry, TRACE_FN);      \
>> +             IF_SIZE(size, var, struct stack_entry, TRACE_STACK);    \
>> +             IF_SIZE(size, var, struct userstack_entry,              \
>> +                     TRACE_USER_STACK);                              \
>> +             IF_SIZE(size, var, struct print_entry, TRACE_PRINT);    \
>> +             IF_SIZE(size, var, struct bprint_entry, TRACE_BPRINT);  \
>> +             IF_SIZE(size, var, struct bputs_entry, TRACE_BPUTS);    \
>> +             IF_SIZE(size, var, struct trace_branch, TRACE_BRANCH);  \
>> +             IF_SIZE(size, var, struct ftrace_graph_ent_entry,       \
>> +                     TRACE_GRAPH_ENT);                               \
>> +             IF_SIZE(size, var, struct ftrace_graph_ret_entry,       \
>> +                     TRACE_GRAPH_RET);                               \
>
> I really really dislike this. This is a big if statement that all
> needs to go down one by one. Very inefficient!

Ok, will change to other implementation.


Thanks for your comments,
Chunyan

>
> -- Steve
>
>> +     } while (0)
>> +
>> +/*
>>   * An option specific to a tracer. This is a boolean value.
>>   * The bit is the bit index that sets its value on the
>>   * flags value in struct tracer_flags.
>



More information about the linux-arm-kernel mailing list