[PATCH v3 2/4] fprobe: make fprobe_kprobe_handler recursion free

Yafang Shao laoar.shao at gmail.com
Wed Jun 28 00:16:47 PDT 2023


On Wed, May 17, 2023 at 11:45 AM Ze Gao <zegao2021 at gmail.com> wrote:
>
> The current implementation calls kprobe-related functions before doing
> the ftrace recursion check in fprobe_kprobe_handler, which opens the door
> to a kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable in kprobe_busy_{begin, end}.
>
> Without this patch, things go like this (quoted from Steven):
> "
> fprobe_kprobe_handler() {
>    kprobe_busy_begin() {
>       preempt_disable() {
>          preempt_count_add() {  <-- trace
>             fprobe_kprobe_handler() {
>                 [ wash, rinse, repeat, CRASH!!! ]
> "
>
> By refactoring the common part out of fprobe_kprobe_handler and
> fprobe_handler and calling ftrace recursion detection at the very
> beginning, the whole fprobe_kprobe_handler is free from recursion.
>
> Signed-off-by: Ze Gao <zegao at tencent.com>
> Acked-by: Masami Hiramatsu (Google) <mhiramat at kernel.org>
> Link: https://lore.kernel.org/linux-trace-kernel/20230516071830.8190-3-zegao@tencent.com
> ---
>  kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 44 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..097c740799ba 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
>         char data[];
>  };
>
> -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> -                          struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +static inline void __fprobe_handler(unsigned long ip, unsigned long
> +               parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
>  {
>         struct fprobe_rethook_node *fpr;
>         struct rethook_node *rh = NULL;
>         struct fprobe *fp;
>         void *entry_data = NULL;
> -       int bit, ret;
> +       int ret;
>
>         fp = container_of(ops, struct fprobe, ops);
> -       if (fprobe_disabled(fp))
> -               return;
> -
> -       bit = ftrace_test_recursion_trylock(ip, parent_ip);
> -       if (bit < 0) {
> -               fp->nmissed++;
> -               return;
> -       }
>
>         if (fp->exit_handler) {
>                 rh = rethook_try_get(fp->rethook);
>                 if (!rh) {
>                         fp->nmissed++;
> -                       goto out;
> +                       return;
>                 }
>                 fpr = container_of(rh, struct fprobe_rethook_node, node);
>                 fpr->entry_ip = ip;
> @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
>                 else
>                         rethook_hook(rh, ftrace_get_regs(fregs), true);
>         }
> -out:
> +}
> +
> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> +               struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> +       struct fprobe *fp;
> +       int bit;
> +
> +       fp = container_of(ops, struct fprobe, ops);
> +       if (fprobe_disabled(fp))
> +               return;
> +
> +       /* recursion detection has to go before any traceable function and
> +        * all functions before this point should be marked as notrace
> +        */
> +       bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +       if (bit < 0) {
> +               fp->nmissed++;
> +               return;
> +       }
> +       __fprobe_handler(ip, parent_ip, ops, fregs);
>         ftrace_test_recursion_unlock(bit);
> +
>  }
>  NOKPROBE_SYMBOL(fprobe_handler);
>
>  static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
>                                   struct ftrace_ops *ops, struct ftrace_regs *fregs)
>  {
> -       struct fprobe *fp = container_of(ops, struct fprobe, ops);
> +       struct fprobe *fp;
> +       int bit;
> +
> +       fp = container_of(ops, struct fprobe, ops);
> +       if (fprobe_disabled(fp))
> +               return;
> +
> +       /* recursion detection has to go before any traceable function and
> +        * all functions called before this point should be marked as notrace
> +        */
> +       bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +       if (bit < 0) {
> +               fp->nmissed++;
> +               return;
> +       }
>
>         if (unlikely(kprobe_running())) {
>                 fp->nmissed++;

I have just looked through this patchset. Just out of curiosity:
shouldn't we call ftrace_test_recursion_unlock(bit) here, before this
early return? ftrace_test_recursion_trylock() has already succeeded at
this point, so why should we not unlock it?

>                 return;
>         }
> +
>         kprobe_busy_begin();
> -       fprobe_handler(ip, parent_ip, ops, fregs);
> +       __fprobe_handler(ip, parent_ip, ops, fregs);
>         kprobe_busy_end();
> +       ftrace_test_recursion_unlock(bit);
>  }
>
>  static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> --
> 2.40.1
>
>


-- 
Regards
Yafang



More information about the linux-riscv mailing list