[PATCH v3 3/7] riscv: ftrace: prepare ftrace for atomic code patching

Björn Töpel bjorn at kernel.org
Fri Dec 6 02:02:29 PST 2024


Adding Robbin for input, who's doing much more crazy text patching in
JVM, than what we do in the kernel. ;-)

Andy Chiu <andybnac at gmail.com> writes:

> From: Andy Chiu <andy.chiu at sifive.com>
>
> We use an AUIPC+JALR pair to jump into a ftrace trampoline. Since
> instruction fetch can break down to 4 byte at a time, it is impossible
> to update two instructions without a race. In order to mitigate it, we
> initialize the patchable entry to AUIPC + NOP4. Then, the run-time code
> patching can change NOP4 to JALR to eable/disable ftrcae from a
> function. This limits the reach of each ftrace entry to +-2KB displacing
> from ftrace_caller.
>
> Starting from the trampoline, we add a level of indirection for it to
> reach ftrace caller target. Now, it loads the target address from a
> memory location, then perform the jump. This enable the kernel to update
> the target atomically.
>
> The ordering of reading/updating the targert address should be guarded
> by generic ftrace code, where it sends smp_rmb ipi.

Let's say we're tracing "f". Previously w/ stop_machine() it was
something like:

f:
1: nop
   nop
   ...
   ...

ftrace_caller:
   ...
   auipc	a2, function_trace_op
   ld		a2, function_trace_op(a2)
   ...
2: auipc    ra, ftrace_stub
   jalr     ftrace_stub(ra)

The text was patched by ftrace in 1 and 2.

...and now:
f:
   auipc    t0, ftrace_caller
A: nop
   ...
   ...

ftrace_caller:
   ...
   auipc    a2, function_trace_op
   ld       a2, function_trace_op(a2)
   ...
   auipc    ra, ftrace_call_dest
   ld       ra, ftrace_call_dest(ra)
   jalr     ra 

The text is only patched in A, and the tracer func is loaded via
ftrace_call_dest.

Today, when we enable trace "f" the following is done by ftrace:
  Text patch 2: call ftrace_stub -> call arch_ftrace_ops_list_func
  Text patch 1: nop,nop -> call ftrace_caller
  store function_trace_op
  smp_wmb()
  IPI: smp_rmb()
  Text patch 2: call arch_ftrace_ops_list_func -> call function_trace_call

Disable, would be something like:
  Text patch 2: call function_trace_call -> call arch_ftrace_ops_list_func
  Text patch 1: call ftrace_caller -> nop,nop
  store function_trace_op
  smp_wmb()
  IPI: smp_rmb()
  Text patch 2: call arch_ftrace_ops_list_func -> call ftrace_stub

Now with this change, enable would be:
  store ftrace_call_dest (was: Text patch 2: call ftrace_stub -> call arch_ftrace_ops_list_func)
  <<ORDERING>>
  Text patch A: nop -> jalr ftrace_caller(t0)
  store function_trace_op
  smp_wmb()
  IPI: smp_rmb()
  store ftrace_call_dest (was: Text patch 2: call arch_ftrace_ops_list_func -> call function_trace_call)

Seems like we're missing some data ordering in "<<ORDERING>>", wrt to
the text patching A (The arch specific ftrace_update_ftrace_func())? Or
are we OK with reordering there? Maybe add what's done for
function_trace_op?

[...]

> Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
> ---
>  arch/riscv/include/asm/ftrace.h |  4 ++
>  arch/riscv/kernel/ftrace.c      | 80 +++++++++++++++++++++------------
>  arch/riscv/kernel/mcount-dyn.S  |  9 ++--
>  3 files changed, 62 insertions(+), 31 deletions(-)
>
> diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
> index 4ca7ce7f34d7..36734d285aad 100644
> --- a/arch/riscv/include/asm/ftrace.h
> +++ b/arch/riscv/include/asm/ftrace.h
> @@ -80,6 +80,7 @@ struct dyn_arch_ftrace {
>  #define JALR_T0			(0x000282e7)
>  #define AUIPC_T0		(0x00000297)
>  #define NOP4			(0x00000013)
> +#define JALR_RANGE		(JALR_SIGN_MASK - 1)
>  
>  #define to_jalr_t0(offset)						\
>  	(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
> @@ -117,6 +118,9 @@ do {									\
>   * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
>   */
>  #define MCOUNT_INSN_SIZE 8
> +#define MCOUNT_AUIPC_SIZE	4
> +#define MCOUNT_JALR_SIZE	4
> +#define MCOUNT_NOP4_SIZE	4

Align please.

>  
>  #ifndef __ASSEMBLY__
>  struct dyn_ftrace;
> diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
> index 4b95c574fd04..5ebe412280ef 100644
> --- a/arch/riscv/kernel/ftrace.c
> +++ b/arch/riscv/kernel/ftrace.c
> @@ -64,42 +64,64 @@ static int ftrace_check_current_call(unsigned long hook_pos,
>  	return 0;
>  }
>  
> -static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
> -				bool enable, bool ra)
> +static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, bool validate)

While we're updating this function; Can we rename hook_pos to something
that makes sense from an ftrace perspective?

>  {
>  	unsigned int call[2];
> -	unsigned int nops[2] = {NOP4, NOP4};
> +	unsigned int replaced[2];
> +
> +	make_call_t0(hook_pos, target, call);
>  
> -	if (ra)
> -		make_call_ra(hook_pos, target, call);
> -	else
> -		make_call_t0(hook_pos, target, call);
> +	if (validate) {
> +		/*
> +		 * Read the text we want to modify;
> +		 * return must be -EFAULT on read error
> +		 */
> +		if (copy_from_kernel_nofault(replaced, (void *)hook_pos,
> +					     MCOUNT_INSN_SIZE))

Don't wrap this line.

> +			return -EFAULT;
> +
> +		if (replaced[0] != call[0]) {
> +			pr_err("%p: expected (%08x) but got (%08x)\n",
> +			       (void *)hook_pos, call[0], replaced[0]);
> +			return -EINVAL;
> +		}
> +	}
>  
> -	/* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
> -	if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
> +	/* Replace the jalr at once. Return -EPERM on write error. */
> +	if (patch_insn_write((void *)(hook_pos + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE))
>  		return -EPERM;
>  
>  	return 0;
>  }
>  
> -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +static int __ftrace_modify_call_site(ftrace_func_t *hook_pos, ftrace_func_t target, bool enable)
>  {
> -	unsigned int call[2];
> +	ftrace_func_t call = target;
> +	ftrace_func_t nops = &ftrace_stub;

Confusing to call nops. This is not nops. This is the ftrace_stub. Also
the __ftrace_modify_call_site is not super clear to me. Maybe just ditch
the enable flag, and have two functions? Or just or inline it?

>  
> -	make_call_t0(rec->ip, addr, call);
> -
> -	if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE))
> -		return -EPERM;
> +	WRITE_ONCE(*hook_pos, enable ? call : nops);
>  
>  	return 0;
>  }
>  
> +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +{
> +	unsigned long distance, orig_addr;
> +
> +	orig_addr = (unsigned long)&ftrace_caller;
> +	distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr;
> +	if (distance > JALR_RANGE)
> +		return -EINVAL;
> +
> +	return __ftrace_modify_call(rec->ip, addr, false);
> +}
> +
>  int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
>  		    unsigned long addr)
>  {
> -	unsigned int nops[2] = {NOP4, NOP4};
> +	unsigned int nops[1] = {NOP4};

It's just one nop, not nops. No biggie, but why array?

>  
> -	if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
> +	if (patch_insn_write((void *)(rec->ip + MCOUNT_AUIPC_SIZE), nops, MCOUNT_NOP4_SIZE))
>  		return -EPERM;
>  
>  	return 0;
> @@ -114,21 +136,23 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
>   */
>  int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
>  {
> +	unsigned int nops[2];
>  	int out;
>  
> +	make_call_t0(rec->ip, &ftrace_caller, nops);
> +	nops[1] = NOP4;
> +
>  	mutex_lock(&text_mutex);
> -	out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
> +	out = patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE);
>  	mutex_unlock(&text_mutex);
>  
>  	return out;
>  }
>  
> +ftrace_func_t ftrace_call_dest = ftrace_stub;
>  int ftrace_update_ftrace_func(ftrace_func_t func)
>  {
> -	int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
> -				       (unsigned long)func, true, true);
> -
> -	return ret;
> +	return __ftrace_modify_call_site(&ftrace_call_dest, func, true);
>  }
>  
>  struct ftrace_modify_param {
> @@ -182,7 +206,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
>  	if (ret)
>  		return ret;
>  
> -	return __ftrace_modify_call(caller, addr, true, false);
> +	return __ftrace_modify_call(caller, addr, true);
>  }
>  #endif
>  
> @@ -217,17 +241,17 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
>  	prepare_ftrace_return(&fregs->ra, ip, fregs->s0);
>  }
>  #else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
> -extern void ftrace_graph_call(void);
> +ftrace_func_t ftrace_graph_call_dest = ftrace_stub;
>  int ftrace_enable_ftrace_graph_caller(void)
>  {
> -	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
> -				    (unsigned long)&prepare_ftrace_return, true, true);
> +	return __ftrace_modify_call_site(&ftrace_graph_call_dest,
> +					 &prepare_ftrace_return, true);
>  }
>  
>  int ftrace_disable_ftrace_graph_caller(void)
>  {
> -	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
> -				    (unsigned long)&prepare_ftrace_return, false, true);
> +	return __ftrace_modify_call_site(&ftrace_graph_call_dest,
> +					 &prepare_ftrace_return, false);
>  }
>  #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
>  #endif /* CONFIG_DYNAMIC_FTRACE */
> diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
> index e988bd26b28b..bc06e8ab81cf 100644
> --- a/arch/riscv/kernel/mcount-dyn.S
> +++ b/arch/riscv/kernel/mcount-dyn.S
> @@ -162,7 +162,8 @@ SYM_FUNC_START(ftrace_caller)
>  	mv	a3, sp
>  
>  SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
> -	call	ftrace_stub
> +	REG_L	ra, ftrace_call_dest
> +	jalr	0(ra)
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  	addi	a0, sp, ABI_RA
> @@ -172,7 +173,8 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
>  	mv	a2, s0
>  #endif
>  SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
> -	call	ftrace_stub
> +	REG_L	ra, ftrace_graph_call_dest
> +	jalr	0(ra)
>  #endif
>  	RESTORE_ABI
>  	jr	t0
> @@ -185,7 +187,8 @@ SYM_FUNC_START(ftrace_caller)
>  	PREPARE_ARGS
>  
>  SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)

Not used, please remove.

> -	call	ftrace_stub
> +	REG_L	ra, ftrace_call_dest
> +	jalr	0(ra)
>  
>  	RESTORE_ABI_REGS
>  	bnez	t1, .Ldirect
> -- 
> 2.39.3 (Apple Git-145)



Björn



More information about the linux-riscv mailing list