[PATCH v5 7/9] riscv/kprobe: Prepare detour buffer for optimized kprobe

liaochang (A) liaochang1 at huawei.com
Wed Jan 4 00:35:22 PST 2023



在 2023/1/3 2:04, Björn Töpel 写道:
> Chen Guokai <chenguokai17 at mails.ucas.ac.cn> writes:
> 
>> From: Liao Chang <liaochang1 at huawei.com>
> 
>> diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
>> index 258a283c906d..bc232fce5b39 100644
>> --- a/arch/riscv/kernel/probes/opt.c
>> +++ b/arch/riscv/kernel/probes/opt.c
>> @@ -11,9 +11,37 @@
>>  #include <linux/kprobes.h>
>>  #include <asm/kprobes.h>
>>  #include <asm/patch.h>
>> +#include <asm/asm-offsets.h>
>>  
>>  #include "simulate-insn.h"
>>  #include "decode-insn.h"
>> +#include "../../net/bpf_jit.h"
>> +
>> +static void
> 
> Super-nit, but I really prefer *not* breaking function name and return
> value, for grepability.

OK, i will keep function name and return at the same line.

> 
>> diff --git a/arch/riscv/kernel/probes/opt_trampoline.S b/arch/riscv/kernel/probes/opt_trampoline.S
>> index 16160c4367ff..75e34e373cf2 100644
>> --- a/arch/riscv/kernel/probes/opt_trampoline.S
>> +++ b/arch/riscv/kernel/probes/opt_trampoline.S
>> @@ -1,12 +1,137 @@
>>  /* SPDX-License-Identifier: GPL-2.0-only */
>>  /*
>>   * Copyright (C) 2022 Guokai Chen
>> + * Copyright (C) 2022 Liao, Chang <liaochang1 at huawei.com>
>>   */
>>  
>>  #include <linux/linkage.h>
>>  
>> +#include <asm/asm.h>
>>  #incldue <asm/csr.h>
>>  #include <asm/asm-offsets.h>
>>  
>>  SYM_ENTRY(optprobe_template_entry, SYM_L_GLOBAL, SYM_A_NONE)
>> +	addi  sp, sp, -(PT_SIZE_ON_STACK)
>> +	REG_S x1,  PT_RA(sp)
>> +	REG_S x2,  PT_SP(sp)
>> +	REG_S x3,  PT_GP(sp)
>> +	REG_S x4,  PT_TP(sp)
>> +	REG_S x5,  PT_T0(sp)
>> +	REG_S x6,  PT_T1(sp)
>> +	REG_S x7,  PT_T2(sp)
>> +	REG_S x8,  PT_S0(sp)
>> +	REG_S x9,  PT_S1(sp)
>> +	REG_S x10, PT_A0(sp)
>> +	REG_S x11, PT_A1(sp)
>> +	REG_S x12, PT_A2(sp)
>> +	REG_S x13, PT_A3(sp)
>> +	REG_S x14, PT_A4(sp)
>> +	REG_S x15, PT_A5(sp)
>> +	REG_S x16, PT_A6(sp)
>> +	REG_S x17, PT_A7(sp)
>> +	REG_S x18, PT_S2(sp)
>> +	REG_S x19, PT_S3(sp)
>> +	REG_S x20, PT_S4(sp)
>> +	REG_S x21, PT_S5(sp)
>> +	REG_S x22, PT_S6(sp)
>> +	REG_S x23, PT_S7(sp)
>> +	REG_S x24, PT_S8(sp)
>> +	REG_S x25, PT_S9(sp)
>> +	REG_S x26, PT_S10(sp)
>> +	REG_S x27, PT_S11(sp)
>> +	REG_S x28, PT_T3(sp)
>> +	REG_S x29, PT_T4(sp)
>> +	REG_S x30, PT_T5(sp)
>> +	REG_S x31, PT_T6(sp)
>> +	/* Update fp is friendly for stacktrace */
>> +	addi  s0, sp, (PT_SIZE_ON_STACK)
>> +	j 1f
>> +
>> +SYM_ENTRY(optprobe_template_save, SYM_L_GLOBAL, SYM_A_NONE)
>> +	/*
>> +	 * Step1:
>> +	 * Filled with the pointer to optimized_kprobe data
>> +	 */
>> +	.dword 0
>> +1:
>> +	/* Load optimize_kprobe pointer from .dword below */
>> +	auipc a0, 0
>> +	REG_L a0, -8(a0)
>> +	add   a1, sp, x0
>> +
>> +SYM_ENTRY(optprobe_template_call, SYM_L_GLOBAL, SYM_A_NONE)
>> +	/*
>> +	 * Step2:
>> +	 * <IMME> of AUIPC/JALR are modified to the offset to optimized_callback
>> +	 * jump target is loaded from above .dword.
>> +	 */
>> +	auipc ra, 0
>> +	jalr  ra, 0(ra)
>> +
>> +	REG_L x1,  PT_RA(sp)
>> +	REG_L x3,  PT_GP(sp)
>> +	REG_L x4,  PT_TP(sp)
>> +	REG_L x5,  PT_T0(sp)
>> +	REG_L x6,  PT_T1(sp)
>> +	REG_L x7,  PT_T2(sp)
>> +	REG_L x8,  PT_S0(sp)
>> +	REG_L x9,  PT_S1(sp)
>> +	REG_L x10, PT_A0(sp)
>> +	REG_L x11, PT_A1(sp)
>> +	REG_L x12, PT_A2(sp)
>> +	REG_L x13, PT_A3(sp)
>> +	REG_L x14, PT_A4(sp)
>> +	REG_L x15, PT_A5(sp)
>> +	REG_L x16, PT_A6(sp)
>> +	REG_L x17, PT_A7(sp)
>> +	REG_L x18, PT_S2(sp)
>> +	REG_L x19, PT_S3(sp)
>> +	REG_L x20, PT_S4(sp)
>> +	REG_L x21, PT_S5(sp)
>> +	REG_L x22, PT_S6(sp)
>> +	REG_L x23, PT_S7(sp)
>> +	REG_L x24, PT_S8(sp)
>> +	REG_L x25, PT_S9(sp)
>> +	REG_L x26, PT_S10(sp)
>> +	REG_L x27, PT_S11(sp)
>> +	REG_L x28, PT_T3(sp)
>> +	REG_L x29, PT_T4(sp)
>> +	REG_L x30, PT_T5(sp)
>> +	REG_L x31, PT_T6(sp)
>> +	REG_L x2,  PT_SP(sp)
>> +	addi  sp, sp, (PT_SIZE_ON_STACK)
>> +
>> +SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
>> +	/*
>> +	 * Step3:
>> +	 * NOPS will be replaced by the probed instruction, at worst case 3 RVC
>> +	 * and 1 RVI instructions is about to execute out of line.
>> +	 */
>> +	nop
> 
> A nop here will be either a compressed nop or a non-compressed,
> depending on the build (C-enabled or not), right? Maybe be explicit to
> the assembler what you want?
> 

You are right, if CONFIG_RISCV_ISA_C is disabled, two NOP is enough for 2 RVI execute out of line,
if CONFIG_RISCV_ISA_C is enabled, it needs eight C.NOP here for the worst case (3 RVC + 1 RVI).

I will use {C}.NOP explicitly for different configure in next revision, thanks.

> 
> Björn

-- 
BR,
Liao, Chang



More information about the linux-riscv mailing list