[bpf-next v6 4/5] bpf, x86: Emit ENDBR for indirect jump targets

Xu Kuohai xukuohai at huaweicloud.com
Fri Mar 6 19:15:18 PST 2026


On 3/7/2026 9:36 AM, Eduard Zingerman wrote:
> On Fri, 2026-03-06 at 18:23 +0800, Xu Kuohai wrote:
>> From: Xu Kuohai <xukuohai at huawei.com>
>>
>> On CPUs that support CET/IBT, the indirect jump selftest triggers
>> a kernel panic because the indirect jump targets lack ENDBR
>> instructions.
>>
>> To fix it, emit an ENDBR instruction to each indirect jump target. Since
>> the ENDBR instruction shifts the position of original jited instructions,
>> fix the instruction address calculation wherever the addresses are used.
>>
>> For reference, below is a sample panic log.
>>
>>   Missing ENDBR: bpf_prog_2e5f1c71c13ac3e0_big_jump_table+0x97/0xe1
>>   ------------[ cut here ]------------
>>   kernel BUG at arch/x86/kernel/cet.c:133!
>>   Oops: invalid opcode: 0000 [#1] SMP NOPTI
>>
>>   ...
>>
>>    ? 0xffffffffc00fb258
>>    ? bpf_prog_2e5f1c71c13ac3e0_big_jump_table+0x97/0xe1
>>    bpf_prog_test_run_syscall+0x110/0x2f0
>>    ? fdget+0xba/0xe0
>>    __sys_bpf+0xe4b/0x2590
>>    ? __kmalloc_node_track_caller_noprof+0x1c7/0x680
>>    ? bpf_prog_test_run_syscall+0x215/0x2f0
>>    __x64_sys_bpf+0x21/0x30
>>    do_syscall_64+0x85/0x620
>>    ? bpf_prog_test_run_syscall+0x1e2/0x2f0
>>
>> Fixes: 493d9e0d6083 ("bpf, x86: add support for indirect jumps")
>> Signed-off-by: Xu Kuohai <xukuohai at huawei.com>
>> ---
>>   arch/x86/net/bpf_jit_comp.c | 23 +++++++++++++++--------
>>   1 file changed, 15 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 2c57ee446fc9..752331a64fc0 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -1658,8 +1658,8 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
>>   	return 0;
>>   }
>>   
>> -static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
>> -		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
>> +static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>> +		  u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding)
>>   {
>>   	bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
>>   	struct bpf_insn *insn = bpf_prog->insnsi;
>> @@ -1743,6 +1743,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>   				dst_reg = X86_REG_R9;
>>   		}
>>   
>> +#ifdef CONFIG_X86_KERNEL_IBT
>> +		if (bpf_insn_is_indirect_target(env, bpf_prog, i - 1))
>> +			EMIT_ENDBR();
>> +#endif
>> +
>>   		switch (insn->code) {
>>   			/* ALU */
>>   		case BPF_ALU | BPF_ADD | BPF_X:
>> @@ -2449,7 +2454,7 @@ st:			if (is_imm8(insn->off))
>>   
>>   			/* call */
>>   		case BPF_JMP | BPF_CALL: {
>> -			u8 *ip = image + addrs[i - 1];
>> +			u8 *ip = image + addrs[i - 1] + (prog - temp);
> 
> Sorry, meant to reply to v5 but got distracted.
> It seems tedious/error prone to have this addend at each location,
> would it be possible to move the 'ip' variable calculation outside
> of the switch? It appears that at each point there would be no
> EMIT invocations between 'ip' computation and usage.
>

Besides the changes shown in this patch, there is another line in the
file computing an address using 'image + addrs[i - 1] + (prog - temp)'.

It is at the call to emit_return() in the 'BPF_JMP | BPF_EXIT' case.
But there are indeed EMIT*() invocations before the address computation,
so an address pre-computed before the switch statement would be stale in
this case.

To fix this, how about introducing a macro for the address computation,
as the following diff (based on this patch) shows:

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1606,6 +1606,8 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)

  #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))

+#define CURR_IP      (image + addrs[i - 1] + (prog - temp))
+
  #define __LOAD_TCC_PTR(off)                    \
         EMIT3_off32(0x48, 0x8B, 0x85, off)
  /* mov rax, qword ptr [rbp - rounded_stack_depth - 16] */
@@ -2454,7 +2456,7 @@ st:                       if (is_imm8(insn->off))

                         /* call */
                 case BPF_JMP | BPF_CALL: {
-                       u8 *ip = image + addrs[i - 1] + (prog - temp);
+                       u8 *ip = CURR_IP;

                         func = (u8 *) __bpf_call_base + imm32;
                         if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
@@ -2480,7 +2482,7 @@ st:                       if (is_imm8(insn->off))
                                 emit_bpf_tail_call_direct(bpf_prog,
                                                           &bpf_prog->aux->poke_tab[imm32 - 1],
                                                           &prog,
-                                                         image + addrs[i - 1] + (prog - temp),
+                                                         CURR_IP,
                                                           callee_regs_used,
                                                           stack_depth,
                                                           ctx);
@@ -2489,7 +2491,7 @@ st:                       if (is_imm8(insn->off))
                                                             &prog,
                                                             callee_regs_used,
                                                             stack_depth,
-                                                           image + addrs[i - 1] + (prog - temp),
+                                                           CURR_IP,
                                                             ctx);
                         break;

@@ -2654,8 +2656,7 @@ st:                       if (is_imm8(insn->off))
                         break;

                 case BPF_JMP | BPF_JA | BPF_X:
-                       emit_indirect_jump(&prog, insn->dst_reg,
-                                          image + addrs[i - 1] + (prog - temp));
+                       emit_indirect_jump(&prog, insn->dst_reg, CURR_IP);
                         break;
                 case BPF_JMP | BPF_JA:
                 case BPF_JMP32 | BPF_JA:
@@ -2745,7 +2746,7 @@ st:                       if (is_imm8(insn->off))
                         ctx->cleanup_addr = proglen;
                         if (bpf_prog_was_classic(bpf_prog) &&
                             !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) {
-                               u8 *ip = image + addrs[i - 1] + (prog - temp);
+                               u8 *ip = CURR_IP;

                                 if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
                                         return -EINVAL;
@@ -2761,7 +2762,7 @@ st:                       if (is_imm8(insn->off))
                         EMIT1(0xC9);         /* leave */
                         bpf_prog->aux->ksym.fp_end = prog - temp;

-                       emit_return(&prog, image + addrs[i - 1] + (prog - temp));
+                       emit_return(&prog, CURR_IP);
                         break;

Would it make sense?

>>   
>>   			func = (u8 *) __bpf_call_base + imm32;
>>   			if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
>> @@ -2474,7 +2479,8 @@ st:			if (is_imm8(insn->off))
>>   			if (imm32)
>>   				emit_bpf_tail_call_direct(bpf_prog,
>>   							  &bpf_prog->aux->poke_tab[imm32 - 1],
>> -							  &prog, image + addrs[i - 1],
>> +							  &prog,
>> +							  image + addrs[i - 1] + (prog - temp),
>>   							  callee_regs_used,
>>   							  stack_depth,
>>   							  ctx);
>> @@ -2483,7 +2489,7 @@ st:			if (is_imm8(insn->off))
>>   							    &prog,
>>   							    callee_regs_used,
>>   							    stack_depth,
>> -							    image + addrs[i - 1],
>> +							    image + addrs[i - 1] + (prog - temp),
>>   							    ctx);
>>   			break;
>>   
>> @@ -2648,7 +2654,8 @@ st:			if (is_imm8(insn->off))
>>   			break;
>>   
>>   		case BPF_JMP | BPF_JA | BPF_X:
>> -			emit_indirect_jump(&prog, insn->dst_reg, image + addrs[i - 1]);
>> +			emit_indirect_jump(&prog, insn->dst_reg,
>> +					   image + addrs[i - 1] + (prog - temp));
>>   			break;
>>   		case BPF_JMP | BPF_JA:
>>   		case BPF_JMP32 | BPF_JA:
>> @@ -2738,7 +2745,7 @@ st:			if (is_imm8(insn->off))
>>   			ctx->cleanup_addr = proglen;
>>   			if (bpf_prog_was_classic(bpf_prog) &&
>>   			    !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) {
>> -				u8 *ip = image + addrs[i - 1];
>> +				u8 *ip = image + addrs[i - 1] + (prog - temp);
>>   
>>   				if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
>>   					return -EINVAL;
>> @@ -3800,7 +3807,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
>>   	for (pass = 0; pass < MAX_PASSES || image; pass++) {
>>   		if (!padding && pass >= PADDING_PASSES)
>>   			padding = true;
>> -		proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding);
>> +		proglen = do_jit(env, prog, addrs, image, rw_image, oldproglen, &ctx, padding);
>>   		if (proglen <= 0) {
>>   out_image:
>>   			image = NULL;




More information about the linux-arm-kernel mailing list