[PATCH 2/7] v2: Turn "zext_thyself" from global into a context var
Shahab Vahedi
list+bpf at vahedi.org
Tue Apr 30 08:09:32 PDT 2024
From: Shahab Vahedi <shahab at synopsys.com>
also:
- Update some comments along the way.
- Refactor the gen_swap()'s "if/else" to present the logic better
- Remove "extern" from the proto-type
---
arch/arc/net/bpf_jit.h | 14 +++++-----
arch/arc/net/bpf_jit_arcv2.c | 51 ++++++++++++++++++------------------
arch/arc/net/bpf_jit_core.c | 25 +++++++++---------
3 files changed, 44 insertions(+), 46 deletions(-)
diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ecad47b8b796..9fc70d97415b 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -37,10 +37,6 @@
*/
#define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b))
-/************* Globals that have effects on code generation ***********/
-/* An indicator if zero-extend must be done for the 32-bit operations. */
-extern bool zext_thyself;
-
/************** Functions that the back-end must provide **************/
/* Extension for 32-bit operations. */
extern inline u8 zext(u8 *buf, u8 rd);
@@ -156,10 +152,12 @@ extern u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
extern u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func);
extern u8 arc_to_bpf_return(u8 *buf);
/*
- * Perform byte swaps on "rd" based on the "size". If "force" is
- * set to "true", do it unconditionally. Otherwise, consider the
- * desired "endian"ness and the host endianness.
+ * - Perform byte swaps on "rd" based on the "size".
+ * - If "force" is set, do it unconditionally. Otherwise, consider the
+ * desired "endian"ness and the host endianness.
+ * - For data "size"s up to 32 bits, perform a zero-extension if asked
+ * by the "do_zext" boolean.
*/
-extern u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force);
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext);
#endif /* _ARC_BPF_JIT_H */
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index b9e803f04a36..8b7ae2f11f38 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -1305,7 +1305,7 @@ static u8 arc_b(u8 *buf, s32 offset)
inline u8 zext(u8 *buf, u8 rd)
{
- if (zext_thyself && rd != BPF_REG_FP)
+ if (rd != BPF_REG_FP)
return arc_movi_r(buf, REG_HI(rd), 0);
else
return 0;
@@ -2198,7 +2198,7 @@ u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm)
return len;
}
-u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
{
u8 len = 0;
#ifdef __BIG_ENDIAN
@@ -2206,36 +2206,19 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
#else
const u8 host_endian = BPF_FROM_LE;
#endif
- /*
- * If the same endianness, there's not much to do other
- * than zeroing out the upper bytes based on the "size".
- */
- if ((force == false) && (host_endian == endian)) {
- switch (size) {
- case 16:
- len += arc_and_i(BUF(buf, len), REG_LO(rd), 0xffff);
- fallthrough;
- case 32:
- len += zext(BUF(buf, len), rd);
- fallthrough;
- case 64:
- break;
- default:
- /* The caller must have handled this. */
- }
- } else {
+ if (host_endian != endian || force) {
switch (size) {
case 16:
/*
* r = B4B3_B2B1 << 16 --> r = B2B1_0000
- * swape(r) is 0000_B1B2
+ * then, swape(r) would become the desired 0000_B1B2
*/
- len += arc_asli_r(BUF(buf, len),
- REG_LO(rd), REG_LO(rd), 16);
+ len = arc_asli_r(buf, REG_LO(rd), REG_LO(rd), 16);
fallthrough;
case 32:
len += arc_swape_r(BUF(buf, len), REG_LO(rd));
- len += zext(BUF(buf, len), rd);
+ if (do_zext)
+ len += zext(BUF(buf, len), rd);
break;
case 64:
/*
@@ -2245,7 +2228,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
* hi ^= lo;
* and then swap the bytes in "hi" and "lo".
*/
- len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
+ len = arc_xor_r(buf, REG_HI(rd), REG_LO(rd));
len += arc_xor_r(BUF(buf, len), REG_LO(rd), REG_HI(rd));
len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
len += arc_swape_r(BUF(buf, len), REG_LO(rd));
@@ -2254,6 +2237,24 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
default:
/* The caller must have handled this. */
}
+ } else {
+ /*
+ * If the same endianness, there's not much to do other
+ * than zeroing out the upper bytes based on the "size".
+ */
+ switch (size) {
+ case 16:
+ len = arc_and_i(buf, REG_LO(rd), 0xffff);
+ fallthrough;
+ case 32:
+ if (do_zext)
+ len += zext(BUF(buf, len), rd);
+ break;
+ case 64:
+ break;
+ default:
+ /* The caller must have handled this. */
+ }
}
return len;
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index eea1a469a195..79ec0bbf1153 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -8,9 +8,6 @@
#include <asm/bug.h>
#include "bpf_jit.h"
-/* Sane initial values for the globals */
-bool zext_thyself = true;
-
/*
* Check for the return value. A pattern used oftenly in this file.
* There must be a "ret" variable of type "int" in the scope.
@@ -86,6 +83,7 @@ struct arc_jit_data {
* jit: The JIT buffer and its length.
* bpf_header: The JITed program header. "jit.buf" points inside it.
* emit: If set, opcodes are written to memory; else, a dry-run.
+ * do_zext: If true, 32-bit sub-regs must be zero extended.
* bpf2insn: Maps BPF insn indices to their counterparts in jit.buf.
* bpf2insn_valid: Indicates if "bpf2ins" is populated with the mappings.
* jit_data: A piece of memory to transfer data to the next pass.
@@ -105,6 +103,7 @@ struct jit_context {
struct jit_buffer jit;
struct bpf_binary_header *bpf_header;
bool emit;
+ bool do_zext;
u32 *bpf2insn;
bool bpf2insn_valid;
struct arc_jit_data *jit_data;
@@ -185,7 +184,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
ctx->success = false;
/* If the verifier doesn't zero-extend, then we have to do it. */
- zext_thyself = !ctx->prog->aux->verifier_zext;
+ ctx->do_zext = !ctx->prog->aux->verifier_zext;
return 0;
}
@@ -250,8 +249,7 @@ static void jit_ctx_cleanup(struct jit_context *ctx)
}
ctx->emit = false;
- /* Global booleans set to false. */
- zext_thyself = false;
+ ctx->do_zext = false;
}
/*
@@ -415,7 +413,7 @@ static inline void set_need_for_extra_pass(struct jit_context *ctx)
* the back-end for the swap.
*/
static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
- bool force, u8 *len)
+ bool force, bool do_zext, u8 *len)
{
/* Sanity check on the size. */
switch (size) {
@@ -428,7 +426,7 @@ static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
return -EINVAL;
}
- *len = gen_swap(buf, rd, size, endian, force);
+ *len = gen_swap(buf, rd, size, endian, force, do_zext);
return 0;
}
@@ -866,7 +864,7 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
BPF_CLASS(code) == BPF_ALU64,
- &len));
+ ctx->do_zext, &len));
break;
}
/* dst += src (64-bit) */
@@ -1049,11 +1047,12 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
if (BPF_CLASS(code) == BPF_ALU) {
/*
- * Even 64-bit swaps are of type BPF_ALU (and not BPF_ALU64).
- * Therefore, the routine responsible for "swap" specifically
- * takes care of calling "zext()" based on the input "size".
+ * Skip the "swap" instructions. Even 64-bit swaps are of type
+ * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
+ * has to look at the "size" of the operations rather than the
+ * ALU type. "gen_swap()" specifically takes care of that.
*/
- if (BPF_OP(code) != BPF_END)
+ if (BPF_OP(code) != BPF_END && ctx->do_zext)
len += zext(BUF(buf, len), dst);
}
--
2.35.8
More information about the linux-snps-arc
mailing list