[PATCH 2/7] v2: Turn "zext_thyself" from global into a context var

Shahab Vahedi list+bpf at vahedi.org
Tue Apr 30 08:09:32 PDT 2024


From: Shahab Vahedi <shahab at synopsys.com>

also:

- Update some comments along the way.
- Refactor the gen_swap()'s "if/else" to present the logic better
- Remove "extern" from the proto-type
---
 arch/arc/net/bpf_jit.h       | 14 +++++-----
 arch/arc/net/bpf_jit_arcv2.c | 51 ++++++++++++++++++------------------
 arch/arc/net/bpf_jit_core.c  | 25 +++++++++---------
 3 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ecad47b8b796..9fc70d97415b 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -37,10 +37,6 @@
  */
 #define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b))
 
-/************* Globals that have effects on code generation ***********/
-/* An indicator if zero-extend must be done for the 32-bit operations. */
-extern bool zext_thyself;
-
 /************** Functions that the back-end must provide **************/
 /* Extension for 32-bit operations. */
 extern inline u8 zext(u8 *buf, u8 rd);
@@ -156,10 +152,12 @@ extern u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
 extern u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func);
 extern u8 arc_to_bpf_return(u8 *buf);
 /*
- * Perform byte swaps on "rd" based on the "size". If "force" is
- * set to "true", do it unconditionally. Otherwise, consider the
- * desired "endian"ness and the host endianness.
+ * - Perform byte swaps on "rd" based on the "size".
+ * - If "force" is set, do it unconditionally. Otherwise, consider the
+ *   desired "endian"ness and the host endianness.
+ * - For data "size"s up to 32 bits, perform a zero-extension if asked
+ *   by the "do_zext" boolean.
  */
-extern u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force);
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext);
 
 #endif /* _ARC_BPF_JIT_H */
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index b9e803f04a36..8b7ae2f11f38 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -1305,7 +1305,7 @@ static u8 arc_b(u8 *buf, s32 offset)
 
 inline u8 zext(u8 *buf, u8 rd)
 {
-	if (zext_thyself && rd != BPF_REG_FP)
+	if (rd != BPF_REG_FP)
 		return arc_movi_r(buf, REG_HI(rd), 0);
 	else
 		return 0;
@@ -2198,7 +2198,7 @@ u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm)
 	return len;
 }
 
-u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
 {
 	u8 len = 0;
 #ifdef __BIG_ENDIAN
@@ -2206,36 +2206,19 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
 #else
 	const u8 host_endian = BPF_FROM_LE;
 #endif
-	/*
-	 * If the same endianness, there's not much to do other
-	 * than zeroing out the upper bytes based on the "size".
-	 */
-	if ((force == false) && (host_endian == endian)) {
-		switch (size) {
-		case 16:
-			len += arc_and_i(BUF(buf, len), REG_LO(rd), 0xffff);
-			fallthrough;
-		case 32:
-			len += zext(BUF(buf, len), rd);
-			fallthrough;
-		case 64:
-			break;
-		default:
-			/* The caller must have handled this. */
-		}
-	} else {
+	if (host_endian != endian || force) {
 		switch (size) {
 		case 16:
 			/*
 			 * r = B4B3_B2B1 << 16 --> r = B2B1_0000
-			 * swape(r) is 0000_B1B2
+			 * then, swape(r) would become the desired 0000_B1B2
 			 */
-			len += arc_asli_r(BUF(buf, len),
-					  REG_LO(rd), REG_LO(rd), 16);
+			len = arc_asli_r(buf, REG_LO(rd), REG_LO(rd), 16);
 			fallthrough;
 		case 32:
 			len += arc_swape_r(BUF(buf, len), REG_LO(rd));
-			len += zext(BUF(buf, len), rd);
+			if (do_zext)
+				len += zext(BUF(buf, len), rd);
 			break;
 		case 64:
 			/*
@@ -2245,7 +2228,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
 			 *   hi ^= lo;
 			 * and then swap the bytes in "hi" and "lo".
 			 */
-			len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
+			len  = arc_xor_r(buf, REG_HI(rd), REG_LO(rd));
 			len += arc_xor_r(BUF(buf, len), REG_LO(rd), REG_HI(rd));
 			len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
 			len += arc_swape_r(BUF(buf, len), REG_LO(rd));
@@ -2254,6 +2237,24 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
 		default:
 			/* The caller must have handled this. */
 		}
+	} else {
+		/*
+		 * If the same endianness, there's not much to do other
+		 * than zeroing out the upper bytes based on the "size".
+		 */
+		switch (size) {
+		case 16:
+			len = arc_and_i(buf, REG_LO(rd), 0xffff);
+			fallthrough;
+		case 32:
+			if (do_zext)
+				len += zext(BUF(buf, len), rd);
+			break;
+		case 64:
+			break;
+		default:
+			/* The caller must have handled this. */
+		}
 	}
 
 	return len;
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index eea1a469a195..79ec0bbf1153 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -8,9 +8,6 @@
 #include <asm/bug.h>
 #include "bpf_jit.h"
 
-/* Sane initial values for the globals */
-bool zext_thyself = true;
-
 /*
  * Check for the return value. A pattern used oftenly in this file.
  * There must be a "ret" variable of type "int" in the scope.
@@ -86,6 +83,7 @@ struct arc_jit_data {
  * jit:			The JIT buffer and its length.
  * bpf_header:		The JITed program header. "jit.buf" points inside it.
  * emit:		If set, opcodes are written to memory; else, a dry-run.
+ * do_zext:		If true, 32-bit sub-regs must be zero extended.
  * bpf2insn:		Maps BPF insn indices to their counterparts in jit.buf.
  * bpf2insn_valid:	Indicates if "bpf2ins" is populated with the mappings.
  * jit_data:		A piece of memory to transfer data to the next pass.
@@ -105,6 +103,7 @@ struct jit_context {
 	struct jit_buffer		jit;
 	struct bpf_binary_header	*bpf_header;
 	bool				emit;
+	bool				do_zext;
 	u32				*bpf2insn;
 	bool				bpf2insn_valid;
 	struct arc_jit_data		*jit_data;
@@ -185,7 +184,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
 	ctx->success            = false;
 
 	/* If the verifier doesn't zero-extend, then we have to do it. */
-	zext_thyself = !ctx->prog->aux->verifier_zext;
+	ctx->do_zext = !ctx->prog->aux->verifier_zext;
 
 	return 0;
 }
@@ -250,8 +249,7 @@ static void jit_ctx_cleanup(struct jit_context *ctx)
 	}
 
 	ctx->emit = false;
-	/* Global booleans set to false. */
-	zext_thyself = false;
+	ctx->do_zext = false;
 }
 
 /*
@@ -415,7 +413,7 @@ static inline void set_need_for_extra_pass(struct jit_context *ctx)
  * the back-end for the swap.
  */
 static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
-		       bool force, u8 *len)
+		       bool force, bool do_zext, u8 *len)
 {
 	/* Sanity check on the size. */
 	switch (size) {
@@ -428,7 +426,7 @@ static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
 		return -EINVAL;
 	}
 
-	*len = gen_swap(buf, rd, size, endian, force);
+	*len = gen_swap(buf, rd, size, endian, force, do_zext);
 
 	return 0;
 }
@@ -866,7 +864,7 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
 	case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
 		CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
 				      BPF_CLASS(code) == BPF_ALU64,
-				      &len));
+				      ctx->do_zext, &len));
 		break;
 	}
 	/* dst += src (64-bit) */
@@ -1049,11 +1047,12 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
 
 	if (BPF_CLASS(code) == BPF_ALU) {
 		/*
-		 * Even 64-bit swaps are of type BPF_ALU (and not BPF_ALU64).
-		 * Therefore, the routine responsible for "swap" specifically
-		 * takes care of calling "zext()" based on the input "size".
+		 * Skip the "swap" instructions. Even 64-bit swaps are of type
+		 * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
+		 * has to look at the "size" of the operations rather than the
+		 * ALU type. "gen_swap()" specifically takes care of that.
 		 */
-		if (BPF_OP(code) != BPF_END)
+		if (BPF_OP(code) != BPF_END && ctx->do_zext)
 			len += zext(BUF(buf, len), dst);
 	}
 
-- 
2.35.8




More information about the linux-snps-arc mailing list