[PATCH RFC 10/10] riscv: csum: Remove inline assembly

Charlie Jenkins charlie at rivosinc.com
Wed Dec 10 08:13:47 PST 2025


When the kernel is built with Zbb enabled by default, the compiler
generates better code than is possible with the inline assembly.
Removing the inline assembly greatly simplifies the checksumming code
and improves performance when Zbb is enabled. However, performance
will regress on kernels where Zbb is only discovered at runtime.
Moving towards this model of optimizing for compiled-in extensions
helps keep the kernel code from spinning out of control given the
vast number of extensions available to riscv.
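
For reference, this is roughly what the remaining generic fold boils
down to (a standalone userspace sketch with local helpers, not the
kernel's ror64()/csum_fold() themselves). Built with a Zbb-enabled
-march (e.g. rv64gc_zbb), the compiler is free to turn the rotate into
a single rori, which is why the hand-written assembly buys little once
Zbb is compiled in:

  #include <stdint.h>

  /* Rotate right; local stand-in for the kernel's ror64(). */
  static inline uint64_t ror64(uint64_t x, unsigned int n)
  {
          return (x >> n) | (x << (64 - n));
  }

  /* Fold a 64-bit partial checksum down to the final 16-bit value. */
  static inline uint16_t fold64(uint64_t csum)
  {
          /* Fold 64 bits to 32, keeping the end-around carry. */
          csum += ror64(csum, 32);
          csum >>= 32;
          /* Fold 32 bits to 16 (twice to absorb the carry), then invert. */
          csum = (csum & 0xffff) + (csum >> 16);
          csum = (csum & 0xffff) + (csum >> 16);
          return (uint16_t)~csum;
  }

The same reasoning applies to the folds removed from csum_ipv6_magic()
and the do_csum_*() helpers below.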

Signed-off-by: Charlie Jenkins <thecharlesjenkins at gmail.com>
---
 arch/riscv/include/asm/checksum.h | 32 -------------
 arch/riscv/lib/csum.c             | 94 ---------------------------------------
 2 files changed, 126 deletions(-)

diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
index e747af23eea2..ecc4779209b9 100644
--- a/arch/riscv/include/asm/checksum.h
+++ b/arch/riscv/include/asm/checksum.h
@@ -45,38 +45,6 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 			csum += csum < ((const unsigned int *)iph)[pos];
 	} while (++pos < ihl);
 
-	/*
-	 * ZBB only saves three instructions on 32-bit and five on 64-bit so not
-	 * worth checking if supported without Alternatives.
-	 */
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
-		unsigned long fold_temp;
-
-		if (IS_ENABLED(CONFIG_32BIT)) {
-			asm(".option push				\n\
-			.option arch,+zbb				\n\
-				not	%[fold_temp], %[csum]		\n\
-				rori	%[csum], %[csum], 16		\n\
-				sub	%[csum], %[fold_temp], %[csum]	\n\
-			.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
-		} else {
-			asm(".option push				\n\
-			.option arch,+zbb				\n\
-				rori	%[fold_temp], %[csum], 32	\n\
-				add	%[csum], %[fold_temp], %[csum]	\n\
-				srli	%[csum], %[csum], 32		\n\
-				not	%[fold_temp], %[csum]		\n\
-				roriw	%[csum], %[csum], 16		\n\
-				subw	%[csum], %[fold_temp], %[csum]	\n\
-			.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
-		}
-		return (__force __sum16)(csum >> 16);
-	}
-
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index 4db35dd698eb..93c073f2b883 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -40,24 +40,6 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	uproto = (__force unsigned int)htonl(proto);
 	sum += uproto;
 
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
-		unsigned long fold_temp;
-
-		asm(".option push					\n\
-		.option arch,+zbb					\n\
-			rori	%[fold_temp], %[sum], 32		\n\
-			add	%[sum], %[fold_temp], %[sum]		\n\
-			srli	%[sum], %[sum], 32			\n\
-			not	%[fold_temp], %[sum]			\n\
-			roriw	%[sum], %[sum], 16			\n\
-			subw	%[sum], %[fold_temp], %[sum]		\n\
-		.option pop"
-		: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
-		return (__force __sum16)(sum >> 16);
-	}
-
 	sum += ror64(sum, 32);
 	sum >>= 32;
 	return csum_fold((__force __wsum)sum);
@@ -142,51 +124,6 @@ do_csum_with_alignment(const unsigned char *buff, int len)
 	end = (const unsigned long *)(buff + len);
 	csum = do_csum_common(ptr, end, data);
 
-#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
-		unsigned long fold_temp;
-
-#ifdef CONFIG_32BIT
-		asm_goto_output(".option push			\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 16	\n\
-			andi	%[offset], %[offset], 1		\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			beq	%[offset], zero, %l[end]	\n\
-			rev8	%[csum], %[csum]		\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			: [offset] "r" (offset)
-			:
-			: end);
-
-		return (unsigned short)csum;
-#else /* !CONFIG_32BIT */
-		asm_goto_output(".option push			\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 32	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			srli	%[csum], %[csum], 32		\n\
-			roriw	%[fold_temp], %[csum], 16	\n\
-			addw	%[csum], %[fold_temp], %[csum]	\n\
-			andi	%[offset], %[offset], 1		\n\
-			beq	%[offset], zero, %l[end]	\n\
-			rev8	%[csum], %[csum]		\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			: [offset] "r" (offset)
-			:
-			: end);
-
-		return (csum << 16) >> 48;
-#endif /* !CONFIG_32BIT */
-end:
-		return csum >> 16;
-	}
-
-#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;
@@ -215,37 +152,6 @@ do_csum_no_alignment(const unsigned char *buff, int len)
 	end = (const unsigned long *)(buff + len);
 	csum = do_csum_common(ptr, end, data);
 
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
-		unsigned long fold_temp;
-
-#ifdef CONFIG_32BIT
-		asm (".option push				\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 16	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			:
-			: );
-
-#else /* !CONFIG_32BIT */
-		asm (".option push				\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 32	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			srli	%[csum], %[csum], 32		\n\
-			roriw	%[fold_temp], %[csum], 16	\n\
-			addw	%[csum], %[fold_temp], %[csum]	\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			:
-			: );
-#endif /* !CONFIG_32BIT */
-		return csum >> 16;
-	}
-
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;

-- 
2.43.0



