[RFC PATCH 10/13] ARC: checksum: elide ZOL

Sergey Matyukevich geomatsi at gmail.com
Tue Feb 22 06:15:03 PST 2022


From: Vineet Gupta <vgupta at kernel.org>

Add a checksum implementation based on double loads, used
when ZOL is not supported.

Signed-off-by: Vineet Gupta <vgupta at kernel.org>
---
 arch/arc/include/asm/checksum.h | 58 ++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 0b485800a392..435017be9900 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -29,10 +29,13 @@ static inline __sum16 csum_fold(__wsum s)
 	s -= r;
 	return s >> 16;
 }
+#define csum_fold csum_fold
 
+#ifndef CONFIG_ARC_LACKS_ZOL
 /*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
  */
 static inline __sum16
 ip_fast_csum(const void *iph, unsigned int ihl)
@@ -62,6 +65,54 @@ ip_fast_csum(const void *iph, unsigned int ihl)
 	return csum_fold(sum);
 }
 
+#else
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
+ *  - Loop body runs only once (no branch back) for canonical 5 words
+ *  - optimized for ARCv2
+ *    - LDD double load (with CONFIG_ARC_HAS_LL64) for fetching first 16 bytes
+ *    - DBNZ instruction for looping (ZOL not used)
+ */
+static inline __sum16
+ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned int tmp, sum;	/* tmp: word loaded in the loop; sum: 32-bit accumulator */
+	u64 dw1, dw2;		/* receive the first 16 bytes read from iph */
+	/* NOTE(review): loop body always runs once; assumes ihl >= 5 -- confirm */
+	__asm__(
+#ifdef CONFIG_ARC_HAS_LL64
+	"	ldd.ab %0, [%4, 8]	\n"	/* dw1 = bytes 0..7, then iph += 8 */
+	"	ldd.ab %1, [%4, 8]	\n"	/* dw2 = bytes 8..15, then iph += 8 */
+#else
+	"	ld.ab %L0, [%4, 4]	\n"	/* no 64-bit load: fill dw1 low half */
+	"	ld.ab %H0, [%4, 4]	\n"	/* dw1 high half */
+	"	ld.ab %L1, [%4, 4]	\n"	/* dw2 low half */
+	"	ld.ab %H1, [%4, 4]	\n"	/* dw2 high half */
+#endif
+	"	sub    %5, %5,  4	\n"	/* ihl -= 4: words left after the first four */
+	"	add.f  %3, %L0, %H0	\n"	/* sum = dw1.lo + dw1.hi, updating carry */
+	"	adc.f  %3, %3,  %L1	\n"	/* sum += dw2.lo + carry */
+	"	adc.f  %3, %3,  %H1	\n"	/* sum += dw2.hi + carry */
+	"1:	ld.ab  %2, [%4, 4]	\n"	/* tmp = next word, then iph += 4 */
+	"	adc.f  %3, %3,  %2	\n"	/* sum += tmp + carry */
+	"	DBNZR  %5, 1b		\n"	/* presumably: --ihl, branch to 1b if non-zero -- confirm */
+	"	add.cs %3, %3,  1	\n"	/* add 1 if the last adc left carry set */
+	/* operands: %0=dw1 %1=dw2 %2=tmp %3=sum %4=iph %5=ihl */
+	: "=&r" (dw1), "=&r" (dw2), "=&r" (tmp), "=&r" (sum),
+	  "+&r" (iph), "+&r"(ihl)
+	:
+	: "cc", "memory");
+
+	return csum_fold(sum);	/* csum_fold() turns the 32-bit sum into the __sum16 result */
+}
+
+#endif
+
+#define ip_fast_csum ip_fast_csum
+
 /*
  * TCP pseudo Header is 12 bytes:
  * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2]
@@ -88,9 +139,6 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 
 	return sum;
 }
-
-#define csum_fold csum_fold
-#define ip_fast_csum ip_fast_csum
 #define csum_tcpudp_nofold csum_tcpudp_nofold
 
 #include <asm-generic/checksum.h>
-- 
2.25.1




More information about the linux-snps-arc mailing list