[RFC PATCH 10/13] ARC: checksum: elide ZOL
Sergey Matyukevich
geomatsi at gmail.com
Tue Feb 22 06:15:03 PST 2022
From: Vineet Gupta <vgupta at kernel.org>
Add checksum implementation based on double load/stores
if ZOL is not supported.
Signed-off-by: Vineet Gupta <vgupta at kernel.org>
---
arch/arc/include/asm/checksum.h | 58 ++++++++++++++++++++++++++++++---
1 file changed, 53 insertions(+), 5 deletions(-)
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 0b485800a392..435017be9900 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -29,10 +29,13 @@ static inline __sum16 csum_fold(__wsum s)
s -= r;
return s >> 16;
}
+#define csum_fold csum_fold
+#ifndef CONFIG_ARC_LACKS_ZOL
/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries.
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
*/
static inline __sum16
ip_fast_csum(const void *iph, unsigned int ihl)
@@ -62,6 +65,54 @@ ip_fast_csum(const void *iph, unsigned int ihl)
return csum_fold(sum);
}
+#else
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
+ * - No loop-back branch taken for the canonical 5 words (body runs once)
+ * - optimized for ARCv2
+ * - LDD double load for fetching first 16 bytes (when LL64 is available)
+ * - DBNZ instruction for looping (ZOL not used)
+ */
+static inline __sum16
+ip_fast_csum(const void *iph, unsigned int ihl)
+{ /* Adds the ihl 32-bit words at iph with carry propagation; returns csum_fold(sum). */
+ unsigned int tmp, sum; /* tmp (%2): word loaded in the loop; sum (%3): running accumulator */
+ u64 dw1, dw2; /* dw1/dw2 (%0/%1): the first four 32-bit words (16 bytes) */
+ /* NOTE(review): assumes ihl >= 5; with ihl == 4 the count is 0 before DBNZR and would wrap - confirm callers. */
+ __asm__(
+#ifdef CONFIG_ARC_HAS_LL64
+ " ldd.ab %0, [%4, 8] \n" /* words 0-1 as one 64-bit load; iph advanced by 8 */
+ " ldd.ab %1, [%4, 8] \n" /* words 2-3 */
+#else
+ " ld.ab %L0, [%4, 4] \n" /* same 16 bytes via four 32-bit loads; %L / %H pick the halves of the u64 operand */
+ " ld.ab %H0, [%4, 4] \n"
+ " ld.ab %L1, [%4, 4] \n"
+ " ld.ab %H1, [%4, 4] \n"
+#endif
+ " sub %5, %5, 4 \n" /* four words already fetched; %5 = words still to add (no .f, so flags untouched) */
+ " add.f %3, %L0, %H0 \n" /* sum = word0 + word1, carry recorded */
+ " adc.f %3, %3, %L1 \n" /* each adc.f folds the previous carry into the next addition */
+ " adc.f %3, %3, %H1 \n"
+ "1: ld.ab %2, [%4, 4] \n" /* fetch the next word and advance iph by 4 */
+ " adc.f %3, %3, %2 \n"
+ " DBNZR %5, 1b \n" /* decrement the count, loop while non-zero; presumably leaves carry intact for the add.cs below - confirm against the ISA manual */
+ " add.cs %3, %3, 1 \n" /* add the carry left over from the last adc.f */
+ /* All outputs are early-clobber; iph and ihl are read-write because the asm modifies both. */
+ : "=&r" (dw1), "=&r" (dw2), "=&r" (tmp), "=&r" (sum),
+ "+&r" (iph), "+&r"(ihl)
+ :
+ : "cc", "memory"); /* flags are changed; "memory" covers the loads through iph */
+ /* Reduce the 32-bit sum to the 16-bit checksum. */
+ return csum_fold(sum);
+}
+
+#endif
+
+#define ip_fast_csum ip_fast_csum
+
/*
* TCP pseudo Header is 12 bytes:
* SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2]
@@ -88,9 +139,6 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
return sum;
}
-
-#define csum_fold csum_fold
-#define ip_fast_csum ip_fast_csum
#define csum_tcpudp_nofold csum_tcpudp_nofold
#include <asm-generic/checksum.h>
--
2.25.1
More information about the linux-snps-arc
mailing list