Blob Blame History Raw
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 20 Jan 2020 18:52:29 +0000
Subject: arm64: csum: Optimise IPv6 header checksum
Patch-mainline: v5.7-rc1
Git-commit: e9c7ddbf8b4b6a291bf3b5bfa7c883235164d9be
References: bsc#1176447

Throwing our __uint128_t idioms at csum_ipv6_magic() makes it
about 1.3x-2x faster across a range of microarchitecture/compiler
combinations. Not much in absolute terms, but every little helps.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 arch/arm64/include/asm/checksum.h |    7 ++++++-
 arch/arm64/lib/csum.c             |   27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -5,7 +5,12 @@
 #ifndef __ASM_CHECKSUM_H
 #define __ASM_CHECKSUM_H
 
-#include <linux/types.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum sum);
 
 static inline __sum16 csum_fold(__wsum csum)
 {
--- a/arch/arm64/lib/csum.c
+++ b/arch/arm64/lib/csum.c
@@ -121,3 +121,30 @@ unsigned int do_csum(const unsigned char
 
 	return sum >> 16;
 }
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum csum)
+{	/* Sums saddr, daddr, len and proto into csum and returns the csum_fold()ed result. */
+	__uint128_t src, dst;
+	u64 sum = (__force u64)csum;	/* widen the incoming partial sum to leave room for carries */
+
+	src = *(const __uint128_t *)saddr->s6_addr;	/* read each address as a single 128-bit value */
+	dst = *(const __uint128_t *)daddr->s6_addr;
+
+	sum += (__force u32)htonl(len);
+#ifdef __LITTLE_ENDIAN
+	sum += (u32)proto << 24;	/* bits 31:24; presumably proto's byte position in a network-order 32-bit word */
+#else
+	sum += proto;
+#endif
+	src += (src >> 64) | (src << 64);	/* add the halves-swapped value: upper 64 bits = lo + hi + carry-out of the low half */
+	dst += (dst >> 64) | (dst << 64);
+
+	sum = accumulate(sum, src >> 64);	/* accumulate() is defined outside this hunk; presumably a 64-bit add with end-around carry - confirm */
+	sum = accumulate(sum, dst >> 64);
+
+	sum += ((sum >> 32) | (sum << 32));	/* same swap-and-add trick, reducing 64 bits to 32 in the upper half */
+	return csum_fold((__force __wsum)(sum >> 32));	/* csum_fold() produces the final __sum16 */
+}
+EXPORT_SYMBOL(csum_ipv6_magic);