From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 4 Oct 2017 19:27:08 +0200
Subject: s390: optimize memset implementation
Git-commit: 993fef95b9c1858894d14b221e04f1161e4f4ed9
Patch-mainline: v4.15-rc1
References: git-fixes f19fbd5ed6

As with the memset16/32/64 variants, avoid having subsequent mvc
instructions depend on each other, since that might have a negative
performance impact.

This patch is currently of little practical relevance, since at least
gcc 7.1 generates only inline memset code and not a single memset call.
However, there is no reason not to provide an optimized version, just
in case gcc generates memset calls again, as it did in the past.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 arch/s390/lib/mem.S | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index f88cf6983849..9255a087fa96 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -78,21 +78,25 @@ ENTRY(memset)
 	ex	%r4,0(%r3)
 	br	%r14
 .Lmemset_fill:
-	stc	%r3,0(%r2)
 	cghi	%r4,1
 	lgr	%r1,%r2
-	ber	%r14
+	je	.Lmemset_fill_exit
 	aghi	%r4,-2
-	srlg	%r3,%r4,8
-	ltgr	%r3,%r3
+	srlg	%r5,%r4,8
+	ltgr	%r5,%r5
 	jz	.Lmemset_fill_remainder
 .Lmemset_fill_loop:
-	mvc	1(256,%r1),0(%r1)
+	stc	%r3,0(%r1)
+	mvc	1(255,%r1),0(%r1)
 	la	%r1,256(%r1)
-	brctg	%r3,.Lmemset_fill_loop
+	brctg	%r5,.Lmemset_fill_loop
 .Lmemset_fill_remainder:
-	larl	%r3,.Lmemset_mvc
-	ex	%r4,0(%r3)
+	stc	%r3,0(%r1)
+	larl	%r5,.Lmemset_mvc
+	ex	%r4,0(%r5)
+	br	%r14
+.Lmemset_fill_exit:
+	stc	%r3,0(%r1)
 	br	%r14
 .Lmemset_xc:
 	xc	0(1,%r1),0(%r1)