Takashi Iwai ff3ce0
From 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 Mon Sep 17 00:00:00 2001
Takashi Iwai ff3ce0
From: Linus Torvalds <torvalds@linux-foundation.org>
Takashi Iwai ff3ce0
Date: Sat, 15 Apr 2023 12:01:14 -0700
Takashi Iwai ff3ce0
Subject: [PATCH] x86: don't use REP_GOOD or ERMS for small memory clearing
Takashi Iwai ff3ce0
Git-commit: 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0
Takashi Iwai ff3ce0
Patch-mainline: v6.4-rc1
Takashi Iwai ff3ce0
References: bsc#1211140
Takashi Iwai ff3ce0
Takashi Iwai ff3ce0
The modern target to use is FSRS (Fast Short REP STOS), and the other
Takashi Iwai ff3ce0
cases should only be used for bigger areas (i.e. mainly things like page
Takashi Iwai ff3ce0
clearing).
Takashi Iwai ff3ce0
Takashi Iwai ff3ce0
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Takashi Iwai ff3ce0
Acked-by: Takashi Iwai <tiwai@suse.de>
Takashi Iwai ff3ce0
Takashi Iwai ff3ce0
---
Takashi Iwai ff3ce0
 arch/x86/lib/memset_64.S |   47 +++++++++++------------------------------------
Takashi Iwai ff3ce0
 1 file changed, 11 insertions(+), 36 deletions(-)
Takashi Iwai ff3ce0
Takashi Iwai ff3ce0
--- a/arch/x86/lib/memset_64.S
Takashi Iwai ff3ce0
+++ b/arch/x86/lib/memset_64.S
Takashi Iwai ff3ce0
@@ -16,28 +16,23 @@
Takashi Iwai ff3ce0
  * rdx   count (bytes)
Takashi Iwai ff3ce0
  *
Takashi Iwai ff3ce0
  * rax   original destination
Takashi Iwai ff3ce0
+ *
Takashi Iwai ff3ce0
+ * The FSRS alternative should be done inline (avoiding the call and
Takashi Iwai ff3ce0
+ * the disgusting return handling), but that would require some help
Takashi Iwai ff3ce0
+ * from the compiler for better calling conventions.
Takashi Iwai ff3ce0
+ *
Takashi Iwai ff3ce0
+ * The 'rep stosb' itself is small enough to replace the call, but all
Takashi Iwai ff3ce0
+ * the register moves blow up the code. And two of them are "needed"
Takashi Iwai ff3ce0
+ * only for the return value that is the same as the destination input,
Takashi Iwai ff3ce0
+ * which the compiler could/should do much better anyway.
Takashi Iwai ff3ce0
  */
Takashi Iwai ff3ce0
 SYM_FUNC_START_WEAK(memset)
Takashi Iwai ff3ce0
 SYM_FUNC_START(__memset)
Takashi Iwai ff3ce0
-	/*
Takashi Iwai ff3ce0
-	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
Takashi Iwai ff3ce0
-	 * to use it when possible. If not available, use fast string instructions.
Takashi Iwai ff3ce0
-	 *
Takashi Iwai ff3ce0
-	 * Otherwise, use original memset function.
Takashi Iwai ff3ce0
-	 */
Takashi Iwai ff3ce0
-	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
Takashi Iwai ff3ce0
-		      "jmp memset_erms", X86_FEATURE_ERMS
Takashi Iwai ff3ce0
+	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
Takashi Iwai ff3ce0
 
Takashi Iwai ff3ce0
 	movq %rdi,%r9
Takashi Iwai ff3ce0
+	movb %sil,%al
Takashi Iwai ff3ce0
 	movq %rdx,%rcx
Takashi Iwai ff3ce0
-	andl $7,%edx
Takashi Iwai ff3ce0
-	shrq $3,%rcx
Takashi Iwai ff3ce0
-	/* expand byte value  */
Takashi Iwai ff3ce0
-	movzbl %sil,%esi
Takashi Iwai ff3ce0
-	movabs $0x0101010101010101,%rax
Takashi Iwai ff3ce0
-	imulq %rsi,%rax
Takashi Iwai ff3ce0
-	rep stosq
Takashi Iwai ff3ce0
-	movl %edx,%ecx
Takashi Iwai ff3ce0
 	rep stosb
Takashi Iwai ff3ce0
 	movq %r9,%rax
Takashi Iwai ff3ce0
 	RET
Takashi Iwai ff3ce0
@@ -46,26 +41,6 @@ SYM_FUNC_END_ALIAS(memset)
Takashi Iwai ff3ce0
 EXPORT_SYMBOL(memset)
Takashi Iwai ff3ce0
 EXPORT_SYMBOL(__memset)
Takashi Iwai ff3ce0
 
Takashi Iwai ff3ce0
-/*
Takashi Iwai ff3ce0
- * ISO C memset - set a memory block to a byte value. This function uses
Takashi Iwai ff3ce0
- * enhanced rep stosb to override the fast string function.
Takashi Iwai ff3ce0
- * The code is simpler and shorter than the fast string function as well.
Takashi Iwai ff3ce0
- *
Takashi Iwai ff3ce0
- * rdi   destination
Takashi Iwai ff3ce0
- * rsi   value (char)
Takashi Iwai ff3ce0
- * rdx   count (bytes)
Takashi Iwai ff3ce0
- *
Takashi Iwai ff3ce0
- * rax   original destination
Takashi Iwai ff3ce0
- */
Takashi Iwai ff3ce0
-SYM_FUNC_START_LOCAL(memset_erms)
Takashi Iwai ff3ce0
-	movq %rdi,%r9
Takashi Iwai ff3ce0
-	movb %sil,%al
Takashi Iwai ff3ce0
-	movq %rdx,%rcx
Takashi Iwai ff3ce0
-	rep stosb
Takashi Iwai ff3ce0
-	movq %r9,%rax
Takashi Iwai ff3ce0
-	RET
Takashi Iwai ff3ce0
-SYM_FUNC_END(memset_erms)
Takashi Iwai ff3ce0
-
Takashi Iwai ff3ce0
 SYM_FUNC_START_LOCAL(memset_orig)
Takashi Iwai ff3ce0
 	movq %rdi,%r10
Takashi Iwai ff3ce0