From 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 15 Apr 2023 12:01:14 -0700
Subject: [PATCH] x86: don't use REP_GOOD or ERMS for small memory clearing
Git-commit: 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0
Patch-mainline: v6.4-rc1
References: bsc#1211140
The modern target to use is FSRS (Fast Short REP STOS), and the other
cases should only be used for bigger areas (ie mainly things like page
clearing).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Takashi Iwai <tiwai@suse.de>
---
arch/x86/lib/memset_64.S | 47 +++++++++++------------------------------------
1 file changed, 11 insertions(+), 36 deletions(-)
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -16,28 +16,23 @@
* rdx count (bytes)
*
* rax original destination
+ *
+ * The FSRS alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep stosb' itself is small enough to replace the call, but all
+ * the register moves blow up the code. And two of them are "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
*/
SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
- /*
- * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
- * to use it when possible. If not available, use fast string instructions.
- *
- * Otherwise, use original memset function.
- */
- ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
- "jmp memset_erms", X86_FEATURE_ERMS
+ ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
+ movb %sil,%al
movq %rdx,%rcx
- andl $7,%edx
- shrq $3,%rcx
- /* expand byte value */
- movzbl %sil,%esi
- movabs $0x0101010101010101,%rax
- imulq %rsi,%rax
- rep stosq
- movl %edx,%ecx
rep stosb
movq %r9,%rax
RET
@@ -46,26 +41,6 @@ SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
-/*
- * ISO C memset - set a memory block to a byte value. This function uses
- * enhanced rep stosb to override the fast string function.
- * The code is simpler and shorter than the fast string function as well.
- *
- * rdi destination
- * rsi value (char)
- * rdx count (bytes)
- *
- * rax original destination
- */
-SYM_FUNC_START_LOCAL(memset_erms)
- movq %rdi,%r9
- movb %sil,%al
- movq %rdx,%rcx
- rep stosb
- movq %r9,%rax
- RET
-SYM_FUNC_END(memset_erms)
-
SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10