From ff3ce03865a033794db368170f82023850164c81 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: May 08 2023 13:20:13 +0000 Subject: x86: don't use REP_GOOD or ERMS for small memory clearing (bsc#1211140). --- diff --git a/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch b/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch new file mode 100644 index 0000000..ca20686 --- /dev/null +++ b/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch @@ -0,0 +1,88 @@ +From 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 15 Apr 2023 12:01:14 -0700 +Subject: [PATCH] x86: don't use REP_GOOD or ERMS for small memory clearing +Git-commit: 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 +Patch-mainline: v6.4-rc1 +References: bsc#1211140 + +The modern target to use is FSRS (Fast Short REP STOS), and the other +cases should only be used for bigger areas (ie mainly things like page +clearing). + +Signed-off-by: Linus Torvalds +Acked-by: Takashi Iwai + +--- + arch/x86/lib/memset_64.S | 47 +++++++++++------------------------------------ + 1 file changed, 11 insertions(+), 36 deletions(-) + +--- a/arch/x86/lib/memset_64.S ++++ b/arch/x86/lib/memset_64.S +@@ -16,28 +16,23 @@ + * rdx count (bytes) + * + * rax original destination ++ * ++ * The FSRS alternative should be done inline (avoiding the call and ++ * the disgusting return handling), but that would require some help ++ * from the compiler for better calling conventions. ++ * ++ * The 'rep stosb' itself is small enough to replace the call, but all ++ * the register moves blow up the code. And two of them are "needed" ++ * only for the return value that is the same as the source input, ++ * which the compiler could/should do much better anyway. + */ + SYM_FUNC_START_WEAK(memset) + SYM_FUNC_START(__memset) +- /* +- * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended +- * to use it when possible. If not available, use fast string instructions. +- * +- * Otherwise, use original memset function. +- */ +- ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ +- "jmp memset_erms", X86_FEATURE_ERMS ++ ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS + + movq %rdi,%r9 ++ movb %sil,%al + movq %rdx,%rcx +- andl $7,%edx +- shrq $3,%rcx +- /* expand byte value */ +- movzbl %sil,%esi +- movabs $0x0101010101010101,%rax +- imulq %rsi,%rax +- rep stosq +- movl %edx,%ecx + rep stosb + movq %r9,%rax + RET +@@ -46,26 +41,6 @@ SYM_FUNC_END_ALIAS(memset) + EXPORT_SYMBOL(memset) + EXPORT_SYMBOL(__memset) + +-/* +- * ISO C memset - set a memory block to a byte value. This function uses +- * enhanced rep stosb to override the fast string function. +- * The code is simpler and shorter than the fast string function as well. +- * +- * rdi destination +- * rsi value (char) +- * rdx count (bytes) +- * +- * rax original destination +- */ +-SYM_FUNC_START_LOCAL(memset_erms) +- movq %rdi,%r9 +- movb %sil,%al +- movq %rdx,%rcx +- rep stosb +- movq %r9,%rax +- RET +-SYM_FUNC_END(memset_erms) +- + SYM_FUNC_START_LOCAL(memset_orig) + movq %rdi,%r10 + diff --git a/series.conf b/series.conf index cf377aa..69064b6 100644 --- a/series.conf +++ b/series.conf @@ -36928,6 +36928,7 @@ patches.suse/nfc-st-nci-Fix-use-after-free-bug-in-ndlc_remove-due.patch patches.suse/s390-uaccess-add-missing-earlyclobber-annotations-to-__clear_user.patch patches.suse/vmxnet3-use-gro-callback-when-UPT-is-enabled.patch + patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch ######################################################## # end of sorted patches