diff --git a/patches.suse/x86-cpufeatures-Add-macros-for-Intel-s-new-fast-rep-.patch b/patches.suse/x86-cpufeatures-Add-macros-for-Intel-s-new-fast-rep-.patch new file mode 100644 index 0000000..a7d830c --- /dev/null +++ b/patches.suse/x86-cpufeatures-Add-macros-for-Intel-s-new-fast-rep-.patch @@ -0,0 +1,35 @@ +From f8df91e73a6827a4569bb56cd53e55b4ea2f5b1f Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Thu, 1 Sep 2022 14:18:06 -0700 +Subject: [PATCH] x86/cpufeatures: Add macros for Intel's new fast rep string features +Git-commit: f8df91e73a6827a4569bb56cd53e55b4ea2f5b1f +Patch-mainline: v6.3-rc1 +References: bsc#1211140 + +KVM_GET_SUPPORTED_CPUID should reflect these host CPUID bits. The bits +are already cached in word 12. Give the bits X86_FEATURE names, so +that they can be easily referenced. Hide these bits from +/proc/cpuinfo, since the host kernel makes no use of them at present. + +Signed-off-by: Jim Mattson +Reviewed-by: Sean Christopherson +Link: https://lore.kernel.org/r/20220901211811.2883855-1-jmattson@google.com +Signed-off-by: Sean Christopherson +Acked-by: Takashi Iwai + +--- + arch/x86/include/asm/cpufeatures.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -307,6 +307,9 @@ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ ++#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ ++#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ ++#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ + #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ + + /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ diff --git a/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch b/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch new file mode 100644 index 0000000..ca20686 --- /dev/null +++ b/patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch @@ -0,0 +1,88 @@ +From 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 15 Apr 2023 12:01:14 -0700 +Subject: [PATCH] x86: don't use REP_GOOD or ERMS for small memory clearing +Git-commit: 20f3337d350c4e1b4ac66d731fd4e98565bf6cc0 +Patch-mainline: v6.4-rc1 +References: bsc#1211140 + +The modern target to use is FSRS (Fast Short REP STOS), and the other +cases should only be used for bigger areas (ie mainly things like page +clearing). + +Signed-off-by: Linus Torvalds +Acked-by: Takashi Iwai + +--- + arch/x86/lib/memset_64.S | 47 +++++++++++------------------------------------ + 1 file changed, 11 insertions(+), 36 deletions(-) + +--- a/arch/x86/lib/memset_64.S ++++ b/arch/x86/lib/memset_64.S +@@ -16,28 +16,23 @@ + * rdx count (bytes) + * + * rax original destination ++ * ++ * The FSRS alternative should be done inline (avoiding the call and ++ * the disgusting return handling), but that would require some help ++ * from the compiler for better calling conventions. ++ * ++ * The 'rep stosb' itself is small enough to replace the call, but all ++ * the register moves blow up the code. And two of them are "needed" ++ * only for the return value that is the same as the source input, ++ * which the compiler could/should do much better anyway. + */ + SYM_FUNC_START_WEAK(memset) + SYM_FUNC_START(__memset) +- /* +- * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended +- * to use it when possible. If not available, use fast string instructions. +- * +- * Otherwise, use original memset function. +- */ +- ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ +- "jmp memset_erms", X86_FEATURE_ERMS ++ ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS + + movq %rdi,%r9 ++ movb %sil,%al + movq %rdx,%rcx +- andl $7,%edx +- shrq $3,%rcx +- /* expand byte value */ +- movzbl %sil,%esi +- movabs $0x0101010101010101,%rax +- imulq %rsi,%rax +- rep stosq +- movl %edx,%ecx + rep stosb + movq %r9,%rax + RET +@@ -46,26 +41,6 @@ SYM_FUNC_END_ALIAS(memset) + EXPORT_SYMBOL(memset) + EXPORT_SYMBOL(__memset) + +-/* +- * ISO C memset - set a memory block to a byte value. This function uses +- * enhanced rep stosb to override the fast string function. +- * The code is simpler and shorter than the fast string function as well. +- * +- * rdi destination +- * rsi value (char) +- * rdx count (bytes) +- * +- * rax original destination +- */ +-SYM_FUNC_START_LOCAL(memset_erms) +- movq %rdi,%r9 +- movb %sil,%al +- movq %rdx,%rcx +- rep stosb +- movq %r9,%rax +- RET +-SYM_FUNC_END(memset_erms) +- + SYM_FUNC_START_LOCAL(memset_orig) + movq %rdi,%r10 + diff --git a/series.conf b/series.conf index af3f320..ffec9c3 100644 --- a/series.conf +++ b/series.conf @@ -36919,6 +36919,7 @@ patches.suse/ipmi-ssif-Add-a-timer-between-request-retries.patch patches.suse/scsi-smartpqi-Replace-one-element-array-with-flexibl-ead82126.patch patches.suse/mm-memcontrol-deprecate-charge-moving.patch + patches.suse/x86-cpufeatures-Add-macros-for-Intel-s-new-fast-rep-.patch patches.suse/ibmvnic-Assign-XPS-map-to-correct-queue-index.patch patches.suse/0001-net-tls-fix-possible-race-condition-between-do_tls_g.patch patches.suse/bnxt_en-Avoid-order-5-memory-allocation-for-TPA-data.patch @@ -36927,6 +36928,7 @@ patches.suse/nfc-st-nci-Fix-use-after-free-bug-in-ndlc_remove-due.patch patches.suse/s390-uaccess-add-missing-earlyclobber-annotations-to-__clear_user.patch patches.suse/vmxnet3-use-gro-callback-when-UPT-is-enabled.patch + patches.suse/x86-don-t-use-REP_GOOD-or-ERMS-for-small-memory-clea.patch patches.suse/0001-wifi-brcmfmac-slab-out-of-bounds-read-in-brcmf_get_a.patch ########################################################