From 5358acf6953cc1dd520f045d06425ff1ffff4753 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Feb 22 2024 13:17:53 +0000 Subject: Merge branch 'users/nborisov/SLE12-SP5/for-nex' into SLE12-SP5 Pull x86 fixes from Nikolay Borisov --- diff --git a/blacklist.conf b/blacklist.conf index 4e6fbfa..13c9798 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -3109,3 +3109,4 @@ ef97e774713fcd34c45f7a7426c7d8845394f7be # documentation only 3bd57b90554b4bb82dce638e0668ef9dc95d3e96 # not enabled e198987e7dd7d3645a53875151cd6f8fc425b706 # already applied b2ce5617dad254230551feda3599f2cc68e53ad8 # cleanup designed to break kABI +07ea4ab1f9b83953ff5c3f6ccfb84d581bfe0046 # clang fix diff --git a/kabi/severities b/kabi/severities index 7bbd770..d6c545e 100644 --- a/kabi/severities +++ b/kabi/severities @@ -152,3 +152,5 @@ net/l2tp/* PASS srso_untrain_ret_alias PASS zen_untrain_ret PASS + +mds_user_clear PASS diff --git a/patches.suse/KVM-VMX-Move-VERW-closer-to-VMentry-for-MDS-mitigation.patch b/patches.suse/KVM-VMX-Move-VERW-closer-to-VMentry-for-MDS-mitigation.patch new file mode 100644 index 0000000..10dd3bb --- /dev/null +++ b/patches.suse/KVM-VMX-Move-VERW-closer-to-VMentry-for-MDS-mitigation.patch @@ -0,0 +1,61 @@ +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:22:56 -0800 +Subject: KVM: VMX: Move VERW closer to VMentry for MDS mitigation +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: 43fb862de8f628c5db5e96831c915b9aebf62d33 +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +During VMentry VERW is executed to mitigate MDS. After VERW, any memory +access like register push onto stack may put host data in MDS affected +CPU buffers. A guest can then use MDS to sample host data. + +Although likelihood of secrets surviving in registers at current VERW +callsite is less, but it can't be ruled out. Harden the MDS mitigation +by moving the VERW mitigation late in VMentry path. 
+ +Note that VERW for MMIO Stale Data mitigation is unchanged because of +the complexity of per-guest conditional VERW which is not easy to handle +that late in asm with no GPRs available. If the CPU is also affected by +MDS, VERW is unconditionally executed late in asm regardless of guest +having MMIO access. + +Cc: +Signed-off-by: Pawan Gupta +Acked-by: Sean Christopherson +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/vmx.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -1738,7 +1738,8 @@ static __always_inline void vmx_enable_f + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) + { +- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; ++ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && ++ vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS +@@ -9929,8 +9930,6 @@ static void __noclone vmx_vcpu_run(struc + /* L1D Flush includes CPU buffer clear to mitigate MDS */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); +- else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) +- mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); +@@ -9975,6 +9974,9 @@ static void __noclone vmx_vcpu_run(struc + #endif + "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ + ++ /* Clobbers EFLAGS.ZF */ ++ CLEAR_CPU_BUFFERS ++ + /* Enter guest mode */ + "jc 1f \n\t" + __ex(ASM_VMX_VMLAUNCH) "\n\t" diff --git a/patches.suse/KVM-VMX-Use-BT-JNC-i.e.-EFLAGS.CF-to-select-VMRESUME-vs.-V.patch b/patches.suse/KVM-VMX-Use-BT-JNC-i.e.-EFLAGS.CF-to-select-VMRESUME-vs.-V.patch new file mode 100644 index 0000000..b4e39e5 --- /dev/null +++ b/patches.suse/KVM-VMX-Use-BT-JNC-i.e.-EFLAGS.CF-to-select-VMRESUME-vs.-V.patch @@ -0,0 +1,51 @@ +From: Sean Christopherson +Date: Tue, 13 
Feb 2024 18:22:40 -0800 +Subject: KVM: VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: 706a189dcf74d3b3f955e9384785e726ed6c7c80 +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus +VMLAUNCH. Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF, +for MDS mitigations as late as possible without needing to duplicate VERW +for both paths. + +Reviewed-by: Nikolay Borisov +Cc: +Signed-off-by: Sean Christopherson +Signed-off-by: Pawan Gupta +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/vmx.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -9854,6 +9854,8 @@ static void vmx_arm_hv_timer(struct kvm_ + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + } + ++#define VMX_RUN_VMRESUME_SHIFT 0 ++ + static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +@@ -9953,7 +9955,7 @@ static void __noclone vmx_vcpu_run(struc + "mov %%" _ASM_AX", %%cr2 \n\t" + "2: \n\t" + /* Check if vmlaunch of vmresume is needed */ +- "cmpb $0, %c[launched](%0) \n\t" ++ "btw $" __stringify(VMX_RUN_VMRESUME_SHIFT) ", %c[launched](%0) \n\t" + /* Load guest registers. Don't clobber flags. 
*/ + "mov %c[rax](%0), %%" _ASM_AX " \n\t" + "mov %c[rbx](%0), %%" _ASM_BX " \n\t" +@@ -9974,7 +9976,7 @@ static void __noclone vmx_vcpu_run(struc + "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ + + /* Enter guest mode */ +- "jne 1f \n\t" ++ "jc 1f \n\t" + __ex(ASM_VMX_VMLAUNCH) "\n\t" + "jmp 2f \n\t" + "1: " __ex(ASM_VMX_VMRESUME) "\n\t" diff --git a/patches.suse/KVM-x86-Move-open-coded-CPUID-leaf-0x80000021-EAX-bit-prop.patch b/patches.suse/KVM-x86-Move-open-coded-CPUID-leaf-0x80000021-EAX-bit-prop.patch new file mode 100644 index 0000000..0ea82ae --- /dev/null +++ b/patches.suse/KVM-x86-Move-open-coded-CPUID-leaf-0x80000021-EAX-bit-prop.patch @@ -0,0 +1,59 @@ +From: Kim Phillips +Date: Tue, 24 Jan 2023 10:33:13 -0600 +Subject: KVM: x86: Move open-coded CPUID leaf 0x80000021 EAX bit propagation + code +Git-commit: c35ac8c4bf600ee23bacb20f863aa7830efb23fb +Patch-mainline: v6.3-rc1 +References: git-fixes + +Move code from __do_cpuid_func() to kvm_set_cpu_caps() in preparation for adding +the features in their native leaf. + +Also drop the bit description comments as it will be more self-describing once +the individual features are added. + +Whilst there, switch to using the more efficient cpu_feature_enabled() instead +of static_cpu_has(). + +Note, LFENCE_RDTSC and "NULL selector clears base" are currently synthetic, +Linux-defined feature flags as Linux tracking of the features predates AMD's +definition. Keep the manual propagation of the flags from their synthetic +counterparts until the kernel fully converts to AMD's definition, otherwise KVM +would stop synthesizing the flags as intended. 
+ +Signed-off-by: Kim Phillips +Signed-off-by: Borislav Petkov (AMD) +Acked-by: Sean Christopherson +Link: https://lore.kernel.org/r/20230124163319.2277355-3-kim.phillips@amd.com + +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/cpuid.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -395,6 +395,11 @@ static inline int __do_cpuid_ent(struct + F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO) | F(AMD_STIBP); + ++ const u32 kvm_cpuid_8000_0021_eax_x86_features = ++ BIT(0) /* NO_NESTED_DATA_BP */ | ++ BIT(2) /* LFENCE Always serializing */ | 0 /* SmmPgCfgLock */ | ++ BIT(6) /* NULL_SEL_CLR_BASE */ | 0 /* PrefetchCtlMsr */; ++ + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | +@@ -743,8 +748,9 @@ static inline int __do_cpuid_ent(struct + * EAX 3 SPCL, SMM page configuration lock + * EAX 13 PCMSR, Prefetch control MSR + */ +- entry->eax &= BIT(0) | BIT(2) | BIT(6); +- if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)) ++ entry->eax &= kvm_cpuid_8000_0021_eax_x86_features; ++ ++ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + entry->eax |= BIT(2); + if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) + entry->eax |= BIT(6); diff --git a/patches.suse/KVM-x86-add-support-for-CPUID-leaf-0x80000021.patch b/patches.suse/KVM-x86-add-support-for-CPUID-leaf-0x80000021.patch new file mode 100644 index 0000000..545d2ad --- /dev/null +++ b/patches.suse/KVM-x86-add-support-for-CPUID-leaf-0x80000021.patch @@ -0,0 +1,51 @@ +From: Paolo Bonzini +Date: Thu, 28 Oct 2021 13:26:38 -0400 +Subject: KVM: x86: add support for CPUID leaf 0x80000021 +Git-commit: 58b3d12c0a860cda34ed9d2378078ea5134e6812 +Patch-mainline: v5.18-rc1 +References: git-fixes + +CPUID leaf 0x80000021 defines some features (or lack of bugs) of AMD +processors. Expose the ones that make sense via KVM_GET_SUPPORTED_CPUID. 
+ +Signed-off-by: Paolo Bonzini +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/cpuid.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -632,7 +632,7 @@ static inline int __do_cpuid_ent(struct + entry->edx = 0; + break; + case 0x80000000: +- entry->eax = min(entry->eax, 0x8000001f); ++ entry->eax = min(entry->eax, 0x80000021); + break; + case 0x80000001: + entry->edx &= kvm_cpuid_8000_0001_edx_x86_features; +@@ -696,6 +696,23 @@ static inline int __do_cpuid_ent(struct + case 0x8000001a: + case 0x8000001e: + break; ++ case 0x80000020: ++ entry->eax = entry->ebx = entry->ecx = entry->edx = 0; ++ break; ++ case 0x80000021: ++ entry->ebx = entry->ecx = entry->edx = 0; ++ /* ++ * Pass down these bits: ++ * EAX 0 NNDBP, Processor ignores nested data breakpoints ++ * EAX 2 LAS, LFENCE always serializing ++ * EAX 6 NSCB, Null selector clear base ++ * ++ * Other defined bits are for MSRs that KVM does not expose: ++ * EAX 3 SPCL, SMM page configuration lock ++ * EAX 13 PCMSR, Prefetch control MSR ++ */ ++ entry->eax &= BIT(0) | BIT(2) | BIT(6); ++ break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ diff --git a/patches.suse/KVM-x86-synthesize-CPUID-leaf-0x80000021h-if-useful.patch b/patches.suse/KVM-x86-synthesize-CPUID-leaf-0x80000021h-if-useful.patch new file mode 100644 index 0000000..bedac4f --- /dev/null +++ b/patches.suse/KVM-x86-synthesize-CPUID-leaf-0x80000021h-if-useful.patch @@ -0,0 +1,76 @@ +From: Paolo Bonzini +Date: Thu, 21 Oct 2021 17:19:27 -0400 +Subject: KVM: x86: synthesize CPUID leaf 0x80000021h if useful +Git-commit: f144c49e8c3950add1b051e76ebf40a258984c9d +Patch-mainline: v5.18-rc1 +References: git-fixes + +Guests have X86_BUG_NULL_SEG if and only if the host has it. Use the info +from static_cpu_has_bug to form the 0x80000021 CPUID leaf that was +defined for Zen3.
Userspace can then set the bit even on older CPUs +that do not have the bug, such as Zen2. + +Do the same for X86_FEATURE_LFENCE_RDTSC as well, since various processors +have had very different ways of detecting it and not all of them are +available to userspace. + +Signed-off-by: Paolo Bonzini +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/cpuid.c | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -281,6 +281,25 @@ static void do_cpuid_1_ent(struct kvm_cp + { + entry->function = function; + entry->index = index; ++ switch(function & 0xC0000000) { ++ case 0x80000000: ++ /* ++ * 0x80000021 is sometimes synthesized by __do_cpuid_func, which ++ * would result in out-of-bounds calls to do_host_cpuid. ++ */ ++ { ++ static int max_cpuid_80000000; ++ if (!READ_ONCE(max_cpuid_80000000)) ++ WRITE_ONCE(max_cpuid_80000000, cpuid_eax(0x80000000)); ++ if (function > READ_ONCE(max_cpuid_80000000)) ++ return; ++ } ++ break; ++ ++ default: ++ break; ++ ++ } + cpuid_count(entry->function, entry->index, + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + entry->flags = 0; +@@ -633,6 +652,14 @@ static inline int __do_cpuid_ent(struct + break; + case 0x80000000: + entry->eax = min(entry->eax, 0x80000021); ++ /* ++ * Serializing LFENCE is reported in a multitude of ways, ++ * and NullSegClearsBase is not reported in CPUID on Zen2; ++ * help userspace by providing the CPUID leaf ourselves. 
++ */ ++ if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) ++ || !static_cpu_has_bug(X86_BUG_NULL_SEG)) ++ entry->eax = max(entry->eax, 0x80000021); + break; + case 0x80000001: + entry->edx &= kvm_cpuid_8000_0001_edx_x86_features; +@@ -712,6 +739,10 @@ static inline int __do_cpuid_ent(struct + * EAX 13 PCMSR, Prefetch control MSR + */ + entry->eax &= BIT(0) | BIT(2) | BIT(6); ++ if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)) ++ entry->eax |= BIT(2); ++ if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) ++ entry->eax |= BIT(6); + break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: diff --git a/patches.suse/KVM-x86-work-around-QEMU-issue-with-synthetic-CPUID-leaves.patch b/patches.suse/KVM-x86-work-around-QEMU-issue-with-synthetic-CPUID-leaves.patch new file mode 100644 index 0000000..1446335 --- /dev/null +++ b/patches.suse/KVM-x86-work-around-QEMU-issue-with-synthetic-CPUID-leaves.patch @@ -0,0 +1,61 @@ +From: Paolo Bonzini +Date: Fri, 29 Apr 2022 14:43:04 -0400 +Subject: KVM: x86: work around QEMU issue with synthetic CPUID leaves +Git-commit: f751d8eac17692905cdd6935f72d523d8adf3b65 +Patch-mainline: v5.18-rc5 +References: git-fixes + +Synthesizing AMD leaves up to 0x80000021 caused problems with QEMU, +which assumes the *host* CPUID[0x80000000].EAX is higher or equal +to what KVM_GET_SUPPORTED_CPUID reports. + +This causes QEMU to issue bogus host CPUIDs when preparing the input +to KVM_SET_CPUID2. It can even get into an infinite loop, which is +only terminated by an abort(): + + cpuid_data is full, no space for cpuid(eax:0x8000001d,ecx:0x3e) + +To work around this, only synthesize those leaves if 0x8000001d exists +on the host. The synthetic 0x80000021 leaf is mostly useful on Zen2, +which satisfies the condition. 
+ +Fixes: f144c49e8c39 ("KVM: x86: synthesize CPUID leaf 0x80000021h if useful") +Reported-by: Maxim Levitsky +Signed-off-by: Paolo Bonzini +Acked-by: Nikolay Borisov +--- + arch/x86/kvm/cpuid.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index b24ca7f4ed7c..598334ed5fbc 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1085,12 +1085,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) + case 0x80000000: + entry->eax = min(entry->eax, 0x80000021); + /* +- * Serializing LFENCE is reported in a multitude of ways, +- * and NullSegClearsBase is not reported in CPUID on Zen2; +- * help userspace by providing the CPUID leaf ourselves. ++ * Serializing LFENCE is reported in a multitude of ways, and ++ * NullSegClearsBase is not reported in CPUID on Zen2; help ++ * userspace by providing the CPUID leaf ourselves. ++ * ++ * However, only do it if the host has CPUID leaf 0x8000001d. ++ * QEMU thinks that it can query the host blindly for that ++ * CPUID leaf if KVM reports that it supports 0x8000001d or ++ * above. The processor merrily returns values from the ++ * highest Intel leaf which QEMU tries to use as the guest's ++ * 0x8000001d. Even worse, this can result in an infinite ++ * loop if said highest leaf has no subleaves indexed by ECX. 
+ */ +- if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) +- || !static_cpu_has_bug(X86_BUG_NULL_SEG)) ++ if (entry->eax >= 0x8000001d && ++ (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) ++ || !static_cpu_has_bug(X86_BUG_NULL_SEG))) + entry->eax = max(entry->eax, 0x80000021); + break; + case 0x80000001: + diff --git a/patches.suse/x86-asm-Add-_ASM_RIP-macro-for-x86-64-rip-suffix.patch b/patches.suse/x86-asm-Add-_ASM_RIP-macro-for-x86-64-rip-suffix.patch new file mode 100644 index 0000000..32d3859 --- /dev/null +++ b/patches.suse/x86-asm-Add-_ASM_RIP-macro-for-x86-64-rip-suffix.patch @@ -0,0 +1,46 @@ +From: "H. Peter Anvin (Intel)" +Date: Fri, 10 Sep 2021 12:59:09 -0700 +Subject: x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix +Git-commit: f87bc8dc7a7c438c70f97b4e51c76a183313272e +Patch-mainline: v5.16-rc1 +References: git-fixes + +Add a macro _ASM_RIP() to add a (%rip) suffix on 64 bits only. This is +useful for immediate memory references where one doesn't want gcc +to possibly use a register indirection as it may in the case of an "m" +constraint. + +Signed-off-by: H. 
Peter Anvin (Intel) +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20210910195910.2542662-3-hpa@zytor.com + +Acked-by: Nikolay Borisov +--- + arch/x86/include/asm/asm.h | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/include/asm/asm.h ++++ b/arch/x86/include/asm/asm.h +@@ -5,10 +5,12 @@ + # define __ASM_FORM(x) x + # define __ASM_FORM_RAW(x) x + # define __ASM_FORM_COMMA(x) x, ++# define __ASM_REGPFX % + #else + # define __ASM_FORM(x) " " #x " " + # define __ASM_FORM_RAW(x) #x + # define __ASM_FORM_COMMA(x) " " #x "," ++# define __ASM_REGPFX %% + #endif + + #ifndef __x86_64__ +@@ -45,6 +47,9 @@ + #define _ASM_SI __ASM_REG(si) + #define _ASM_DI __ASM_REG(di) + ++/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ ++#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) ++ + #ifndef __x86_64__ + /* 32 bit */ + diff --git a/patches.suse/x86-bugs-Add-asm-helpers-for-executing-VERW.patch b/patches.suse/x86-bugs-Add-asm-helpers-for-executing-VERW.patch new file mode 100644 index 0000000..640b577 --- /dev/null +++ b/patches.suse/x86-bugs-Add-asm-helpers-for-executing-VERW.patch @@ -0,0 +1,129 @@ +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:21:35 -0800 +Subject: x86/bugs: Add asm helpers for executing VERW +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: baf8361e54550a48a7087b603313ad013cc13386 +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +MDS mitigation requires clearing the CPU buffers before returning to +user. This needs to be done late in the exit-to-user path. Current +location of VERW leaves a possibility of kernel data ending up in CPU +buffers for memory accesses done after VERW such as: + + 1. Kernel data accessed by an NMI between VERW and return-to-user can + remain in CPU buffers since NMI returning to kernel does not + execute VERW to clear CPU buffers. + 2. 
Alyssa reported that after VERW is executed, + CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system + call. Memory accesses during stack scrubbing can move kernel stack + contents into CPU buffers. + 3. When caller saved registers are restored after a return from + function executing VERW, the kernel stack accesses can remain in + CPU buffers (since they occur after VERW). + +To fix this VERW needs to be moved very late in exit-to-user path. + +In preparation for moving VERW to entry/exit asm code, create macros +that can be used in asm. Also make VERW patching depend on a new feature +flag X86_FEATURE_CLEAR_CPU_BUF. + +Reported-by: Alyssa Milburn +Suggested-by: Andrew Cooper +Suggested-by: Peter Zijlstra +Cc: +Signed-off-by: Pawan Gupta +Acked-by: Nikolay Borisov +--- + arch/x86/entry/entry.S | 25 +++++++++++++++++++++++++ + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/nospec-branch.h | 20 ++++++++++++++++++++ + 3 files changed, 46 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/entry.S ++++ b/arch/x86/entry/entry.S +@@ -6,6 +6,9 @@ + #include + #include + #include ++#include ++#include ++#include + + .pushsection .noinstr.text, "ax" + +@@ -30,3 +33,25 @@ END(entry_ibpb) + EXPORT_SYMBOL_GPL(entry_ibpb); + + .popsection ++ ++/* ++ * Define the VERW operand that is disguised as entry code so that ++ * it can be referenced with KPTI enabled. This ensures VERW can be ++ * used late in exit-to-user path after page tables are switched.
++ */ ++.pushsection .entry.text, "ax" ++ ++.align L1_CACHE_BYTES, 0xcc ++ .globl mds_verw_sel ++mds_verw_sel: ++ .word __KERNEL_DS ++.align L1_CACHE_BYTES, 0xcc ++ .type mds_verw_sel STT_NOTYPE ++ .set .L__sym_size_mds_verw_sel, .-mds_verw_sel ++ .size mds_verw_sel, .L__sym_size_mds_verw_sel ++ ++/* For KVM */ ++EXPORT_SYMBOL_GPL(mds_verw_sel); ++ ++.popsection ++ +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -98,7 +98,7 @@ + #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ + #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ + #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +-/* FREE was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" Lfence synchronizes RDTSC */ ++#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ + #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ + #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ + #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -188,8 +188,26 @@ + #endif + .endm + ++/* ++ * Macro to execute VERW instruction that mitigates transient data sampling ++ * attacks such as MDS. On affected systems a microcode update overloaded VERW ++ * instruction to also clear the CPU buffers. VERW clobbers EFLAGS.ZF. ++ * ++ * Note: Only the memory operand variant of VERW clears the CPU buffers.
++ */ ++.macro CLEAR_CPU_BUFFERS ++ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF ++ verw _ASM_RIP(mds_verw_sel) ++.Lskip_verw_\@: ++.endm ++ + #else /* __ASSEMBLY__ */ + ++#define CLEAR_CPU_BUFFERS \ ++ ALTERNATIVE("jmp 1f\t\n", "", X86_FEATURE_CLEAR_CPU_BUF) \ ++ "verw " _ASM_RIP(mds_verw_sel) " \t\n" \ ++ "1:\t\n" ++ + #if defined(CONFIG_RETPOLINE) || defined(CONFIG_CPU_SRSO) + #define UNTRAIN_RET_VM \ + ALTERNATIVE_2( \ +@@ -389,6 +407,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear) + + DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + ++extern u16 mds_verw_sel; ++ + #include + + /** diff --git a/patches.suse/x86-bugs-Use-ALTERNATIVE-instead-of-mds_user_clear-static-.patch b/patches.suse/x86-bugs-Use-ALTERNATIVE-instead-of-mds_user_clear-static-.patch new file mode 100644 index 0000000..7f44dc7 --- /dev/null +++ b/patches.suse/x86-bugs-Use-ALTERNATIVE-instead-of-mds_user_clear-static-.patch @@ -0,0 +1,213 @@ +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:22:24 -0800 +Subject: x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +The VERW mitigation at exit-to-user is enabled via a static branch +mds_user_clear. This static branch is never toggled after boot, and can +be safely replaced with an ALTERNATIVE() which is convenient to use in +asm. + +Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user +path. Also remove the now redundant VERW in exc_nmi() and +arch_exit_to_user_mode(). 
+ +Cc: +Signed-off-by: Pawan Gupta +Acked-by: Nikolay Borisov +--- + Documentation/x86/mds.rst | 33 +++++++++++++++++++++++++++------ + arch/x86/entry/common.c | 1 - + arch/x86/include/asm/nospec-branch.h | 12 ------------ + arch/x86/kernel/cpu/bugs.c | 15 ++++++--------- + arch/x86/kernel/nmi.c | 3 --- + arch/x86/kvm/vmx.c | 6 +++--- + 6 files changed, 36 insertions(+), 34 deletions(-) + +--- a/Documentation/x86/mds.rst ++++ b/Documentation/x86/mds.rst +@@ -90,6 +90,9 @@ The kernel provides a function to invoke + + mds_clear_cpu_buffers() + ++Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. ++Other than CFLAGS.ZF, this macro doesn't clobber any registers. ++ + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state + (idle) transitions. + +@@ -137,13 +140,31 @@ Mitigation points + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel +- command line. The migitation is enabled through the static key +- mds_user_clear. ++ command line. The mitigation is enabled through the feature flag ++ X86_FEATURE_CLEAR_CPU_BUF. ++ ++ The mitigation is invoked just before transitioning to userspace after ++ user registers are restored. This is done to minimize the window in ++ which kernel data could be accessed after VERW e.g. via an NMI after ++ VERW. ++ ++ **Corner case not handled** ++ Interrupts returning to kernel don't clear CPUs buffers since the ++ exit-to-user path is expected to do that anyways. But, there could be ++ a case when an NMI is generated in kernel after the exit-to-user path ++ has cleared the buffers. This case is not handled and NMI returning to ++ kernel don't clear CPU buffers because: ++ ++ 1. It is rare to get an NMI after VERW, but before returning to userspace. ++ 2. For an unprivileged user, there is no known way to make that NMI ++ less rare or target it. ++ 3. 
It would take a large number of these precisely-timed NMIs to mount ++ an actual attack. There's presumably not enough bandwidth. ++ 4. The NMI in question occurs after a VERW, i.e. when user state is ++ restored and most interesting data is already scrubbed. What's left ++ is only the data that NMI touches, and that may or may not be of ++ any interest. + +- The mitigation is invoked in prepare_exit_to_usermode() which covers +- most of the kernel to user space transitions. There are a few exceptions +- which are not invoking prepare_exit_to_usermode() on return to user +- space. These exceptions use the paranoid exit code. + + - Non Maskable Interrupt (NMI): + +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -211,7 +211,6 @@ __visible inline void prepare_exit_to_us + + user_enter_irqoff(); + +- mds_user_clear_cpu_buffers(); + amd_clear_divider(); + } + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -402,7 +402,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_ + DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); + DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-DECLARE_STATIC_KEY_FALSE(mds_user_clear); + DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + + DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +@@ -435,17 +434,6 @@ static __always_inline void mds_clear_cp + } + + /** +- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability +- * +- * Clear CPU buffers if the corresponding static key is enabled +- */ +-static __always_inline void mds_user_clear_cpu_buffers(void) +-{ +- if (static_branch_likely(&mds_user_clear)) +- mds_clear_cpu_buffers(); +-} +- +-/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -108,9 +108,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_i + /* Control unconditional IBPB in switch_mm() */ +
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-/* Control MDS CPU buffer clear before returning to user space */ +-DEFINE_STATIC_KEY_FALSE(mds_user_clear); +-EXPORT_SYMBOL_GPL(mds_user_clear); + /* Control MDS CPU buffer clear before idling (halt, mwait) */ + DEFINE_STATIC_KEY_FALSE(mds_idle_clear); + EXPORT_SYMBOL_GPL(mds_idle_clear); +@@ -285,7 +282,7 @@ static void __init mds_select_mitigation + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) +@@ -382,7 +379,7 @@ static void __init taa_select_mitigation + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +@@ -450,7 +447,7 @@ static void __init mmio_select_mitigatio + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + static_branch_enable(&mmio_stale_data_clear); + +@@ -510,12 +507,12 @@ static void __init md_clear_update_mitig + if (cpu_mitigations_off()) + return; + +- if (!static_key_enabled(&mds_user_clear)) ++ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + goto out; + + /* +- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data +- * mitigation, if necessary. ++ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO ++ * Stale Data mitigation, if necessary. 
+ */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { +--- a/arch/x86/kernel/nmi.c ++++ b/arch/x86/kernel/nmi.c +@@ -534,9 +534,6 @@ nmi_restart: + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; +- +- if (user_mode(regs)) +- mds_user_clear_cpu_buffers(); + } + NOKPROBE_SYMBOL(do_nmi); + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -9927,7 +9927,7 @@ static void __noclone vmx_vcpu_run(struc + /* L1D Flush includes CPU buffer clear to mitigate MDS */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); +- else if (static_branch_unlikely(&mds_user_clear)) ++ else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) + mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) +@@ -11376,7 +11376,7 @@ static int prepare_vmcs02(struct kvm_vcp + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are + * emulated by vmx_set_efer(), below. 
+ */ +- vm_entry_controls_init(vmx, ++ vm_entry_controls_init(vmx, + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & + ~VM_ENTRY_IA32E_MODE) | + (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); +@@ -12477,7 +12477,7 @@ static void nested_vmx_vmexit(struct kvm + + return; + } +- ++ + /* + * After an early L2 VM-entry failure, we're now back + * in L1 which thinks it just finished a VMLAUNCH or diff --git a/patches.suse/x86-cpu-kvm-Move-X86_FEATURE_LFENCE_RDTSC-to-its-native-le.patch b/patches.suse/x86-cpu-kvm-Move-X86_FEATURE_LFENCE_RDTSC-to-its-native-le.patch new file mode 100644 index 0000000..00d70de --- /dev/null +++ b/patches.suse/x86-cpu-kvm-Move-X86_FEATURE_LFENCE_RDTSC-to-its-native-le.patch @@ -0,0 +1,92 @@ +From: Kim Phillips +Date: Tue, 24 Jan 2023 10:33:15 -0600 +Subject: x86/cpu, kvm: Move X86_FEATURE_LFENCE_RDTSC to its native leaf +Git-commit: 84168ae786f8a15a7eb0f79d34f20b8d261ce2f5 +Patch-mainline: v6.3-rc1 +References: git-fixes + +The LFENCE always serializing feature bit was defined as scattered +LFENCE_RDTSC and its native leaf bit position open-coded for KVM. Add +it to its newly added CPUID leaf 0x80000021 EAX proper. With +LFENCE_RDTSC in its proper place, the kernel's set_cpu_cap() will +effectively synthesize the feature for KVM going forward. + +Also, DE_CFG[1] doesn't need to be set on such CPUs anymore. + + [ bp: Massage and merge diff from Sean. 
] + +Signed-off-by: Kim Phillips +Signed-off-by: Borislav Petkov (AMD) +Acked-by: Sean Christopherson +Link: https://lore.kernel.org/r/20230124163319.2277355-5-kim.phillips@amd.com + +Acked-by: Nikolay Borisov +--- + arch/x86/include/asm/cpufeatures.h | 5 ++++- + arch/x86/kernel/cpu/amd.c | 2 +- + arch/x86/kvm/cpuid.c | 14 ++++++++++++-- + 3 files changed, 17 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -98,7 +98,7 @@ + #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ + #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ + #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ ++/* FREE was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" Lfence synchronizes RDTSC */ + #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ + #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ + #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +@@ -374,6 +374,9 @@ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ + ++ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ ++#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ ++ + /* + * BUG word(s) + */ +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -966,7 +966,7 @@ static void init_amd(struct cpuinfo_x86 + if (c->x86 >= 0xf) + set_cpu_cap(c, X86_FEATURE_K8); + +- if (cpu_has(c, X86_FEATURE_XMM2)) { ++ if (!cpu_has(c, X86_FEATURE_LFENCE_RDTSC) && cpu_has(c, X86_FEATURE_XMM2)) { + unsigned long long val; + int ret; + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -397,7 +397,7 @@ static inline int 
__do_cpuid_ent(struct + + const u32 kvm_cpuid_8000_0021_eax_x86_features = + BIT(0) /* NO_NESTED_DATA_BP */ | +- BIT(2) /* LFENCE Always serializing */ | 0 /* SmmPgCfgLock */ | ++ F(LFENCE_RDTSC) /* LFENCE Always serializing */ | 0 /* SmmPgCfgLock */ | + BIT(6) /* NULL_SEL_CLR_BASE */ | 0 /* PrefetchCtlMsr */; + + /* cpuid 0xC0000001.edx */ +@@ -750,8 +750,18 @@ static inline int __do_cpuid_ent(struct + */ + entry->eax &= kvm_cpuid_8000_0021_eax_x86_features; + ++ /* ++ * Synthesize "LFENCE is serializing" into the AMD-defined entry in ++ * KVM's supported CPUID if the feature is reported as supported by the ++ * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long ++ * before AMD joined the bandwagon, e.g. LFENCE is serializing on most ++ * CPUs that support SSE2. On CPUs that don't support AMD's leaf, ++ * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing ++ * the mask with the raw host CPUID, and reporting support in AMD's ++ * leaf can make it easier for userspace to detect the feature. ++ */ + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) +- entry->eax |= BIT(2); ++ entry->eax |= F(LFENCE_RDTSC); + if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) + entry->eax |= BIT(6); + break; diff --git a/patches.suse/x86-entry_32-Add-VERW-just-before-userspace-transition.patch b/patches.suse/x86-entry_32-Add-VERW-just-before-userspace-transition.patch new file mode 100644 index 0000000..0f1ef64 --- /dev/null +++ b/patches.suse/x86-entry_32-Add-VERW-just-before-userspace-transition.patch @@ -0,0 +1,44 @@ +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:22:08 -0800 +Subject: x86/entry_32: Add VERW just before userspace transition +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: a0e2dab44d22b913b4c228c8b52b2a104434b0b3 +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +As done for entry_64, add support for executing VERW late in exit to +user path for 32-bit mode. 
+ +Cc: +Signed-off-by: Pawan Gupta +Acked-by: Nikolay Borisov +--- + arch/x86/entry/entry_32.S | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -278,6 +278,7 @@ ENTRY(schedule_tail_wrapper) + pushl %eax + call schedule_tail + popl %eax ++ CLEAR_CPU_BUFFERS + + FRAME_END + RET +@@ -565,6 +566,7 @@ restore_all: + #endif + .Lrestore_nocheck: + RESTORE_REGS 4 # skip orig_eax/error_code ++ CLEAR_CPU_BUFFERS + .Lirq_return: + INTERRUPT_RETURN + +@@ -1020,6 +1022,7 @@ ENTRY(nmi) + + /* Not on SYSENTER stack. */ + call do_nmi ++ CLEAR_CPU_BUFFERS + jmp .Lrestore_all_notrace + + .Lnmi_from_sysenter_stack: diff --git a/patches.suse/x86-entry_64-Add-VERW-just-before-userspace-transition.patch b/patches.suse/x86-entry_64-Add-VERW-just-before-userspace-transition.patch new file mode 100644 index 0000000..5c32218 --- /dev/null +++ b/patches.suse/x86-entry_64-Add-VERW-just-before-userspace-transition.patch @@ -0,0 +1,116 @@ +From: Pawan Gupta +Date: Tue, 13 Feb 2024 18:21:52 -0800 +Subject: x86/entry_64: Add VERW just before userspace transition +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +Git-commit: 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a +Patch-mainline: Queued in subsystem maintainer repo +References: git-fixes + +Mitigation for MDS is to use VERW instruction to clear any secrets in +CPU Buffers. Any memory accesses after VERW execution can still remain +in CPU buffers. It is safer to execute VERW late in return to user path +to minimize the window in which kernel data can end up in CPU buffers. +There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. + +Add support for deploying VERW mitigation after user register state is +restored. This helps minimize the chances of kernel data ending up into +CPU buffers after executing VERW. + +Note that the mitigation at the new location is not yet enabled. 
+ + Corner case not handled + ======================= + Interrupts returning to kernel don't clear CPU buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMIs returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. What's left + is only the data that NMI touches, and that may or may not be of + any interest. + +Suggested-by: Dave Hansen +Cc: +Signed-off-by: Pawan Gupta +Acked-by: Nikolay Borisov +--- + arch/x86/entry/entry_64.S | 11 +++++++++++ + arch/x86/entry/entry_64_compat.S | 1 + + arch/x86/include/asm/irqflags.h | 1 + + 3 files changed, 13 insertions(+) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -48,6 +48,7 @@ + ENTRY(native_usergs_sysret64) + UNWIND_HINT_EMPTY + swapgs ++ CLEAR_CPU_BUFFERS + sysretq + END(native_usergs_sysret64) + #endif /* CONFIG_PARAVIRT */ +@@ -638,6 +639,7 @@ GLOBAL(swapgs_restore_regs_and_return_to + /* Restore RDI.
*/ + popq %rdi + SWAPGS ++ CLEAR_CPU_BUFFERS + INTERRUPT_RETURN + + +@@ -759,6 +761,8 @@ native_irq_return_ldt: + */ + popq %rax /* Restore user RAX */ + ++ CLEAR_CPU_BUFFERS ++ + /* + * RSP now points to an ordinary IRET frame, except that the page + * is read-only and RSP[31:16] are preloaded with the userspace +@@ -1694,6 +1698,12 @@ nmi_restore: + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* ++ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like ++ * NMI in kernel after user state is restored. For an unprivileged user ++ * these conditions are hard to meet. ++ */ ++ ++ /* + * iretq reads the "iret" frame and exits the NMI stack in a + * single instruction. We are returning to kernel mode, so this + * cannot result in a fault. Similarly, we don't need to worry +@@ -1705,6 +1715,7 @@ END(nmi) + ENTRY(ignore_sysret) + UNWIND_HINT_EMPTY + mov $-ENOSYS, %eax ++ CLEAR_CPU_BUFFERS + sysret + END(ignore_sysret) + +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -311,6 +311,7 @@ sysret32_from_system_call: + xorl %r9d, %r9d + xorl %r10d, %r10d + swapgs ++ CLEAR_CPU_BUFFERS + sysretl + END(entry_SYSCALL_compat) + +--- a/arch/x86/include/asm/irqflags.h ++++ b/arch/x86/include/asm/irqflags.h +@@ -140,6 +140,7 @@ static inline notrace unsigned long arch + #define INTERRUPT_RETURN jmp native_iret + #define USERGS_SYSRET64 \ + swapgs; \ ++ CLEAR_CPU_BUFFERS \ + sysretq; + #define USERGS_SYSRET32 \ + swapgs; \ diff --git a/patches.suse/x86-srso-add-ibpb_brtype-support.patch b/patches.suse/x86-srso-add-ibpb_brtype-support.patch index d49c714..0a550b0 100644 --- a/patches.suse/x86-srso-add-ibpb_brtype-support.patch +++ b/patches.suse/x86-srso-add-ibpb_brtype-support.patch @@ -25,9 +25,9 @@ Acked-by: Nikolay Borisov --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h -@@ -375,6 +375,8 @@ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ - #define 
X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +@@ -380,6 +380,8 @@ + /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ + #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ + @@ -36,7 +36,7 @@ Acked-by: Nikolay Borisov */ --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1736,10 +1736,20 @@ static void __init srso_select_mitigatio +@@ -1631,10 +1631,20 @@ static void __init srso_select_mitigatio if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) return; diff --git a/patches.suse/x86-srso-add-srso_no-support.patch b/patches.suse/x86-srso-add-srso_no-support.patch index 1e84338..fae3be8 100644 --- a/patches.suse/x86-srso-add-srso_no-support.patch +++ b/patches.suse/x86-srso-add-srso_no-support.patch @@ -22,9 +22,9 @@ Acked-by: Nikolay Borisov --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h -@@ -375,7 +375,9 @@ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ - #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +@@ -380,7 +380,9 @@ + /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ + #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ @@ -34,7 +34,7 @@ Acked-by: Nikolay Borisov * BUG word(s) --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h -@@ -51,6 +51,7 @@ +@@ -55,6 +55,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -61,7 
+61,7 @@ Acked-by: Nikolay Borisov /* The Intel SPEC CTRL MSR base value cache */ --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -1247,14 +1247,14 @@ bool cpu_has_ibpb_brtype_microcode(void) +@@ -1256,14 +1256,14 @@ bool cpu_has_ibpb_brtype_microcode(void) { u8 fam = boot_cpu_data.x86; @@ -94,7 +94,7 @@ Acked-by: Nikolay Borisov static DEFINE_MUTEX(spec_ctrl_mutex); /* Update SPEC_CTRL MSR and its cached copy unconditionally */ -@@ -1618,7 +1621,7 @@ static void __init srso_select_mitigatio +@@ -1629,7 +1632,7 @@ static void __init srso_select_mitigatio bool has_microcode; if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) @@ -103,7 +103,7 @@ Acked-by: Nikolay Borisov /* * The first check is for the kernel running as a guest in order -@@ -1631,9 +1634,18 @@ static void __init srso_select_mitigatio +@@ -1642,9 +1645,18 @@ static void __init srso_select_mitigatio } else { /* * Enable the synthetic (even if in a real CPUID leaf) @@ -123,7 +123,7 @@ Acked-by: Nikolay Borisov } switch (srso_cmd) { -@@ -1656,16 +1668,20 @@ static void __init srso_select_mitigatio +@@ -1667,16 +1679,20 @@ static void __init srso_select_mitigatio srso_mitigation = SRSO_MITIGATION_SAFE_RET; } else { pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); @@ -148,7 +148,7 @@ Acked-by: Nikolay Borisov #undef pr_fmt --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -1159,8 +1159,10 @@ static void __init cpu_set_bug_bits(stru +@@ -1165,8 +1165,10 @@ static void __init cpu_set_bug_bits(stru !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); diff --git a/series.conf b/series.conf index c94e91d..284df62 100644 --- a/series.conf +++ b/series.conf @@ -62369,6 +62369,7 @@ patches.suse/x86-Fix-__get_wchan-for-STACKTRACE.patch patches.suse/edac-amd64-handle-three-rank-interleaving-mode.patch patches.suse/edac-sb_edac-fix-top-of-high-memory-value-for-broadwell-haswell.patch + 
patches.suse/x86-asm-Add-_ASM_RIP-macro-for-x86-64-rip-suffix.patch patches.suse/media-dvb-usb-fix-ununit-value-in-az6027_rc_query.patch patches.suse/media-v4l2-ioctl-S_CTRL-output-the-right-value.patch patches.suse/media-mtk-vpu-Fix-a-resource-leak-in-the-error-handl.patch @@ -63018,6 +63019,8 @@ patches.suse/KVM-x86-Fix-emulation-in-writing-cr8.patch patches.suse/KVM-x86-emulator-Defer-not-present-segment-check-in-.patch patches.suse/KVM-s390x-fix-SCK-locking + patches.suse/KVM-x86-add-support-for-CPUID-leaf-0x80000021.patch + patches.suse/KVM-x86-synthesize-CPUID-leaf-0x80000021h-if-useful.patch patches.suse/msft-hv-2551-x86-hyperv-Output-host-build-info-as-normal-Windows-.patch patches.suse/msft-hv-2514-net-mana-Use-struct_size-helper-in-mana_gd_create_dm.patch patches.suse/msft-hv-2516-net-mana-Add-counter-for-packet-dropped-by-XDP.patch @@ -63225,6 +63228,7 @@ patches.suse/USB-serial-whiteheat-fix-heap-overflow-in-WHITEHEAT_.patch patches.suse/arch_topology-Do-not-set-llc_sibling-if-llc_id-is-in.patch patches.suse/x86-cpu-load-microcode-during-restore_processor_state.patch + patches.suse/KVM-x86-work-around-QEMU-issue-with-synthetic-CPUID-leaves.patch patches.suse/nfc-replace-improper-check-device_is_registered-in-n.patch patches.suse/nfc-nfcmrvl-main-reorder-destructive-operations-in-n.patch patches.suse/secure_seq-use-the-64-bits-of-the-siphash-for-port-o.patch @@ -64194,6 +64198,8 @@ patches.suse/uaccess-Add-speculation-barrier-to-copy_from_user.patch patches.suse/x86-bugs-Reset-speculation-control-settings-on-init.patch patches.suse/x86-cpu-kvm-add-support-for-cpuid_80000021_eax.patch + patches.suse/KVM-x86-Move-open-coded-CPUID-leaf-0x80000021-EAX-bit-prop.patch + patches.suse/x86-cpu-kvm-Move-X86_FEATURE_LFENCE_RDTSC-to-its-native-le.patch patches.suse/arm64-cpufeature-Fix-field-sign-for-DIT-hwcap-detection.patch patches.suse/crypto-x86-ghash-fix-unaligned-access-in-ghash_setkey.patch patches.suse/gve-Fix-gve-interrupt-names.patch @@ -64860,6 +64866,14 
@@ patches.suse/dm-limit-the-number-of-targets-and-parameter-size-ar.patch patches.suse/USB-hub-check-for-alternate-port-before-enabling-A_A.patch + # tip/tip + patches.suse/x86-bugs-Add-asm-helpers-for-executing-VERW.patch + patches.suse/x86-entry_64-Add-VERW-just-before-userspace-transition.patch + patches.suse/x86-entry_32-Add-VERW-just-before-userspace-transition.patch + patches.suse/x86-bugs-Use-ALTERNATIVE-instead-of-mds_user_clear-static-.patch + patches.suse/KVM-VMX-Use-BT-JNC-i.e.-EFLAGS.CF-to-select-VMRESUME-vs.-V.patch + patches.suse/KVM-VMX-Move-VERW-closer-to-VMentry-for-MDS-mitigation.patch + # dhowells/linux-fs keys-uefi patches.suse/0001-KEYS-Allow-unrestricted-boot-time-addition-of-keys-t.patch patches.suse/0003-efi-Add-an-EFI-signature-blob-parser.patch