From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 14 Jun 2022 23:15:48 +0200
Subject: x86: Add magic AMD return-thunk
Git-commit: a149180fbcf336e97ce4eb2cdc13672727feb94d
Patch-mainline: Queued in tip for 5.19
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
References: bsc#1199657 CVE-2022-29900 CVE-2022-29901

Note: needs to be in a section distinct from Retpolines such that the
Retpoline RET substitution cannot possibly use immediate jumps.
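
Concretely (just a reading aid, restating the retpoline.S and
vmlinux.lds.S hunks below): the return thunk gets its own section and
the linker script is widened to a wildcard so both thunk sections still
land between __indirect_thunk_start/__indirect_thunk_end:

	.section .text.__x86.return_thunk	/* retpoline.S */
	*(.text.__x86.*)			/* vmlinux.lds.S */
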
ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
little tricky but works due to the fact that zen_untrain_ret() doesn't
have any stack ops and as such will emit a single ORC entry at the
start (+0x3f).

Meanwhile, unwinding an IP, including the __x86_return_thunk() one
(+0x40), will search for the largest ORC entry smaller than or equal to
the IP; this will find the one ORC entry (+0x3f) and all works.
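
For reference, a rough sketch of the resulting byte layout,
reconstructed from the retpoline.S hunk below (offsets are within the
64-byte aligned block, the leading 0xcc padding is left out):

	+0x3f	zen_untrain_ret:	0xf6	/* TEST opcode */
	+0x40	__x86_return_thunk:	0xc3	/* ModRM of the TEST; the RET */
	+0x41				0xcc	/* imm8 of the TEST; the INT3 */
					lfence
					jmp __x86_return_thunk
					int3

which is why the single ORC entry at +0x3f covers both symbols.
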
[ Alexandre: SVM part. ]
[ bp: Build fix, massages. ]
Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/entry/entry_64.S | 11 ++++++
arch/x86/entry/entry_64_compat.S | 7 +++
arch/x86/include/asm/cpufeatures.h | 1
arch/x86/include/asm/nospec-branch.h | 17 +++++++++
arch/x86/kernel/vmlinux.lds.S | 2 -
arch/x86/kvm/svm.c | 4 ++
arch/x86/lib/retpoline.S | 64 +++++++++++++++++++++++++++++++++--
7 files changed, 103 insertions(+), 3 deletions(-)
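
A note on how the UNTRAIN_RET sites behave at runtime (this is standard
ALTERNATIVE behaviour, spelled out here only as a reading aid): the
empty original instruction means each site is padded with NOPs at build
time, and on CPUs that set X86_FEATURE_UNRET the alternatives patching
rewrites the site into the call at boot:

	/* X86_FEATURE_UNRET clear */		/* X86_FEATURE_UNRET set */
	nops					call zen_untrain_ret

zen_untrain_ret executes the TEST/LFENCE sequence and jumps back to the
RET at __x86_return_thunk, which returns to the call site leaving a
sane (type=ret) BTB prediction behind.
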
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,7 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -107,6 +108,8 @@ ENTRY(entry_SYSENTER_compat)
xorl %r15d, %r15d /* nospec r15 */
cld
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -248,6 +251,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwfram
/* Restrict Indirect Branch Speculation. All registers are saved already */
RESTRICT_IB_SPEC_CLOBBER
+ UNTRAIN_RET
+
/* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
*/
@@ -433,6 +438,8 @@ ENTRY(entry_INT80_compat)
*/
TRACE_IRQS_OFF
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_int80_syscall_32
.Lsyscall_32_done:
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -233,6 +233,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
/* IRQs are off. */
movq %rsp, %rdi
+
+ UNTRAIN_RET
+
call do_syscall_64 /* returns with IRQs disabled */
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -716,6 +719,7 @@ native_irq_return_ldt:
pushq %rdi /* Stash user RDI */
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+ UNTRAIN_RET
/*
* There is no point in disabling Indirect Branch Speculation
@@ -870,8 +874,11 @@ ENTRY(switch_to_thread_stack)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
/* Restrict Indirect Branch Speculation */
RESTRICT_IB_SPEC
+ UNTRAIN_RET
+
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
pushq 7*8(%rdi) /* regs->ss */
@@ -1250,6 +1257,7 @@ ENTRY(error_entry)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/* Restrict Indirect Branch Speculation */
RESTRICT_IB_SPEC_CLOBBER
+ UNTRAIN_RET
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@@ -1301,6 +1309,7 @@ ENTRY(error_entry)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/* Restrict Indirect Branch Speculation */
RESTRICT_IB_SPEC_CLOBBER
+ UNTRAIN_RET
jmp .Lerror_entry_done
.Lbstep_iret:
@@ -1318,6 +1327,7 @@ ENTRY(error_entry)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/* Restrict Indirect Branch Speculation */
RESTRICT_IB_SPEC
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1415,6 +1425,7 @@ ENTRY(nmi)
/* Restrict Indirect Branch Speculation */
RESTRICT_IB_SPEC
+ UNTRAIN_RET
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -150,6 +150,22 @@
#endif
.endm
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * Doesn't clobber any registers but does require a stable stack.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
+#endif
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_NOSPEC_ALTERNATIVE \
@@ -168,6 +184,7 @@
#ifdef CONFIG_X86_64
extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
/*
* Inline asm uses the %V modifier which is only in newer GCC
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -127,7 +127,7 @@ SECTIONS
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -52,6 +52,7 @@
#include <asm/microcode.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -5682,6 +5683,9 @@ static void svm_vcpu_run(struct kvm_vcpu
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
+
+ ALTERNATIVE("", "call zen_untrain_ret", X86_FEATURE_UNRET)
+
/*
* Clear host registers marked as clobbered to prevent
* speculative use.
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -53,11 +53,71 @@ GENERATE_THUNK(r15)
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
*/
+
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisoning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+.globl zen_untrain_ret;
+zen_untrain_ret:
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+
ENTRY(__x86_return_thunk)
ret
int3
-ENDPROC(__x86_return_thunk)
-__EXPORT_THUNK(__x86_return_thunk)
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+ENDPROC(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
#endif /* CONFIG_RETPOLINE */