From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 14 Jun 2022 23:15:48 +0200
Subject: x86: Add magic AMD return-thunk
Git-commit: a149180fbcf336e97ce4eb2cdc13672727feb94d
Patch-mainline: v5.19-rc4
References: bsc#1199657 CVE-2022-29900 CVE-2022-29901
Note: needs to be in a section distinct from Retpolines such that the
Retpoline RET substitution cannot possibly use immediate jumps.
ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
little tricky but works due to the fact that zen_untrain_ret() doesn't
have any stack ops and as such will emit a single ORC entry at the
start (+0x3f).
Meanwhile, unwinding an IP, including the __x86_return_thunk() one
(+0x40) will search for the largest ORC entry smaller or equal to the
IP, these will find the one ORC entry (+0x3f) and all works.
[ Alexandre: SVM part. ]
[ bp: Build fix, massages. ]
Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/entry/entry_64.S | 5 ++
arch/x86/entry/entry_64_compat.S | 3 +
arch/x86/include/asm/cpufeatures.h | 1
arch/x86/include/asm/nospec-branch.h | 17 +++++++++
arch/x86/kernel/vmlinux.lds.S | 2 -
arch/x86/kvm/svm.c | 4 ++
arch/x86/lib/retpoline.S | 61 ++++++++++++++++++++++++++++++++++-
7 files changed, 91 insertions(+), 2 deletions(-)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -14,6 +14,7 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat)
xorl %r15d, %r15d /* nospec r15 */
cld
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -155,6 +155,7 @@ ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNTRAIN_RET
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
@@ -746,6 +747,7 @@ native_irq_return_ldt:
pushq %rdi /* Stash user RDI */
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+ UNTRAIN_RET
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
@@ -1244,6 +1246,7 @@ ENTRY(paranoid_entry)
* to kernel code, but with a user CR3 value.
*/
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ UNTRAIN_RET
/*
* The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
@@ -1305,6 +1308,7 @@ ENTRY(error_entry)
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ UNTRAIN_RET
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@@ -1455,6 +1459,7 @@ ENTRY(nmi)
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
+ UNTRAIN_RET
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
pushq 3*8(%rdx) /* pt_regs->flags */
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -146,6 +146,22 @@
#endif
.endm
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * Doesn't clobber any registers but does require a stable stack.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
+#endif
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -158,6 +174,7 @@
#ifdef CONFIG_X86_64
extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
/*
* Inline asm uses the %V modifier which is only in newer GCC
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -138,7 +138,7 @@ SECTIONS
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -48,6 +48,7 @@
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -5784,6 +5785,9 @@ static void svm_vcpu_run(struct kvm_vcpu
"mov %%r13, %c[r13](%[svm]) \n\t"
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
+
+ ALTERNATIVE("", "call zen_untrain_ret", X86_FEATURE_UNRET)
+
/*
* Clear host registers marked as clobbered to prevent
* speculative use.
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -51,9 +51,68 @@ GENERATE_THUNK(r15)
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
*/
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+.globl zen_untrain_ret;
+zen_untrain_ret:
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
ENTRY(__x86_return_thunk)
ret
int3
END(__x86_return_thunk)
-__EXPORT_THUNK(__x86_return_thunk)
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)