From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 14 Jun 2022 23:15:48 +0200
Subject: x86: Add magic AMD return-thunk
Git-commit: a149180fbcf336e97ce4eb2cdc13672727feb94d
Patch-mainline: v5.19-rc4
References: bsc#1199657 CVE-2022-29900 CVE-2022-29901

Note: the return thunk needs to be in a section distinct from the
Retpolines such that the Retpoline RET substitution cannot possibly use
immediate jumps.

ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a
little tricky but works because zen_untrain_ret() doesn't have any
stack ops and as such will emit a single ORC entry at its start
(+0x3f).

Meanwhile, unwinding an IP, including the __x86_return_thunk() one
(+0x40), will search for the largest ORC entry smaller than or equal
to that IP; this will find the one ORC entry (+0x3f) and all works.
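
For illustration only (not part of this patch): a minimal C sketch of the
lookup described above, assuming a sorted table of ORC entry start
addresses. The function name and signature are made up for the example;
the kernel's real ORC unwinder differs in detail, but the "largest entry
<= IP" search is the same idea:

  #include <stddef.h>

  /* Return the index of the last entry with ip_table[i] <= ip. */
  static size_t orc_lookup(const unsigned long *ip_table, size_t num,
                           unsigned long ip)
  {
          size_t lo = 0, hi = num;

          /* Assumes num > 0 and ip >= ip_table[0]. */
          while (hi - lo > 1) {
                  size_t mid = lo + (hi - lo) / 2;

                  if (ip_table[mid] <= ip)
                          lo = mid;
                  else
                          hi = mid;
          }

          /* Both +0x3f and +0x40 resolve to the same (+0x3f) entry. */
          return lo;
  }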

  [ Alexandre: SVM part. ]
  [ bp: Build fix, massages. ]

Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/entry/entry_64.S            |   10 +++++
 arch/x86/entry/entry_64_compat.S     |    5 ++
 arch/x86/include/asm/cpufeatures.h   |    1 
 arch/x86/include/asm/nospec-branch.h |   17 +++++++++
 arch/x86/kernel/vmlinux.lds.S        |    2 -
 arch/x86/kvm/svm.c                   |    4 ++
 arch/x86/lib/retpoline.S             |   63 ++++++++++++++++++++++++++++++++++-
 7 files changed, 100 insertions(+), 2 deletions(-)

--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,7 @@
 #include <asm/irqflags.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
@@ -105,6 +106,8 @@ ENTRY(entry_SYSENTER_compat)
 	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
+	UNTRAIN_RET
+
 	/*
 	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
 	 * ourselves.  To save a few cycles, we can check whether
@@ -416,6 +419,8 @@ ENTRY(entry_INT80_compat)
 	 */
 	TRACE_IRQS_OFF
 
+	UNTRAIN_RET
+
 	movq	%rsp, %rdi
 	call	do_int80_syscall_32
 .Lsyscall_32_done:
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -229,6 +229,9 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 
 	/* IRQs are off. */
 	movq	%rsp, %rdi
+
+	UNTRAIN_RET
+
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
@@ -709,6 +712,7 @@ native_irq_return_ldt:
 	pushq	%rdi				/* Stash user RDI */
 	SWAPGS					/* to kernel GS */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+	UNTRAIN_RET
 
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
@@ -856,6 +860,9 @@ ENTRY(switch_to_thread_stack)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+	UNTRAIN_RET
+
 	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
 
 	pushq	7*8(%rdi)		/* regs->ss */
@@ -1228,6 +1235,7 @@ ENTRY(error_entry)
 	FENCE_SWAPGS_USER_ENTRY
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	UNTRAIN_RET
 
 .Lerror_entry_from_usermode_after_swapgs:
 	/* Put us onto the real thread stack. */
@@ -1277,6 +1285,7 @@ ENTRY(error_entry)
 	SWAPGS
 	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	UNTRAIN_RET
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1368,6 +1377,7 @@ ENTRY(nmi)
 
 	testb	$3, CS-RIP+8(%rsp)
 	jz	.Lnmi_from_kernel
+	UNTRAIN_RET
 
 	/*
 	 * NMI from user mode.  We need to run on the thread stack, but we
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -287,6 +287,7 @@
 #define X86_FEATURE_RETPOLINE		(11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_LFENCE	(11*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -150,6 +150,22 @@
 #endif
 .endm
 
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * Doesn't clobber any registers but does require a stable stack.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#ifdef CONFIG_RETPOLINE
+	ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET
+#endif
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_NOSPEC_ALTERNATIVE				\
@@ -168,6 +184,7 @@
 #ifdef CONFIG_X86_64
 
 extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
 
 /*
  * Inline asm uses the %V modifier which is only in newer GCC
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -127,7 +127,7 @@ SECTIONS
 
 #ifdef CONFIG_RETPOLINE
 		__indirect_thunk_start = .;
-		*(.text.__x86.indirect_thunk)
+		*(.text.__x86.*)
 		__indirect_thunk_end = .;
 #endif
 
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -52,6 +52,7 @@
 #include <asm/microcode.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -5682,6 +5683,9 @@ static void svm_vcpu_run(struct kvm_vcpu
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 #endif
+
+		ALTERNATIVE("", "call zen_untrain_ret", X86_FEATURE_UNRET)
+
 		/*
 		* Clear host registers marked as clobbered to prevent
 		* speculative use.
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -50,9 +50,70 @@ GENERATE_THUNK(r15)
  * This function name is magical and is used by -mfunction-return=thunk-extern
  * for the compiler to generate JMPs to it.
  */
+
+	.section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ *    alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ *    end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ *    from re-poisoning the BTB prediction.
+ */
+	.align 64
+	.skip 63, 0xcc
+.globl zen_untrain_ret;
+zen_untrain_ret:
+
+	/*
+	 * As executed from zen_untrain_ret, this is:
+	 *
+	 *   TEST $0xcc, %bl
+	 *   LFENCE
+	 *   JMP __x86_return_thunk
+	 *
+	 * Executing the TEST instruction has a side effect of evicting any BTB
+	 * prediction (potentially attacker controlled) attached to the RET, as
+	 * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+	 */
+	.byte	0xf6
+
+	/*
+	 * As executed from __x86_return_thunk, this is a plain RET.
+	 *
+	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+	 *
+	 * We subsequently jump backwards and architecturally execute the RET.
+	 * This creates a correct BTB prediction (type=ret), but in the
+	 * meantime we suffer Straight Line Speculation (because the type was
+	 * no branch) which is halted by the INT3.
+	 *
+	 * With SMT enabled and STIBP active, a sibling thread cannot poison
+	 * RET's prediction to a type of its choice, but can evict the
+	 * prediction due to competitive sharing. If the prediction is
+	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
+	 * which will be contained safely by the INT3.
+	 */
+
 ENTRY(__x86_return_thunk)
 	ret
 	int3
 ENDPROC(__x86_return_thunk)
 
-__EXPORT_THUNK(__x86_return_thunk)
+	/*
+	 * Ensure the TEST decoding / BTB invalidation is complete.
+	 */
+	lfence
+
+	/*
+	 * Jump back and execute the RET in the middle of the TEST instruction.
+	 * INT3 is for SLS protection.
+	 */
+	jmp __x86_return_thunk
+	int3
+ENDPROC(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
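
For reference only -- not part of the patch: a small C sketch of the byte
overlap that the retpoline.S comments above describe. The array name is
made up for the illustration; offsets are relative to the 64-byte
alignment boundary established by ".align 64" / ".skip 63, 0xcc":

  /*
   * +0x3f: 0xf6  TEST r/m8, imm8 opcode      (zen_untrain_ret entry point)
   * +0x40: 0xc3  ModRM (/0, %bl), or RET     (__x86_return_thunk entry point)
   * +0x41: 0xcc  imm8, or INT3               (contains straight-line speculation)
   *
   * Entered at +0x3f these bytes decode as TEST $0xcc, %bl; entered at
   * +0x40 they decode as RET followed by INT3.
   */
  static const unsigned char zen_untrain_bytes[] = { 0xf6, 0xc3, 0xcc };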