From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 3 Apr 2024 15:15:27 -0700
Subject: x86/bhi: Add support for clearing branch history at syscall entry
Patch-mainline: Not yet, embargoed patch
References: bsc#1217339 CVE-2024-2201

Branch History Injection (BHI) attacks may allow a malicious
application to influence indirect branch prediction in the kernel by
poisoning the branch history. eIBRS isolates indirect branch targets
in ring0.  The BHB can still influence the choice of indirect branch
predictor entry, and although branch predictor entries are isolated
between modes when eIBRS is enabled, the BHB itself is not isolated
between modes.

Alder Lake and newer processors support a hardware control, BHI_DIS_S,
to mitigate BHI.  For older parts that don't support BHI_DIS_S, Intel
has released a software sequence that clears the branch history.  Add
support to execute this software sequence at syscall entry and VMexit
to overwrite the branch history.

For now, branch history is not cleared at interrupt entry, as
malicious applications are not believed to have sufficient control
over the registers, since previous register state is cleared at
interrupt entry. Researchers continue to poke at this area and it may
become necessary to clear the branch history at interrupt entry as
well in the future.

This mitigation is only defined here. It is enabled later.
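
For reference, the later enablement is expected to look roughly like
the sketch below.  This is purely illustrative and not part of this
patch: bhi_select_mitigation(), X86_FEATURE_BHI_CTRL and
SPEC_CTRL_BHI_DIS_S are assumed names for the enumeration of the
hardware control and the MSR_IA32_SPEC_CTRL bit that sets it.

  /*
   * Illustrative sketch only (assumed names, see above): prefer the
   * hardware control when it is enumerated, otherwise fall back to the
   * software clearing loop added by this patch.
   */
  static void __init bhi_select_mitigation(void)
  {
  	if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) {
  		/* Hardware keeps the BHB isolated across privilege modes */
  		x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
  		return;
  	}

  	/* Older parts: run clear_bhb_loop at syscall entry and VMexit */
  	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
  }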

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Acked-by: Nikolay Borisov <nik.borisov@suse.com>
---
 arch/x86/entry/entry_64.S            |   60 +++++++++++++++++++++++++++++++++++
 arch/x86/entry/entry_64_compat.S     |    3 +
 arch/x86/include/asm/cpufeature.h    |    1 
 arch/x86/include/asm/cpufeatures.h   |    5 ++
 arch/x86/include/asm/nospec-branch.h |   23 +++++++++++++
 arch/x86/kvm/vmx.c                   |    2 +
 6 files changed, 93 insertions(+), 1 deletion(-)

--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -234,6 +234,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 	/* clobbers %rax, make sure it is after saving the syscall nr */
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	call	do_syscall_64		/* returns with IRQs disabled */
 
@@ -1730,3 +1731,62 @@ ENTRY(rewind_stack_do_exit)
 
 	call	do_exit
 END(rewind_stack_do_exit)
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent the Loop Stream Detector (LSD) from
+ * engaging and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *    call 2
+ *      call 2
+ *        call 2
+ *          call 2
+ *            call 2
+ *            ret
+ *          ret
+ *        ret
+ *      ret
+ *    ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone.  Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+ENTRY(clear_bhb_loop)
+	push	%rbp
+	mov	%rsp, %rbp
+	movl	$5, %ecx
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	1f
+	jmp	5f
+	.align 64, 0xcc
+	ANNOTATE_INTRA_FUNCTION_CALL
+1:	call	2f
+	RET
+	.align 64, 0xcc
+2:	movl	$5, %eax
+3:	jmp	4f
+	nop
+4:	sub	$1, %eax
+	jnz	3b
+	sub	$1, %ecx
+	jnz	1b
+	RET
+5:	lfence
+	pop	%rbp
+	RET
+ENDPROC(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -109,6 +109,7 @@ ENTRY(entry_SYSENTER_compat)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	/*
 	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -256,6 +257,7 @@ GLOBAL(entry_SYSCALL_compat_after_hwfram
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
@@ -427,6 +429,7 @@ ENTRY(entry_INT80_compat)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_int80_syscall_32
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -31,6 +31,7 @@ enum cpuid_leafs
 	CPUID_7_EDX,
 	CPUID_8000_0021_EAX,
 	CPUID_LNX_5,
+	CPUID_LNX_6,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	21      /* N 32-bit words worth of info */
+#define NCAPINTS	23      /* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -385,6 +385,9 @@
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* "" CPU is not affected by SRSO */
 
+/* Linux defined features */
+#define X86_FEATURE_CLEAR_BHB_LOOP	(22*32+ 0) /* "" Clear branch history at syscall entry using SW loop */
+
 /*
  * BUG word(s)
  */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -216,8 +216,27 @@
 .Lskip_verw_\@:
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+	ALTERNATIVE "jmp .Lskip_bhb_loop_\@", "", X86_FEATURE_CLEAR_BHB_LOOP
+	call clear_bhb_loop
+.Lskip_bhb_loop_\@:
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#endif
+
 #else /* __ASSEMBLY__ */
 
+#ifdef CONFIG_X86_64
+#define CLEAR_BRANCH_HISTORY \
+	ALTERNATIVE("jmp 1f\t\n", "", X86_FEATURE_CLEAR_BHB_LOOP) \
+	"call clear_bhb_loop\t\n" \
+	"1:\t\n"
+#else
+#define CLEAR_BRANCH_HISTORY
+#endif
+
 #define CLEAR_CPU_BUFFERS \
         ALTERNATIVE("jmp 1f\t\n", "", X86_FEATURE_CLEAR_CPU_BUF) \
         "verw " _ASM_RIP(mds_verw_sel) " \t\n"                             \
@@ -256,6 +275,10 @@ extern void srso_untrain_ret(void);
 extern void srso_alias_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 /*
  * Inline asm uses the %V modifier which is only in newer GCC
  * which is ensured when CONFIG_RETPOLINE is defined.
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10024,6 +10024,8 @@ static void __noclone vmx_vcpu_run(struc
 		"mov %%cr2, %%" _ASM_AX "   \n\t"
 		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
+		CLEAR_BRANCH_HISTORY
+
 		"xor %%eax, %%eax \n\t"
 		"xor %%ebx, %%ebx \n\t"
 		"xor %%ecx, %%ecx \n\t"