Borislav Petkov acb666
From: Thomas Gleixner <tglx@linutronix.de>
Borislav Petkov acb666
Date: Mon, 18 Feb 2019 23:42:51 +0100
Borislav Petkov acb666
Subject: x86/speculation/mds: Clear CPU buffers on exit to user
Borislav Petkov acb666
Git-commit: 04dcbdb8057827b043b3c71aa397c4c63e67d086
Borislav Petkov acb666
Patch-mainline: v5.1-rc1
Borislav Petkov 6576c0
References: bsc#1111331, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091
Borislav Petkov acb666
Borislav Petkov acb666
Add a static key which controls the invocation of the CPU buffer clear
Borislav Petkov acb666
mechanism on exit to user space and add the call into
Borislav Petkov acb666
prepare_exit_to_usermode() and do_nmi() right before actually returning.
Borislav Petkov acb666
Borislav Petkov acb666
Add documentation which kernel to user space transition this covers and
Borislav Petkov acb666
explain why some corner cases are not mitigated.
Borislav Petkov acb666
Borislav Petkov acb666
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Borislav Petkov acb666
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Borislav Petkov acb666
Reviewed-by: Borislav Petkov <bp@suse.de>
Borislav Petkov acb666
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Borislav Petkov acb666
Reviewed-by: Jon Masters <jcm@redhat.com>
Borislav Petkov acb666
Tested-by: Jon Masters <jcm@redhat.com>
Borislav Petkov acb666
Acked-by: Borislav Petkov <bp@suse.de>
Borislav Petkov acb666
---
Borislav Petkov acb666
 Documentation/x86/mds.rst            |   52 +++++++++++++++++++++++++++++++++++
Borislav Petkov acb666
 arch/x86/entry/common.c              |    3 ++
Borislav Petkov acb666
 arch/x86/include/asm/nospec-branch.h |   13 ++++++++
Borislav Petkov acb666
 arch/x86/kernel/cpu/bugs.c           |    3 ++
Borislav Petkov acb666
 arch/x86/kernel/nmi.c                |    4 ++
Borislav Petkov acb666
 arch/x86/kernel/traps.c              |    8 +++++
Borislav Petkov acb666
 6 files changed, 83 insertions(+)
Borislav Petkov acb666
Borislav Petkov acb666
--- a/arch/x86/entry/common.c
Borislav Petkov acb666
+++ b/arch/x86/entry/common.c
Borislav Petkov acb666
@@ -30,6 +30,7 @@
Borislav Petkov acb666
 #include <asm/vdso.h>
Borislav Petkov acb666
 #include <linux/uaccess.h>
Borislav Petkov acb666
 #include <asm/cpufeature.h>
Borislav Petkov acb666
+#include <asm/nospec-branch.h>
Borislav Petkov acb666
 
Borislav Petkov acb666
 #define CREATE_TRACE_POINTS
Borislav Petkov acb666
 #include <trace/events/syscalls.h>
Borislav Petkov acb666
@@ -208,6 +209,8 @@ __visible inline void prepare_exit_to_us
Borislav Petkov acb666
 #endif
Borislav Petkov acb666
 
Borislav Petkov acb666
 	user_enter_irqoff();
Borislav Petkov acb666
+
Borislav Petkov acb666
+	mds_user_clear_cpu_buffers();
Borislav Petkov acb666
 }
Borislav Petkov acb666
 
Borislav Petkov acb666
 #define SYSCALL_EXIT_WORK_FLAGS				\
Borislav Petkov acb666
--- a/arch/x86/include/asm/nospec-branch.h
Borislav Petkov acb666
+++ b/arch/x86/include/asm/nospec-branch.h
Borislav Petkov acb666
@@ -318,6 +318,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_
Borislav Petkov acb666
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
Borislav Petkov acb666
 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
Borislav Petkov acb666
 
Borislav Petkov acb666
+DECLARE_STATIC_KEY_FALSE(mds_user_clear);
Borislav Petkov acb666
+
Borislav Petkov acb666
 #include <asm/segment.h>
Borislav Petkov acb666
 
Borislav Petkov acb666
 /**
Borislav Petkov acb666
@@ -343,6 +345,17 @@ static inline void mds_clear_cpu_buffers
Borislav Petkov acb666
 	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
Borislav Petkov acb666
 }
Borislav Petkov acb666
 
Borislav Petkov acb666
+/**
Borislav Petkov acb666
+ * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
Borislav Petkov acb666
+ *
Borislav Petkov acb666
+ * Clear CPU buffers if the corresponding static key is enabled
Borislav Petkov acb666
+ */
Borislav Petkov acb666
+static inline void mds_user_clear_cpu_buffers(void)
Borislav Petkov acb666
+{
Borislav Petkov acb666
+	if (static_branch_likely(&mds_user_clear))
Borislav Petkov acb666
+		mds_clear_cpu_buffers();
Borislav Petkov acb666
+}
Borislav Petkov acb666
+
Borislav Petkov acb666
 #endif /* __ASSEMBLY__ */
Borislav Petkov acb666
 
Borislav Petkov acb666
 /*
Borislav Petkov acb666
--- a/arch/x86/kernel/cpu/bugs.c
Borislav Petkov acb666
+++ b/arch/x86/kernel/cpu/bugs.c
Borislav Petkov acb666
@@ -60,6 +60,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_i
Borislav Petkov acb666
 /* Control unconditional IBPB in switch_mm() */
Borislav Petkov acb666
 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
Borislav Petkov acb666
 
Borislav Petkov acb666
+/* Control MDS CPU buffer clear before returning to user space */
Borislav Petkov acb666
+DEFINE_STATIC_KEY_FALSE(mds_user_clear);
Borislav Petkov acb666
+
Borislav Petkov acb666
 void __init check_bugs(void)
Borislav Petkov acb666
 {
Borislav Petkov acb666
 	identify_boot_cpu();
Borislav Petkov acb666
--- a/arch/x86/kernel/nmi.c
Borislav Petkov acb666
+++ b/arch/x86/kernel/nmi.c
Borislav Petkov acb666
@@ -34,6 +34,7 @@
Borislav Petkov acb666
 #include <asm/x86_init.h>
Borislav Petkov acb666
 #include <asm/reboot.h>
Borislav Petkov acb666
 #include <asm/cache.h>
Borislav Petkov acb666
+#include <asm/nospec-branch.h>
Borislav Petkov acb666
 
Borislav Petkov acb666
 #define CREATE_TRACE_POINTS
Borislav Petkov acb666
 #include <trace/events/nmi.h>
Borislav Petkov acb666
@@ -533,6 +534,9 @@ nmi_restart:
Borislav Petkov acb666
 		write_cr2(this_cpu_read(nmi_cr2));
Borislav Petkov acb666
 	if (this_cpu_dec_return(nmi_state))
Borislav Petkov acb666
 		goto nmi_restart;
Borislav Petkov acb666
+
Borislav Petkov acb666
+	if (user_mode(regs))
Borislav Petkov acb666
+		mds_user_clear_cpu_buffers();
Borislav Petkov acb666
 }
Borislav Petkov acb666
 NOKPROBE_SYMBOL(do_nmi);
Borislav Petkov acb666
 
Borislav Petkov acb666
--- a/arch/x86/kernel/traps.c
Borislav Petkov acb666
+++ b/arch/x86/kernel/traps.c
Borislav Petkov acb666
@@ -59,6 +59,7 @@
Borislav Petkov acb666
 #include <asm/alternative.h>
Borislav Petkov acb666
 #include <asm/fpu/xstate.h>
Borislav Petkov acb666
 #include <asm/trace/mpx.h>
Borislav Petkov acb666
+#include <asm/nospec-branch.h>
Borislav Petkov acb666
 #include <asm/mpx.h>
Borislav Petkov acb666
 #include <asm/vm86.h>
Borislav Petkov acb666
 #include <asm/umip.h>
Borislav Petkov acb666
@@ -394,6 +395,13 @@ dotraplinkage void do_double_fault(struc
Borislav Petkov acb666
 		regs->ip = (unsigned long)general_protection;
Borislav Petkov acb666
 		regs->sp = (unsigned long)&gpregs->orig_ax;
Borislav Petkov acb666
 
Borislav Petkov acb666
+		/*
Borislav Petkov acb666
+		 * This situation can be triggered by userspace via
Borislav Petkov acb666
+		 * modify_ldt(2) and the return does not take the regular
Borislav Petkov acb666
+		 * user space exit, so a CPU buffer clear is required when
Borislav Petkov acb666
+		 * MDS mitigation is enabled.
Borislav Petkov acb666
+		 */
Borislav Petkov acb666
+		mds_user_clear_cpu_buffers();
Borislav Petkov acb666
 		return;
Borislav Petkov acb666
 	}
Borislav Petkov acb666
 #endif
Borislav Petkov acb666
--- a/Documentation/x86/mds.rst
Borislav Petkov acb666
+++ b/Documentation/x86/mds.rst
Borislav Petkov acb666
@@ -97,3 +97,55 @@ According to current knowledge additiona
Borislav Petkov acb666
 itself are not required because the necessary gadgets to expose the leaked
Borislav Petkov acb666
 data cannot be controlled in a way which allows exploitation from malicious
Borislav Petkov acb666
 user space or VM guests.
Borislav Petkov acb666
+
Borislav Petkov acb666
+Mitigation points
Borislav Petkov acb666
+-----------------
Borislav Petkov acb666
+
Borislav Petkov acb666
+1. Return to user space
Borislav Petkov acb666
+^^^^^^^^^^^^^^^^^^^^^^^
Borislav Petkov acb666
+
Borislav Petkov acb666
+   When transitioning from kernel to user space the CPU buffers are flushed
Borislav Petkov acb666
+   on affected CPUs when the mitigation is not disabled on the kernel
Borislav Petkov acb666
+   command line. The mitigation is enabled through the static key
Borislav Petkov acb666
+   mds_user_clear.
Borislav Petkov acb666
+
Borislav Petkov acb666
+   The mitigation is invoked in prepare_exit_to_usermode() which covers
Borislav Petkov acb666
+   most of the kernel to user space transitions. There are a few exceptions
Borislav Petkov acb666
+   which are not invoking prepare_exit_to_usermode() on return to user
Borislav Petkov acb666
+   space. These exceptions use the paranoid exit code.
Borislav Petkov acb666
+
Borislav Petkov acb666
+   - Non Maskable Interrupt (NMI):
Borislav Petkov acb666
+
Borislav Petkov acb666
+     Access to sensitive data like keys, credentials in the NMI context is
Borislav Petkov acb666
+     mostly theoretical: The CPU can do prefetching or execute a
Borislav Petkov acb666
+     misspeculated code path and thereby fetch data which might end up
Borislav Petkov acb666
+     leaking through a buffer.
Borislav Petkov acb666
+
Borislav Petkov acb666
+     But for mounting other attacks the kernel stack address of the task is
Borislav Petkov acb666
+     already valuable information. So in full mitigation mode, the NMI is
Borislav Petkov acb666
+     mitigated on the return from do_nmi() to provide almost complete
Borislav Petkov acb666
+     coverage.
Borislav Petkov acb666
+
Borislav Petkov acb666
+   - Double fault (#DF):
Borislav Petkov acb666
+
Borislav Petkov acb666
+     A double fault is usually fatal, but the ESPFIX workaround, which can
Borislav Petkov acb666
+     be triggered from user space through modify_ldt(2) is a recoverable
Borislav Petkov acb666
+     double fault. #DF uses the paranoid exit path, so explicit mitigation
Borislav Petkov acb666
+     in the double fault handler is required.
Borislav Petkov acb666
+
Borislav Petkov acb666
+   - Machine Check Exception (#MC):
Borislav Petkov acb666
+
Borislav Petkov acb666
+     Another corner case is a #MC which hits between the CPU buffer clear
Borislav Petkov acb666
+     invocation and the actual return to user. As this still is in kernel
Borislav Petkov acb666
+     space it takes the paranoid exit path which does not clear the CPU
Borislav Petkov acb666
+     buffers. So the #MC handler repopulates the buffers to some
Borislav Petkov acb666
+     extent. Machine checks are not reliably controllable and the window is
Borislav Petkov acb666
+     extremely small so mitigation would just tick a checkbox that this
Borislav Petkov acb666
+     theoretical corner case is covered. To keep the amount of special
Borislav Petkov acb666
+     cases small, ignore #MC.
Borislav Petkov acb666
+
Borislav Petkov acb666
+   - Debug Exception (#DB):
Borislav Petkov acb666
+
Borislav Petkov acb666
+     This takes the paranoid exit path only when the INT1 breakpoint is in
Borislav Petkov acb666
+     kernel space. #DB on a user space address takes the regular exit path,
Borislav Petkov acb666
+     so no extra mitigation is required.