From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 7 Sep 2020 15:15:47 +0200
Subject: x86/sev-es: Add a Runtime #VC Exception Handler
Git-commit: 0786138c78e79343c7b015d77507cbf9d5f15d00
Patch-mainline: v5.10-rc1
References: jsc#SLE-14337

Add the handlers for #VC exceptions invoked at runtime.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200907131613.12703-47-joro@8bytes.org
---
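A note on the GHCB handling introduced below (not part of the upstream commit
message): the runtime #VC handler accesses the GHCB through a small get/put
protocol so that a #VC exception nested inside an NMI does not clobber the
GHCB contents of the interrupted handler. A minimal sketch of the caller
pattern, using the identifiers introduced in the sev-es.c hunks below:

	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = sev_es_get_ghcb(&state);	/* NULL if GHCB and backup GHCB are both in use */
	if (ghcb) {
		vc_ghcb_invalidate(ghcb);	/* clear the GHCB valid bitmap before emulation */
		/* ... emulate the intercepted instruction via the GHCB ... */
		sev_es_put_ghcb(&state);	/* restore backed-up GHCB contents, if any */
	}
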
arch/x86/entry/entry_64.S | 4
arch/x86/include/asm/traps.h | 3
arch/x86/kernel/idt.c | 11 +
arch/x86/kernel/sev-es.c | 241 ++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 251 insertions(+), 8 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1297,6 +1297,10 @@ idtentry async_page_fault do_async_page_
idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+idtentry_vc X86_TRAP_VC asm_vmm_communication vmm_communication
+#endif
+
/*
* Save all registers in pt_regs. Return GSBASE related information
* in EBX depending on the availability of the FSGSBASE instructions:
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -36,6 +36,9 @@ asmlinkage void alignment_check(void);
#ifdef CONFIG_X86_MCE
asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+asmlinkage void asm_vmm_communication(void);
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
asmlinkage void simd_coprocessor_error(void);
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -172,11 +172,14 @@ gate_desc debug_idt_table[IDT_ENTRIES] _
* cpu_init() when the TSS has been initialized.
*/
static const __initconst struct idt_data ist_idts[] = {
- ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
- ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
- ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
+ ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
+ ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
+ ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
- ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
+ ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
+#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ ISTG(X86_TRAP_VC, asm_vmm_communication, IST_INDEX_VC),
#endif
};
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -7,9 +7,12 @@
* Author: Joerg Roedel <jroedel@suse.de>
*/
+#define pr_fmt(fmt) "SEV-ES: " fmt
+
#include <linux/sched/debug.h> /* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/mem_encrypt.h>
+#include <linux/lockdep.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
@@ -22,8 +25,8 @@
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/processor.h>
-#include <asm/trap_pf.h>
-#include <asm/trapnr.h>
+#include <asm/realmode.h>
+#include <asm/traps.h>
#include <asm/svm.h>
/* For early boot hypervisor communication in SEV-ES enabled guests */
@@ -48,11 +51,45 @@ struct sev_es_runtime_data {
* interrupted stack in the #VC entry code.
*/
char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+ /*
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+ * It is needed when an NMI happens while the #VC handler uses the real
+ * GHCB, and the NMI handler itself causes another #VC exception. In
+ * that case the GHCB content of the first handler needs to be backed up
+ * and restored.
+ */
+ struct ghcb backup_ghcb;
+
+ /*
+ * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+ * There is no need for it to be atomic, because nothing is written to
+ * the GHCB between the read and the write of ghcb_active. So it is safe
+ * to use it when a nested #VC exception happens before the write.
+ *
+ * This is necessary for example in the #VC->NMI->#VC case when the NMI
+ * happens while the first #VC handler uses the GHCB. When the NMI code
+ * raises a second #VC handler it might overwrite the contents of the
+ * GHCB written by the first handler. To avoid this the content of the
+ * GHCB is saved and restored when the GHCB is detected to be in use
+ * already.
+ */
+ bool ghcb_active;
+ bool backup_ghcb_active;
+};
+
+struct ghcb_state {
+ struct ghcb *ghcb;
};
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+void safe_stack_vmm_communication(struct pt_regs *regs, unsigned long error_code);
+
static void __init setup_vc_stacks(int cpu)
{
struct sev_es_runtime_data *data;
@@ -123,8 +160,52 @@ void __sev_es_ist_exit(void)
this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
-/* Needed in vc_early_forward_exception */
-void do_early_exception(struct pt_regs *regs, int trapnr);
+static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (unlikely(data->ghcb_active)) {
+ /* GHCB is already in use - save its contents */
+
+ if (unlikely(data->backup_ghcb_active))
+ return NULL;
+
+ /* Mark backup_ghcb active before writing to it */
+ data->backup_ghcb_active = true;
+
+ state->ghcb = &data->backup_ghcb;
+
+ /* Backup GHCB content */
+ *state->ghcb = *ghcb;
+ } else {
+ state->ghcb = NULL;
+ data->ghcb_active = true;
+ }
+
+ return ghcb;
+}
+
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ data->ghcb_active = false;
+ }
+}
static inline u64 sev_es_rd_ghcb_msr(void)
{
@@ -316,6 +397,9 @@ static void __init init_ghcb(int cpu)
panic("Can't map GHCBs unencrypted");
memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
+
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
}
void __init sev_es_init_vc_handling(void)
@@ -336,6 +420,9 @@ void __init sev_es_init_vc_handling(void
init_ghcb(cpu);
setup_vc_stacks(cpu);
}
+
+ /* Secondary CPUs use the runtime #VC handler */
+ initial_vc_handler = (unsigned long)safe_stack_vmm_communication;
}
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
@@ -366,6 +453,152 @@ static enum es_result vc_handle_exitcode
return result;
}
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+ long error_code = ctxt->fi.error_code;
+ int trapnr = ctxt->fi.vector;
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+ switch (trapnr) {
+ case X86_TRAP_GP:
+ do_general_protection(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_UD:
+ do_invalid_op(ctxt->regs, error_code);
+ break;
+ default:
+ pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+ BUG();
+ }
+}
+
+static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
+{
+ unsigned long sp = (unsigned long)regs;
+
+ return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
+}
+
+/*
+ * Main #VC exception handler. It is called when the entry code was able to
+ * switch off the IST to a safe kernel stack.
+ *
+ * With the current implementation it is always possible to switch to a safe
+ * stack because #VC exceptions only happen at known places, like intercepted
+ * instructions or accesses to MMIO areas/IO ports. They can also happen with
+ * code instrumentation when the hypervisor intercepts #DB, but the critical
+ * paths are forbidden to be instrumented, so #DB exceptions currently also
+ * only happen in safe places.
+ */
+void safe_stack_vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+ struct ghcb *ghcb;
+
+ /*
+ * This is invoked through an interrupt gate, so IRQs are disabled. The
+ * code below might walk page-tables for user or kernel addresses, so
+ * keep the IRQs disabled to protect us against concurrent TLB flushes.
+ */
+
+ ghcb = sev_es_get_ghcb(&state);
+ if (!ghcb) {
+ /*
+ * Mark GHCBs inactive so that panic() is able to print the
+ * message.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ }
+
+ vc_ghcb_invalidate(ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, error_code);
+
+ if (result == ES_OK)
+ result = vc_handle_exitcode(&ctxt, ghcb, error_code);
+
+ sev_es_put_ghcb(&state);
+
+ /* Done - now check the result */
+ switch (result) {
+ case ES_OK:
+ vc_finish_insn(&ctxt);
+ break;
+ case ES_UNSUPPORTED:
+ pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_VMM_ERROR:
+ pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_DECODE_FAILED:
+ pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ break;
+ case ES_RETRY:
+ /* Nothing to do */
+ break;
+ default:
+ pr_emerg("Unknown result in %s():%d\n", __func__, result);
+ /*
+ * Emulating the instruction which caused the #VC exception
+ * failed - can't continue so print debug information
+ */
+ BUG();
+ }
+
+out:
+ return;
+
+fail:
+ if (user_mode(regs)) {
+ /*
+ * Do not kill the machine if user-space triggered the
+ * exception. Send SIGBUS instead and let user-space deal with
+ * it.
+ */
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+ } else {
+ pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
+ result);
+
+ /* Show some debug info */
+ show_regs(regs);
+
+ /* Ask the hypervisor to terminate the guest */
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+ /* If that fails and we get here - just panic */
+ panic("Returned from Terminate-Request to Hypervisor\n");
+ }
+
+ goto out;
+}
+
+/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
+void ist_vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ panic("Can't handle #VC exception from unsupported context\n");
+}
+
+void vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ if (likely(!on_vc_fallback_stack(regs)))
+ safe_stack_vmm_communication(regs, error_code);
+ else
+ ist_vmm_communication(regs, error_code);
+}
+
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
unsigned long exit_code = regs->orig_ax;