From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:43 +0200
Subject: x86/kvm: Disable all PV features on crash
Patch-mainline: v5.13-rc2
Git-commit: 3d6b84132d2a57b5a74100f6923a8feb679ac2ce
References: bsc#1185308
Crash shutdown handler only disables kvmclock and steal time, other PV
features remain active so we risk corrupting memory or getting some
side-effects in kdump kernel. Move crash handler to kvm.c and unify
with CPU offline.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Liang Yan <lyan@suse.com>
---
arch/x86/include/asm/kvm_para.h | 5 ----
arch/x86/kernel/kvm.c | 44 +++++++++++++++++++++++++++++-----------
arch/x86/kernel/kvmclock.c | 21 -------------------
3 files changed, 32 insertions(+), 38 deletions(-)
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,6 @@ void __init kvm_guest_init(void);
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
-extern void kvm_disable_steal_time(void);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
@@ -122,10 +121,6 @@ static inline u32 kvm_read_and_reset_pf_
return 0;
}
-static inline void kvm_disable_steal_time(void)
-{
- return;
-}
#endif
#endif /* _ASM_X86_KVM_PARA_H */
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -47,6 +47,7 @@
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
+#include <asm/reboot.h>
static int kvmapf = 1;
@@ -377,6 +378,14 @@ static void kvm_pv_disable_apf(void)
smp_processor_id());
}
+static void kvm_disable_steal_time(void)
+{
+ if (!has_steal_clock)
+ return;
+
+ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
static void kvm_pv_guest_cpu_reboot(void *unused)
{
/*
@@ -419,14 +428,6 @@ static u64 kvm_steal_clock(int cpu)
return steal;
}
-void kvm_disable_steal_time(void)
-{
- if (!has_steal_clock)
- return;
-
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
early_set_memory_decrypted((unsigned long) ptr, size);
@@ -454,13 +455,14 @@ static void __init sev_map_percpu_data(v
}
}
-static void kvm_guest_cpu_offline(void)
+static void kvm_guest_cpu_offline(bool shutdown)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
- apf_task_wake_all();
+ if (!shutdown)
+ apf_task_wake_all();
kvmclock_disable();
}
@@ -493,7 +495,7 @@ static int kvm_cpu_down_prepare(unsigned
unsigned long flags;
local_irq_save(flags);
- kvm_guest_cpu_offline();
+ kvm_guest_cpu_offline(false);
local_irq_restore(flags);
return 0;
}
@@ -501,7 +503,7 @@ static int kvm_cpu_down_prepare(unsigned
static int kvm_suspend(void)
{
- kvm_guest_cpu_offline();
+ kvm_guest_cpu_offline(false);
return 0;
}
@@ -516,6 +518,20 @@ static struct syscore_ops kvm_syscore_op
.resume = kvm_resume,
};
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ kvm_guest_cpu_offline(true);
+ native_machine_crash_shutdown(regs);
+}
+#endif
+
static void __init kvm_apf_trap_init(void)
{
set_intr_gate(14, async_page_fault);
@@ -556,6 +572,10 @@ void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
register_syscore_ops(&kvm_syscore_ops);
/*
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,6 @@
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
-#include <asm/reboot.h>
#include <asm/kvmclock.h>
static int kvmclock __ro_after_init = 1;
@@ -221,23 +220,6 @@ static void kvm_setup_secondary_clock(vo
}
#endif
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
- native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_crash_shutdown(regs);
-}
-#endif
-
void kvmclock_disable(void)
{
native_write_msr(msr_kvm_system_time, 0, 0);
@@ -340,9 +322,6 @@ void __init kvmclock_init(void)
#endif
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-#ifdef CONFIG_KEXEC_CORE
- machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif
kvm_get_preset_lpj();
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
pv_info.name = "KVM";