Blob Blame History Raw
From: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Subject: KVM: s390: Backup the guest's machine check info
Patch-mainline: v4.13-rc1
Git-commit: da72ca4d4090a8ab0e6b0a23682ef42d39d7ae00
References: FATE#324256,LTC#160417,bsc#1066327

Summary:     kvm: Robust Machine Check Handling
Description: A machine check is caused by a machine malfunction and not by data
             or instructions. The machine check could happen on CPU, I/O,
             storage etc.
             There are several kinds of machine checks, e.g. System Damage(SD),
             Instruction Processing Damage(IPD), Delayed Access Exception(DAE),
             Channel Report Pending(CRW), External Damage(ED).
             In general, they are of two types: exigent conditions and
             repressible conditions. When a machine check happens, the 64-bit
             Machine-Check-Interruption Code (MCIC) in lowcore is set to
             indicate the machine check's type together with the validity bits.
             Meanwhile, the machine check handler set in lowcore is called to
             handle it. Through the validity bits in MCIC, the program can
             determine if the machine check could be recovered.
             The present machine check handler can handle almost all of the
             machine checks and validate the registers to recover the machine
             by the validity bits in MCIC. It can also inject the CRW machine
             check request from QEMU to the guest.
             VS1522 improves it to inject most of the host-supported machine
             checks into the guest if they happen while the guest is running
             and the validity bits necessary for recovery are set. Exceptions
             are SD and Timer Facility Damage: the original handling is kept
             for them and they are not injected into the guest.
             Additionally, this line item reinjects the DAE machine check
             into the guest if it happens while the guest is running, instead
             of treating it as host damage, because a DAE can be caused by a
             program using an improper procedure to update the DAT tables.
             The program will not inject machine checks for device hotplug,
             such as CRW, into the guest. For machine checks that happen while
             a nested guest (guest-3) is running, guest-2 will reinject them
             into guest-3.

Upstream-Description:

             KVM: s390: Backup the guest's machine check info

             When a machine check happens in the guest, related mcck info (mcic,
             external damage code, ...) is stored in the vcpu's lowcore on the host.
             Then the machine check handler's low-level part is executed, followed
             by the high-level part.

             If the high-level part's execution is interrupted by a new machine check
             happening on the same vcpu on the host, the mcck info in the lowcore is
             overwritten with the new machine check's data.

             If the high-level part's execution is scheduled to a different cpu,
             the mcck info in the lowcore is uncertain.

             Therefore, for both cases, the further reinjection to the guest will use
             the wrong data.
             Let's backup the mcck info in the lowcore to the sie page
             for further reinjection, so that the right data will be used.

             Add new member into struct sie_page to store related machine check's
             info of mcic, failing storage address and external damage code.

             Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
             Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
             Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>


Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 arch/s390/include/asm/kvm_host.h |   17 ++++++++++++++++-
 arch/s390/kernel/nmi.c           |   34 ++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c         |    1 +
 3 files changed, 51 insertions(+), 1 deletion(-)

--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -109,6 +109,20 @@ struct esca_block {
 	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
 } __packed;
 
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+	__u64 mcic;
+	__u64 failing_storage_address;
+	__u32 ext_damage_code;
+	__u32 reserved;
+};
+
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -266,7 +280,8 @@ struct kvm_s390_itdb {
 
 struct sie_page {
 	struct kvm_s390_sie_block sie_block;
-	__u8 reserved200[1024];		/* 0x0200 */
+	struct mcck_volatile_info mcck_info;	/* 0x0200 */
+	__u8 reserved218[1000];		/* 0x0218 */
 	struct kvm_s390_itdb itdb;	/* 0x0600 */
 	__u8 reserved700[2304];		/* 0x0700 */
 } __packed;
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -26,6 +26,7 @@
 #include <asm/switch_to.h>
 #include <asm/ctl_reg.h>
 #include <asm/asm-offsets.h>
+#include <linux/kvm_host.h>
 
 struct mcck_struct {
 	unsigned int kill_task : 1;
@@ -275,6 +276,31 @@ static int notrace s390_validate_registe
 	return kill_task;
 }
 
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+	struct mcck_volatile_info *mcck_backup;
+	struct sie_page *sie_page;
+
+	/* r14 contains the sie block, which was set in sie64a */
+	struct kvm_s390_sie_block *sie_block =
+			(struct kvm_s390_sie_block *) regs->gprs[14];
+
+	if (sie_block == NULL)
+		/* Something's seriously wrong, stop system. */
+		s390_handle_damage();
+
+	sie_page = container_of(sie_block, struct sie_page, sie_block);
+	mcck_backup = &sie_page->mcck_info;
+	mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+				~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+	mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+	mcck_backup->failing_storage_address
+			= S390_lowcore.failing_storage_address;
+}
+
 #define MAX_IPD_COUNT	29
 #define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */
 
@@ -355,6 +381,14 @@ void notrace s390_do_machine_check(struc
 		mcck->mcck_code = mci.val;
 		set_cpu_flag(CIF_MCCK_PENDING);
 	}
+
+	/*
+	 * Backup the machine check's info if it happens when the guest
+	 * is running.
+	 */
+	if (test_cpu_flag(CIF_MCCK_GUEST))
+		s390_backup_mcck_info(regs);
+
 	if (mci.cd) {
 		/* Timing facility damage */
 		s390_handle_damage();
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2407,6 +2407,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
 	if (!vcpu)
 		goto out;
 
+	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
 	if (!sie_page)
 		goto out_free_cpu;