Blob Blame History Raw
From: Jim Mattson <jmattson@google.com>
Date: Thu, 26 Apr 2018 16:09:12 -0700
Subject: kvm: nVMX: Use nested_run_pending rather than from_vmentry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: 6514dc380d35570ef8b0cf107d388fe3169cca11
Patch-mainline: v4.18-rc1
References: bsc#1106240

When saving a vCPU's nested state, the vmcs02 is discarded. Only the
shadow vmcs12 is saved. The shadow vmcs12 contains all of the
information needed to reconstruct an equivalent vmcs02 on restore, but
we have to be able to deal with two contexts:

1. The nested state was saved immediately after an emulated VM-entry,
   before the vmcs02 was ever launched.

2. The nested state was saved some time after the first successful
   launch of the vmcs02.

Though it's an implementation detail rather than an architected bit,
vmx->nested_run_pending serves to distinguish between these two
cases. Hence, we save it as part of the vCPU's nested state. (Yes,
this is ugly.)

Even when restoring from a checkpoint, it may be necessary to build
the vmcs02 as if prepare_vmcs02 was called from nested_vmx_run. So,
the 'from_vmentry' argument should be dropped, and
vmx->nested_run_pending should be consulted instead. The nested state
restoration code then has to set vmx->nested_run_pending prior to
calling prepare_vmcs02. It's important that the restoration code set
vmx->nested_run_pending anyway, since the flag impacts things like
interrupt delivery as well.

Fixes: cf8b84f48a59 ("kvm: nVMX: Prepare for checkpointing L2 state")
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/kvm/vmx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10542,7 +10542,7 @@ static int nested_vmx_load_cr3(struct kv
  * is assigned to entry_failure_code on failure.
  */
 static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-			  bool from_vmentry, u32 *entry_failure_code)
+			  u32 *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control, vmcs12_exec_ctrl;
@@ -10584,7 +10584,7 @@ static int prepare_vmcs02(struct kvm_vcp
 	vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
 	vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
 		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
@@ -10592,7 +10592,7 @@ static int prepare_vmcs02(struct kvm_vcp
 		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
 	}
-	if (from_vmentry) {
+	if (vmx->nested.nested_run_pending) {
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			     vmcs12->vm_entry_intr_info_field);
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -10784,7 +10784,7 @@ static int prepare_vmcs02(struct kvm_vcp
 			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
 		vcpu->arch.pat = vmcs12->guest_ia32_pat;
@@ -10794,7 +10794,7 @@ static int prepare_vmcs02(struct kvm_vcp
 
 	set_cr4_guest_host_mask(vmx);
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
 		vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
 
@@ -10867,7 +10867,7 @@ static int prepare_vmcs02(struct kvm_vcp
 	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
 	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
 		vcpu->arch.efer = vmcs12->guest_ia32_efer;
 	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@ -11029,7 +11029,7 @@ static int check_vmentry_postreqs(struct
 	return 0;
 }
 
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -11044,7 +11044,7 @@ static int enter_vmx_non_root_mode(struc
 	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
 	vmx_segment_cache_clear(vmx);
 
-	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
+	if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) {
 		leave_guest_mode(vcpu);
 		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
 		nested_vmx_entry_failure(vcpu, vmcs12,
@@ -11147,19 +11147,22 @@ static int nested_vmx_run(struct kvm_vcp
 	 * the nested entry.
 	 */
 
-	ret = enter_vmx_non_root_mode(vcpu, true);
-	if (ret)
+	vmx->nested.nested_run_pending = 1;
+	ret = enter_vmx_non_root_mode(vcpu);
+	if (ret) {
+		vmx->nested.nested_run_pending = 0;
 		return ret;
+	}
 
 	/*
 	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
 	 * by event injection, halt vcpu.
 	 */
 	if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
+	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
+		vmx->nested.nested_run_pending = 0;
 		return kvm_vcpu_halt(vcpu);
-
-	vmx->nested.nested_run_pending = 1;
+	}
 
 	return 1;
 
@@ -12200,7 +12203,7 @@ static int vmx_pre_leave_smm(struct kvm_
 
 	if (vmx->nested.smm.guest_mode) {
 		vcpu->arch.hflags &= ~HF_SMM_MASK;
-		ret = enter_vmx_non_root_mode(vcpu, false);
+		ret = enter_vmx_non_root_mode(vcpu);
 		vcpu->arch.hflags |= HF_SMM_MASK;
 		if (ret)
 			return ret;