Alexander Graf b7577e
From: Roman Kagan <rkagan@virtuozzo.com>
Alexander Graf b7577e
Date: Fri, 14 Jul 2017 17:13:20 +0300
Alexander Graf b7577e
Subject: kvm: x86: hyperv: make VP_INDEX managed by userspace
Alexander Graf b7577e
MIME-Version: 1.0
Alexander Graf b7577e
Content-Type: text/plain; charset=UTF-8
Alexander Graf b7577e
Content-Transfer-Encoding: 8bit
Alexander Graf b7577e
Patch-mainline: v4.13-rc1
Alexander Graf b7577e
Git-commit: d3457c877b14aaee8c52923eedf05a3b78af0476
Alexander Graf b7577e
References: bsc#1077761
Alexander Graf b7577e
Alexander Graf b7577e
Hyper-V identifies vCPUs by Virtual Processor Index, which can be
Alexander Graf b7577e
queried via HV_X64_MSR_VP_INDEX msr.  It is defined by the spec as a
Alexander Graf b7577e
sequential number which can't exceed the maximum number of vCPUs per VM.
Alexander Graf b7577e
APIC ids can be sparse and thus aren't a valid replacement for VP
Alexander Graf b7577e
indices.
Alexander Graf b7577e
Alexander Graf b7577e
Current KVM uses its internal vcpu index as VP_INDEX.  However, to make
Alexander Graf b7577e
it predictable and persistent across VM migrations, the userspace has to
Alexander Graf b7577e
control the value of VP_INDEX.
Alexander Graf b7577e
Alexander Graf b7577e
This patch achieves that, by storing vp_index explicitly on vcpu, and
Alexander Graf b7577e
allowing HV_X64_MSR_VP_INDEX to be set from the host side.  For
Alexander Graf b7577e
compatibility it's initialized to KVM vcpu index.  Also a few variables
Alexander Graf b7577e
are renamed to make clear distinction between this Hyper-V vp_index and
Alexander Graf b7577e
KVM vcpu_id (== APIC id).  Besides, a new capability,
Alexander Graf b7577e
KVM_CAP_HYPERV_VP_INDEX, is added to allow the userspace to skip
Alexander Graf b7577e
attempting msr writes where unsupported, to avoid spamming error logs.
Alexander Graf b7577e
Alexander Graf b7577e
Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Alexander Graf b7577e
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Alexander Graf b7577e
Acked-by: Alexander Graf <agraf@suse.de>
Alexander Graf b7577e
---
Alexander Graf b7577e
 Documentation/virtual/kvm/api.txt |    9 ++++++
Alexander Graf b7577e
 arch/x86/include/asm/kvm_host.h   |    1 
Alexander Graf b7577e
 arch/x86/kvm/hyperv.c             |   54 ++++++++++++++++++++++++--------------
Alexander Graf b7577e
 arch/x86/kvm/hyperv.h             |    1 
Alexander Graf b7577e
 arch/x86/kvm/x86.c                |    3 ++
Alexander Graf b7577e
 include/uapi/linux/kvm.h          |    1 
Alexander Graf b7577e
 6 files changed, 50 insertions(+), 19 deletions(-)
Alexander Graf b7577e
Alexander Graf b7577e
--- a/Documentation/virtual/kvm/api.txt
Alexander Graf b7577e
+++ b/Documentation/virtual/kvm/api.txt
Alexander Graf b7577e
@@ -4338,3 +4338,12 @@
Alexander Graf b7577e
 controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
Alexander Graf b7577e
 doesn't clear SynIC message and event flags pages when they are enabled by
Alexander Graf b7577e
 writing to the respective MSRs.
Alexander Graf b7577e
+
Alexander Graf b7577e
+8.12 KVM_CAP_HYPERV_VP_INDEX
Alexander Graf b7577e
+
Alexander Graf b7577e
+Architectures: x86
Alexander Graf b7577e
+
Alexander Graf b7577e
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
Alexander Graf b7577e
+value is used to denote the target vcpu for a SynIC interrupt.  For
Alexander Graf b7577e
+compatibility, KVM initializes this msr to KVM's internal vcpu index.  When this
Alexander Graf b7577e
+capability is absent, userspace can still query this msr's value.
Alexander Graf b7577e
--- a/arch/x86/include/asm/kvm_host.h
Alexander Graf b7577e
+++ b/arch/x86/include/asm/kvm_host.h
Alexander Graf b7577e
@@ -467,6 +467,7 @@
Alexander Graf b7577e
 
Alexander Graf b7577e
 /* Hyper-V per vcpu emulation context */
Alexander Graf b7577e
 struct kvm_vcpu_hv {
Alexander Graf b7577e
+	u32 vp_index;
Alexander Graf b7577e
 	u64 hv_vapic;
Alexander Graf b7577e
 	s64 runtime_offset;
Alexander Graf b7577e
 	struct kvm_vcpu_hv_synic synic;
Alexander Graf b7577e
--- a/arch/x86/kvm/hyperv.c
Alexander Graf b7577e
+++ b/arch/x86/kvm/hyperv.c
Alexander Graf b7577e
@@ -106,14 +106,27 @@
Alexander Graf b7577e
 	return 0;
Alexander Graf b7577e
 }
Alexander Graf b7577e
 
Alexander Graf b7577e
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
Alexander Graf b7577e
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
Alexander Graf b7577e
+{
Alexander Graf b7577e
+	struct kvm_vcpu *vcpu = NULL;
Alexander Graf b7577e
+	int i;
Alexander Graf b7577e
+
Alexander Graf b7577e
+	if (vpidx < KVM_MAX_VCPUS)
Alexander Graf b7577e
+		vcpu = kvm_get_vcpu(kvm, vpidx);
Alexander Graf b7577e
+	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
Alexander Graf b7577e
+		return vcpu;
Alexander Graf b7577e
+	kvm_for_each_vcpu(i, vcpu, kvm)
Alexander Graf b7577e
+		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
Alexander Graf b7577e
+			return vcpu;
Alexander Graf b7577e
+	return NULL;
Alexander Graf b7577e
+}
Alexander Graf b7577e
+
Alexander Graf b7577e
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
Alexander Graf b7577e
 {
Alexander Graf b7577e
 	struct kvm_vcpu *vcpu;
Alexander Graf b7577e
 	struct kvm_vcpu_hv_synic *synic;
Alexander Graf b7577e
 
Alexander Graf b7577e
-	if (vcpu_id >= atomic_read(&kvm->online_vcpus))
Alexander Graf b7577e
-		return NULL;
Alexander Graf b7577e
-	vcpu = kvm_get_vcpu(kvm, vcpu_id);
Alexander Graf b7577e
+	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
Alexander Graf b7577e
 	if (!vcpu)
Alexander Graf b7577e
 		return NULL;
Alexander Graf b7577e
 	synic = vcpu_to_synic(vcpu);
Alexander Graf b7577e
@@ -320,11 +333,11 @@
Alexander Graf b7577e
 	return ret;
Alexander Graf b7577e
 }
Alexander Graf b7577e
 
Alexander Graf b7577e
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
Alexander Graf b7577e
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
Alexander Graf b7577e
 {
Alexander Graf b7577e
 	struct kvm_vcpu_hv_synic *synic;
Alexander Graf b7577e
 
Alexander Graf b7577e
-	synic = synic_get(kvm, vcpu_id);
Alexander Graf b7577e
+	synic = synic_get(kvm, vpidx);
Alexander Graf b7577e
 	if (!synic)
Alexander Graf b7577e
 		return -EINVAL;
Alexander Graf b7577e
 
Alexander Graf b7577e
@@ -343,11 +356,11 @@
Alexander Graf b7577e
 			kvm_hv_notify_acked_sint(vcpu, i);
Alexander Graf b7577e
 }
Alexander Graf b7577e
 
Alexander Graf b7577e
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
Alexander Graf b7577e
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
Alexander Graf b7577e
 {
Alexander Graf b7577e
 	struct kvm_vcpu_hv_synic *synic;
Alexander Graf b7577e
 
Alexander Graf b7577e
-	synic = synic_get(kvm, vcpu_id);
Alexander Graf b7577e
+	synic = synic_get(kvm, vpidx);
Alexander Graf b7577e
 	if (!synic)
Alexander Graf b7577e
 		return -EINVAL;
Alexander Graf b7577e
 
Alexander Graf b7577e
@@ -689,6 +702,13 @@
Alexander Graf b7577e
 		stimer_init(&hv_vcpu->stimer[i], i);
Alexander Graf b7577e
 }
Alexander Graf b7577e
 
Alexander Graf b7577e
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
Alexander Graf b7577e
+{
Alexander Graf b7577e
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
Alexander Graf b7577e
+
Alexander Graf b7577e
+	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
Alexander Graf b7577e
+}
Alexander Graf b7577e
+
Alexander Graf b7577e
 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
Alexander Graf b7577e
 {
Alexander Graf b7577e
 	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
Alexander Graf b7577e
@@ -983,6 +1003,11 @@
Alexander Graf b7577e
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
Alexander Graf b7577e
 
Alexander Graf b7577e
 	switch (msr) {
Alexander Graf b7577e
+	case HV_X64_MSR_VP_INDEX:
Alexander Graf b7577e
+		if (!host)
Alexander Graf b7577e
+			return 1;
Alexander Graf b7577e
+		hv->vp_index = (u32)data;
Alexander Graf b7577e
+		break;
Alexander Graf b7577e
 	case HV_X64_MSR_APIC_ASSIST_PAGE: {
Alexander Graf b7577e
 		u64 gfn;
Alexander Graf b7577e
 		unsigned long addr;
Alexander Graf b7577e
@@ -1094,18 +1119,9 @@
Alexander Graf b7577e
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
Alexander Graf b7577e
 
Alexander Graf b7577e
 	switch (msr) {
Alexander Graf b7577e
-	case HV_X64_MSR_VP_INDEX: {
Alexander Graf b7577e
-		int r;
Alexander Graf b7577e
-		struct kvm_vcpu *v;
Alexander Graf b7577e
-
Alexander Graf b7577e
-		kvm_for_each_vcpu(r, v, vcpu->kvm) {
Alexander Graf b7577e
-			if (v == vcpu) {
Alexander Graf b7577e
-				data = r;
Alexander Graf b7577e
-				break;
Alexander Graf b7577e
-			}
Alexander Graf b7577e
-		}
Alexander Graf b7577e
+	case HV_X64_MSR_VP_INDEX:
Alexander Graf b7577e
+		data = hv->vp_index;
Alexander Graf b7577e
 		break;
Alexander Graf b7577e
-	}
Alexander Graf b7577e
 	case HV_X64_MSR_EOI:
Alexander Graf b7577e
 		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
Alexander Graf b7577e
 	case HV_X64_MSR_ICR:
Alexander Graf b7577e
--- a/arch/x86/kvm/hyperv.h
Alexander Graf b7577e
+++ b/arch/x86/kvm/hyperv.h
Alexander Graf b7577e
@@ -59,6 +59,7 @@
Alexander Graf b7577e
 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
Alexander Graf b7577e
 
Alexander Graf b7577e
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
Alexander Graf b7577e
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
Alexander Graf b7577e
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
Alexander Graf b7577e
 
Alexander Graf b7577e
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
Alexander Graf b7577e
--- a/arch/x86/kvm/x86.c
Alexander Graf b7577e
+++ b/arch/x86/kvm/x86.c
Alexander Graf b7577e
@@ -2667,6 +2667,7 @@
Alexander Graf b7577e
 	case KVM_CAP_HYPERV_SPIN:
Alexander Graf b7577e
 	case KVM_CAP_HYPERV_SYNIC:
Alexander Graf b7577e
 	case KVM_CAP_HYPERV_SYNIC2:
Alexander Graf b7577e
+	case KVM_CAP_HYPERV_VP_INDEX:
Alexander Graf b7577e
 	case KVM_CAP_PCI_SEGMENT:
Alexander Graf b7577e
 	case KVM_CAP_DEBUGREGS:
Alexander Graf b7577e
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
Alexander Graf b7577e
@@ -7700,6 +7701,8 @@
Alexander Graf b7577e
 	struct msr_data msr;
Alexander Graf b7577e
 	struct kvm *kvm = vcpu->kvm;
Alexander Graf b7577e
 
Alexander Graf b7577e
+	kvm_hv_vcpu_postcreate(vcpu);
Alexander Graf b7577e
+
Alexander Graf b7577e
 	if (vcpu_load(vcpu))
Alexander Graf b7577e
 		return;
Alexander Graf b7577e
 	msr.data = 0x0;
Alexander Graf b7577e
--- a/include/uapi/linux/kvm.h
Alexander Graf b7577e
+++ b/include/uapi/linux/kvm.h
Alexander Graf b7577e
@@ -929,6 +929,7 @@
Alexander Graf b7577e
 #define KVM_CAP_PPC_FWNMI 146
Alexander Graf b7577e
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
Alexander Graf b7577e
 #define KVM_CAP_HYPERV_SYNIC2 148
Alexander Graf b7577e
+#define KVM_CAP_HYPERV_VP_INDEX 149
Alexander Graf b7577e
 
Alexander Graf b7577e
 #ifdef KVM_CAP_IRQ_ROUTING
Alexander Graf b7577e