Blob Blame History Raw
From: jbeulich@suse.com
Subject: kexec: properly handle pCPU hotplug
Patch-mainline: n/a

--- head.orig/arch/x86/kernel/machine_kexec_xen.c	2011-12-02 17:22:49.000000000 +0100
+++ head/arch/x86/kernel/machine_kexec_xen.c	2011-12-05 12:23:02.000000000 +0100
@@ -1,3 +1,14 @@
+int machine_kexec_setup_resource(struct resource *hypervisor,
+				 struct resource *phys_cpu)
+{
+	/* First choice: nest the crash note under the hypervisor resource. */
+	insert_resource(hypervisor, phys_cpu);
+	/* Not linked: outside of hypervisor range, fall back to iomem. */
+	if (phys_cpu->parent == NULL)
+		insert_resource(&iomem_resource, phys_cpu);
+	return 0;	/* insert_resource() results are not propagated */
+}
+
 int __init machine_kexec_setup_resources(struct resource *hypervisor,
 					 struct resource *phys_cpus,
 					 int nr_phys_cpus)
@@ -8,12 +19,8 @@ int __init machine_kexec_setup_resources
 	if (crashk_res.end > crashk_res.start)
 		insert_resource(&iomem_resource, &crashk_res);
 
-	/* The per-cpu crash note resources belong to the hypervisor resource */
-	for (k = 0; k < nr_phys_cpus; k++) {
-		insert_resource(hypervisor, phys_cpus + k);
-		if (!phys_cpus[k].parent) /* outside of hypervisor range */
-			insert_resource(&iomem_resource, phys_cpus + k);
-	}
+	for (k = 0; k < nr_phys_cpus; k++)
+		machine_kexec_setup_resource(hypervisor, phys_cpus + k);
 
 	return xen_create_contiguous_region((unsigned long)&vmcoreinfo_note,
 					    get_order(sizeof(vmcoreinfo_note)),
--- head.orig/drivers/xen/core/Makefile	2012-02-17 14:37:35.000000000 +0100
+++ head/drivers/xen/core/Makefile	2012-02-17 14:37:50.000000000 +0100
@@ -4,8 +4,7 @@
 
 obj-y := evtchn.o gnttab.o reboot.o machine_reboot.o
 
-obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o
-obj-$(CONFIG_ACPI_HOTPLUG_CPU)	+= pcpu.o
+obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o pcpu.o
 obj-$(CONFIG_PROC_FS)		+= xen_proc.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
--- head.orig/drivers/xen/core/machine_kexec.c	2011-12-21 11:09:57.000000000 +0100
+++ head/drivers/xen/core/machine_kexec.c	2011-12-07 14:28:59.000000000 +0100
@@ -4,31 +4,181 @@
  */
 
 #include <linux/kexec.h>
+#include <linux/slab.h>
 #include <xen/interface/kexec.h>
 #include <xen/interface/platform.h>
 #include <linux/reboot.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
+#include <xen/pcpu.h>
 
 extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, 
 					 struct kimage *image);
 extern int machine_kexec_setup_resources(struct resource *hypervisor,
 					 struct resource *phys_cpus,
 					 int nr_phys_cpus);
+extern int machine_kexec_setup_resource(struct resource *hypervisor,
+					struct resource *phys_cpu);
 extern void machine_kexec_register_resources(struct resource *res);
 
-static int __initdata xen_max_nr_phys_cpus;
+static unsigned int xen_nr_phys_cpus, xen_max_nr_phys_cpus;
 static struct resource xen_hypervisor_res;
-static struct resource *__initdata xen_phys_cpus;
+static struct resource *xen_phys_cpus;
+static struct xen_phys_cpu_entry {
+	struct xen_phys_cpu_entry *next;	/* singly linked, NULL-terminated */
+	struct resource res;			/* crash note range of one pCPU */
+} *xen_phys_cpu_list;	/* entries beyond the boot-time xen_phys_cpus[] array */
 
 size_t vmcoreinfo_size_xen;
 unsigned long paddr_vmcoreinfo_xen;
 
+/* Describe pCPU @cpu's crash note range in @res; returns 0 or an error. */
+static int fill_crash_res(struct resource *res, unsigned int cpu)
+{
+	xen_kexec_range_t range = { .range = KEXEC_RANGE_MA_CPU, .nr = cpu };
+	int err = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
+
+	if (err)
+		return err;
+	if (!range.size)
+		return -ENODEV;	/* zero-sized range: nothing to describe */
+
+	/* Only name, range and flags are set; tree links are left untouched. */
+	res->name = "Crash note";
+	res->start = range.start;
+	res->end = range.start + range.size - 1;
+	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+
+	return 0;
+}
+
+/* Find the inserted crash note resource with exactly @r's range, or NULL. */
+static struct resource *find_crash_res(const struct resource *r,
+				       unsigned int *idx)
+{
+	struct xen_phys_cpu_entry *ent = xen_phys_cpu_list;
+	struct resource *cand;
+	unsigned int pos = 0;
+
+	/* Array slots take the positions below the max, list entries follow. */
+	for (;; ++pos) {
+		if (pos < xen_max_nr_phys_cpus)
+			cand = xen_phys_cpus + pos;
+		else if (ent) {
+			cand = &ent->res;
+			ent = ent->next;
+		} else
+			return NULL;
+		/* Slots without a parent are not in the resource tree: skip. */
+		if (cand->parent && cand->start == r->start
+		    && cand->end == r->end)
+			break;
+	}
+
+	if (idx)
+		*idx = pos;	/* optional: position of the match */
+	return cand;
+}
+
+/* pCPU notifier: CPU_ONLINE adds a crash note resource, CPU_DEAD drops it. */
+static int kexec_cpu_callback(struct notifier_block *nfb,
+			      unsigned long action, void *hcpu)
+{
+	unsigned int i, cpu = (unsigned long)hcpu;
+	struct xen_phys_cpu_entry *ent;
+	struct resource *res = NULL, r;
+
+	if (xen_nr_phys_cpus < xen_max_nr_phys_cpus) /* count all slots */
+		xen_nr_phys_cpus = xen_max_nr_phys_cpus;
+	switch (action) {
+	case CPU_ONLINE:
+		for (i = 0; i < xen_max_nr_phys_cpus; ++i)
+			if (!xen_phys_cpus[i].parent) { /* unused array slot */
+				res = xen_phys_cpus + i;
+				break;
+			}
+		if (!res)
+			for (ent = xen_phys_cpu_list; ent; ent = ent->next)
+				if (!ent->res.parent) { /* released entry */
+					res = &ent->res;
+					break;
+				}
+		if (!res) { /* no free slot: allocate a zeroed list entry */
+			ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+			res = ent ? &ent->res : NULL; /* links must be NULL */
+		} else
+			ent = NULL; /* slot reused: nothing to link/free */
+		if (res && !fill_crash_res(res, cpu) &&
+		    !machine_kexec_setup_resource(&xen_hypervisor_res, res)) {
+			if (ent) { /* publish the new entry at the list head */
+				ent->next = xen_phys_cpu_list;
+				xen_phys_cpu_list = ent;
+				++xen_nr_phys_cpus;
+			}
+		} else {
+			pr_warn("Could not set up crash note for pCPU#%u\n",
+				cpu);
+			kfree(ent);
+		}
+		break;
+
+	case CPU_DEAD:
+		if (!fill_crash_res(&r, cpu)) /* only r.start/r.end are used */
+			res = find_crash_res(&r, NULL);
+		if (!res) { /* not found by range: look for a stale resource */
+			unsigned long *map;
+			xen_platform_op_t op;
+
+			map = kcalloc(BITS_TO_LONGS(xen_nr_phys_cpus),
+				      sizeof(long), GFP_KERNEL);
+			if (!map)
+				break;
+
+			op.cmd = XENPF_get_cpuinfo;
+			op.u.pcpu_info.xen_cpuid = 0;
+			if (HYPERVISOR_platform_op(&op) == 0)
+				i = op.u.pcpu_info.max_present + 1;
+			else
+				i = xen_nr_phys_cpus;
+
+			for (cpu = 0; cpu < i; ++cpu) { /* still reported */
+				unsigned int idx;
+
+				if (fill_crash_res(&r, cpu))
+					continue;
+				if (find_crash_res(&r, &idx)) {
+					BUG_ON(idx >= xen_nr_phys_cpus);
+					__set_bit(idx, map);
+				}
+			}
+
+			for (i = 0; i < xen_max_nr_phys_cpus; ++i)
+				if (xen_phys_cpus[i].parent && !test_bit(i, map)) {
+					res = xen_phys_cpus + i;
+					break;
+				}
+			for (ent = xen_phys_cpu_list; !res && ent;
+			     ent = ent->next, ++i)
+				if (ent->res.parent && !test_bit(i, map))
+					res = &ent->res;
+			kfree(map);
+		}
+		if (res)
+			release_resource(res);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kexec_cpu_notifier = {
+	.notifier_call = kexec_cpu_callback,	/* pCPU online/dead events */
+};
+
 void __init xen_machine_kexec_setup_resources(void)
 {
 	xen_kexec_range_t range;
 	xen_platform_op_t op;
-	struct resource *res;
 	unsigned int k = 0, nr = 0;
 	int rc;
 
@@ -79,21 +229,9 @@ void __init xen_machine_kexec_setup_reso
 
 	/* fill in xen_phys_cpus with per-cpu crash note information */
 
-	for (k = 0; k < xen_max_nr_phys_cpus; k++) {
-		memset(&range, 0, sizeof(range));
-		range.range = KEXEC_RANGE_MA_CPU;
-		range.nr = k;
-
-		if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)
-		    || range.size == 0)
-			continue;
-
-		res = xen_phys_cpus + nr++;
-		res->name = "Crash note";
-		res->start = range.start;
-		res->end = range.start + range.size - 1;
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
-	}
+	for (k = 0; k < xen_max_nr_phys_cpus; k++)
+		if (!fill_crash_res(xen_phys_cpus + nr, k))
+			++nr;
 
 	if (nr == 0)
 		goto free;
@@ -149,7 +287,10 @@ void __init xen_machine_kexec_setup_reso
 		goto err;
 	}
 
-	xen_max_nr_phys_cpus = nr;
+	xen_nr_phys_cpus = nr;
+	rc = register_pcpu_notifier(&kexec_cpu_notifier);
+	if (rc)
+		pr_warn("kexec: pCPU notifier registration failed (%d)\n", rc);
 
 	return;
 
@@ -157,7 +298,7 @@ void __init xen_machine_kexec_setup_reso
 	free_bootmem(__pa(xen_phys_cpus),
 		     xen_max_nr_phys_cpus * sizeof(*xen_phys_cpus));
  err:
-	xen_max_nr_phys_cpus = 0;
+	xen_nr_phys_cpus = 0;
 }
 
 #ifndef CONFIG_X86
@@ -167,7 +308,7 @@ void __init xen_machine_kexec_register_r
 	struct resource *r;
 
 	request_resource(res, &xen_hypervisor_res);
-	for (k = 0; k < xen_max_nr_phys_cpus; k++) {
+	for (k = 0; k < xen_nr_phys_cpus; k++) {
 		r = xen_phys_cpus + k;
 		if (r->parent == NULL) /* out of xen_hypervisor_res range */
 			request_resource(res, r);
--- head.orig/drivers/xen/core/pcpu.c	2012-02-10 13:22:33.000000000 +0100
+++ head/drivers/xen/core/pcpu.c	2012-02-10 13:25:08.000000000 +0100
@@ -2,7 +2,6 @@
  * pcpu.c - management physical cpu in dom0 environment
  */
 #include <linux/acpi.h>
-#include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
@@ -12,6 +11,7 @@
 #include <asm/hypervisor.h>
 #include <xen/interface/platform.h>
 #include <xen/evtchn.h>
+#include <xen/pcpu.h>
 #include <acpi/processor.h>
 
 struct pcpu {
@@ -35,6 +35,44 @@ static DEFINE_MUTEX(xen_pcpu_lock);
 
 static LIST_HEAD(xen_pcpus);
 
+static BLOCKING_NOTIFIER_HEAD(pcpu_chain);
+
+static inline void *notifier_param(const struct pcpu *pcpu)
+{
+	return (void *)(unsigned long)pcpu->dev.id; /* id passed by value */
+}
+
+/* Register @nb and replay CPU_ONLINE to it for every pCPU already online. */
+int register_pcpu_notifier(struct notifier_block *nb)
+{
+	struct pcpu *cur;
+	int rc;
+
+	get_pcpu_lock();
+	rc = blocking_notifier_chain_register(&pcpu_chain, nb);
+	if (rc)
+		goto unlock;
+
+	/* Only @nb is called here; the rest of the chain is not invoked. */
+	list_for_each_entry(cur, &xen_pcpus, pcpu_list) {
+		if (!xen_pcpu_online(cur->flags))
+			continue;
+		nb->notifier_call(nb, CPU_ONLINE, notifier_param(cur));
+	}
+unlock:
+	put_pcpu_lock();
+	return rc;
+}
+EXPORT_SYMBOL_GPL(register_pcpu_notifier);
+
+void unregister_pcpu_notifier(struct notifier_block *nb)
+{
+	get_pcpu_lock();	/* also taken by register_pcpu_notifier() */
+	blocking_notifier_chain_unregister(&pcpu_chain, nb);
+	put_pcpu_lock();
+}
+EXPORT_SYMBOL_GPL(unregister_pcpu_notifier);
+
 static int xen_pcpu_down(uint32_t xen_id)
 {
 	xen_platform_op_t op;
@@ -150,12 +188,16 @@ static int xen_pcpu_online_check(struct 
 	if (xen_pcpu_online(info->flags) && !xen_pcpu_online(pcpu->flags)) {
 		/* the pcpu is onlined */
 		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+		blocking_notifier_call_chain(&pcpu_chain, CPU_ONLINE,
+					     notifier_param(pcpu));
 		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
 		result = 1;
 	} else if (!xen_pcpu_online(info->flags) &&
 		   xen_pcpu_online(pcpu->flags))  {
 		/* The pcpu is offlined now */
 		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+		blocking_notifier_call_chain(&pcpu_chain, CPU_DEAD,
+					     notifier_param(pcpu));
 		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
 		result = 1;
 	}
@@ -337,6 +379,8 @@ static irqreturn_t xen_pcpu_interrupt(in
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+
 int xen_pcpu_hotplug(int type)
 {
 	schedule_work(&xen_pcpu_work);
@@ -372,6 +416,8 @@ int xen_pcpu_index(uint32_t id, bool is_
 }
 EXPORT_SYMBOL_GPL(xen_pcpu_index);
 
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
 static int __init xen_pcpu_init(void)
 {
 	int err;
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/include/xen/pcpu.h	2011-12-05 13:25:56.000000000 +0100
@@ -0,0 +1,20 @@
+#ifndef _XEN_PCPU_H
+#define _XEN_PCPU_H
+
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+/*
+ * Physical CPU (pCPU) state change notification.
+ *
+ * Callbacks receive CPU_ONLINE or CPU_DEAD (hence <linux/cpu.h>) as the
+ * action and the pCPU's device id, cast to a pointer, as the data argument.
+ *
+ * register_pcpu_notifier() returns 0 or the error from chain registration;
+ * on success it also invokes the new block with CPU_ONLINE for every pCPU
+ * that is online at that moment.
+ */
+int register_pcpu_notifier(struct notifier_block *nb);
+void unregister_pcpu_notifier(struct notifier_block *nb);
+
+#endif /* _XEN_PCPU_H */