From: jbeulich@suse.com
Subject: kexec: properly handle pCPU hotplug
Patch-mainline: n/a
--- head.orig/arch/x86/kernel/machine_kexec_xen.c 2011-12-02 17:22:49.000000000 +0100
+++ head/arch/x86/kernel/machine_kexec_xen.c 2011-12-05 12:23:02.000000000 +0100
@@ -1,3 +1,21 @@
+/*
+ * Insert one per-pCPU crash note resource into the resource tree.
+ * The hypervisor resource is tried first; if that insertion fails the
+ * resource is inserted below iomem_resource instead.
+ * Returns 0 on success, otherwise the error from the last insert_resource().
+ */
+int machine_kexec_setup_resource(struct resource *hypervisor,
+ struct resource *phys_cpu)
+{
+ /* The per-cpu crash note resources belong to the hypervisor resource */
+ int rc = insert_resource(hypervisor, phys_cpu);
+
+ if (rc) /* outside of hypervisor range */
+ rc = insert_resource(&iomem_resource, phys_cpu);
+
+ return rc;
+}
+
int __init machine_kexec_setup_resources(struct resource *hypervisor,
struct resource *phys_cpus,
int nr_phys_cpus)
@@ -8,12 +19,8 @@ int __init machine_kexec_setup_resources
if (crashk_res.end > crashk_res.start)
insert_resource(&iomem_resource, &crashk_res);
- /* The per-cpu crash note resources belong to the hypervisor resource */
- for (k = 0; k < nr_phys_cpus; k++) {
- insert_resource(hypervisor, phys_cpus + k);
- if (!phys_cpus[k].parent) /* outside of hypervisor range */
- insert_resource(&iomem_resource, phys_cpus + k);
- }
+ for (k = 0; k < nr_phys_cpus; k++)
+ machine_kexec_setup_resource(hypervisor, phys_cpus + k);
return xen_create_contiguous_region((unsigned long)&vmcoreinfo_note,
get_order(sizeof(vmcoreinfo_note)),
--- head.orig/drivers/xen/core/Makefile 2012-02-17 14:37:35.000000000 +0100
+++ head/drivers/xen/core/Makefile 2012-02-17 14:37:50.000000000 +0100
@@ -4,8 +4,7 @@
obj-y := evtchn.o gnttab.o reboot.o machine_reboot.o
-obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o
-obj-$(CONFIG_ACPI_HOTPLUG_CPU) += pcpu.o
+obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o pcpu.o
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
--- head.orig/drivers/xen/core/machine_kexec.c 2011-12-21 11:09:57.000000000 +0100
+++ head/drivers/xen/core/machine_kexec.c 2011-12-07 14:28:59.000000000 +0100
@@ -4,31 +4,181 @@
*/
#include <linux/kexec.h>
+#include <linux/slab.h>
#include <xen/interface/kexec.h>
#include <xen/interface/platform.h>
#include <linux/reboot.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
+#include <xen/pcpu.h>
extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki,
struct kimage *image);
extern int machine_kexec_setup_resources(struct resource *hypervisor,
struct resource *phys_cpus,
int nr_phys_cpus);
+extern int machine_kexec_setup_resource(struct resource *hypervisor,
+ struct resource *phys_cpu);
extern void machine_kexec_register_resources(struct resource *res);
-static int __initdata xen_max_nr_phys_cpus;
+static unsigned int xen_nr_phys_cpus, xen_max_nr_phys_cpus;
static struct resource xen_hypervisor_res;
-static struct resource *__initdata xen_phys_cpus;
+static struct resource *xen_phys_cpus;
+/*
+ * Singly linked list node holding one crash note resource.
+ * kexec_cpu_callback() allocates a node when neither the xen_phys_cpus array
+ * nor this list has an unused slot; the code in this file treats a slot as
+ * in use while its res.parent is non-NULL.
+ */
+static struct xen_phys_cpu_entry {
+ struct xen_phys_cpu_entry *next; /* following node, NULL for the last one */
+ struct resource res; /* range filled in by fill_crash_res() */
+} *xen_phys_cpu_list;
size_t vmcoreinfo_size_xen;
unsigned long paddr_vmcoreinfo_xen;
+/*
+ * Query the hypervisor for the crash note range of physical CPU @cpu and, on
+ * success, describe it in @res. Only name, start, end and flags are written;
+ * every other field of @res is left untouched, and @res is not modified at
+ * all on failure.
+ * Returns 0 on success, -ENODEV if the reported range is empty, or the
+ * non-zero result of the KEXEC_CMD_kexec_get_range hypercall.
+ */
+static int fill_crash_res(struct resource *res, unsigned int cpu)
+{
+ xen_kexec_range_t range = {
+ .range = KEXEC_RANGE_MA_CPU,
+ .nr = cpu
+ };
+ int rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
+
+ if (!rc && !range.size) /* hypercall worked but there is no note area */
+ rc = -ENODEV;
+ if (!rc) {
+ res->name = "Crash note";
+ res->start = range.start;
+ res->end = range.start + range.size - 1; /* end is inclusive */
+ res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ }
+
+ return rc;
+}
+
+/*
+ * Look up a tracked crash note resource whose start and end both equal those
+ * of @r. Only slots with a non-NULL parent are considered. The xen_phys_cpus
+ * array is searched first, then xen_phys_cpu_list.
+ * If @idx is non-NULL it receives the position of the match: the array index,
+ * or xen_max_nr_phys_cpus plus the distance from the list head.
+ * Returns the matching resource, or NULL if there is none.
+ */
+static struct resource *find_crash_res(const struct resource *r,
+ unsigned int *idx)
+{
+ unsigned int i;
+ struct xen_phys_cpu_entry *ent;
+
+ for (i = 0; i < xen_max_nr_phys_cpus; ++i) {
+ struct resource *res = xen_phys_cpus + i;
+
+ if (res->parent && res->start == r->start
+ && res->end == r->end) {
+ if (idx)
+ *idx = i;
+ return res;
+ }
+ }
+
+ /* i == xen_max_nr_phys_cpus here, so list positions continue the count */
+ for (ent = xen_phys_cpu_list; ent; ent = ent->next, ++i)
+ if (ent->res.parent && ent->res.start == r->start
+ && ent->res.end == r->end) {
+ if (idx)
+ *idx = i;
+ return &ent->res;
+ }
+
+ return NULL;
+}
+
+/*
+ * pCPU notifier callback keeping the "Crash note" resources in sync with
+ * physical CPU hotplug.
+ * @action: CPU_ONLINE adds a resource for the pCPU, CPU_DEAD releases one;
+ * any other action is ignored.
+ * @hcpu: the pCPU number, passed as a pointer-sized integer.
+ * Always returns NOTIFY_OK.
+ */
+static int kexec_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int i, cpu = (unsigned long)hcpu;
+ struct xen_phys_cpu_entry *ent;
+ struct resource *res = NULL, r;
+
+ /* The slot count must cover the whole xen_phys_cpus array */
+ if (xen_nr_phys_cpus < xen_max_nr_phys_cpus)
+ xen_nr_phys_cpus = xen_max_nr_phys_cpus;
+ switch (action) {
+ case CPU_ONLINE:
+ /* Prefer an unused slot (parent == NULL): array first, then list */
+ for (i = 0; i < xen_max_nr_phys_cpus; ++i)
+ if (!xen_phys_cpus[i].parent) {
+ res = xen_phys_cpus + i;
+ break;
+ }
+ if (!res)
+ for (ent = xen_phys_cpu_list; ent; ent = ent->next)
+ if (!ent->res.parent) {
+ res = &ent->res;
+ break;
+ }
+ if (!res) {
+ /*
+ * Must be zeroed: fill_crash_res() does not touch the
+ * parent/sibling/child links, and a non-NULL parent is
+ * what marks a slot as in use throughout this file.
+ */
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ res = ent ? &ent->res : NULL;
+ } else
+ ent = NULL; /* reused slot: nothing to link or free below */
+ if (res && !fill_crash_res(res, cpu)
+ && !machine_kexec_setup_resource(&xen_hypervisor_res,
+ res)) {
+ if (ent) {
+ /* Publish the freshly allocated node at the list head */
+ ent->next = xen_phys_cpu_list;
+ xen_phys_cpu_list = ent;
+ ++xen_nr_phys_cpus;
+ }
+ } else {
+ pr_warn("Could not set up crash note for pCPU#%u\n",
+ cpu);
+ kfree(ent);
+ }
+ break;
+
+ case CPU_DEAD:
+ /* Easy case: the range of the dead pCPU can still be queried */
+ if (!fill_crash_res(&r, cpu))
+ res = find_crash_res(&r, NULL);
+ if (!res) {
+ /*
+ * Otherwise mark every slot whose range is still reported
+ * for some pCPU and release the first in-use slot that
+ * stayed unmarked.
+ */
+ unsigned long *map;
+ xen_platform_op_t op;
+
+ map = kcalloc(BITS_TO_LONGS(xen_nr_phys_cpus),
+ sizeof(long), GFP_KERNEL);
+ if (!map)
+ break;
+
+ /* Number of pCPU ids to scan; fall back to the slot count */
+ op.cmd = XENPF_get_cpuinfo;
+ op.u.pcpu_info.xen_cpuid = 0;
+ if (HYPERVISOR_platform_op(&op) == 0)
+ i = op.u.pcpu_info.max_present + 1;
+ else
+ i = xen_nr_phys_cpus;
+
+ /* cpu is reused as the scan index from here on */
+ for (cpu = 0; cpu < i; ++cpu) {
+ unsigned int idx;
+
+ if (fill_crash_res(&r, cpu))
+ continue;
+ if (find_crash_res(&r, &idx)) {
+ BUG_ON(idx >= xen_nr_phys_cpus);
+ __set_bit(idx, map);
+ }
+ }
+
+ for (i = 0; i < xen_max_nr_phys_cpus; ++i)
+ if (xen_phys_cpus[i].parent && !test_bit(i, map)) {
+ res = xen_phys_cpus + i;
+ break;
+ }
+ /* Only runs when the array had no candidate, so i continues */
+ for (ent = xen_phys_cpu_list; !res && ent;
+ ent = ent->next, ++i)
+ if (ent->res.parent && !test_bit(i, map))
+ res = &ent->res;
+ kfree(map);
+ }
+ if (res)
+ release_resource(res);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/* Notifier block that routes pCPU notifications to kexec_cpu_callback() */
+static struct notifier_block kexec_cpu_notifier = {
+ .notifier_call = kexec_cpu_callback
+};
+
void __init xen_machine_kexec_setup_resources(void)
{
xen_kexec_range_t range;
xen_platform_op_t op;
- struct resource *res;
unsigned int k = 0, nr = 0;
int rc;
@@ -79,21 +229,9 @@ void __init xen_machine_kexec_setup_reso
/* fill in xen_phys_cpus with per-cpu crash note information */
- for (k = 0; k < xen_max_nr_phys_cpus; k++) {
- memset(&range, 0, sizeof(range));
- range.range = KEXEC_RANGE_MA_CPU;
- range.nr = k;
-
- if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)
- || range.size == 0)
- continue;
-
- res = xen_phys_cpus + nr++;
- res->name = "Crash note";
- res->start = range.start;
- res->end = range.start + range.size - 1;
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
- }
+ for (k = 0; k < xen_max_nr_phys_cpus; k++)
+ if (!fill_crash_res(xen_phys_cpus + nr, k))
+ ++nr;
if (nr == 0)
goto free;
@@ -149,7 +287,10 @@ void __init xen_machine_kexec_setup_reso
goto err;
}
- xen_max_nr_phys_cpus = nr;
+ xen_nr_phys_cpus = nr;
+ rc = register_pcpu_notifier(&kexec_cpu_notifier);
+ if (rc)
+ pr_warn("kexec: pCPU notifier registration failed (%d)\n", rc);
return;
@@ -157,7 +298,7 @@ void __init xen_machine_kexec_setup_reso
free_bootmem(__pa(xen_phys_cpus),
xen_max_nr_phys_cpus * sizeof(*xen_phys_cpus));
err:
- xen_max_nr_phys_cpus = 0;
+ xen_nr_phys_cpus = 0;
}
#ifndef CONFIG_X86
@@ -167,7 +308,7 @@ void __init xen_machine_kexec_register_r
struct resource *r;
request_resource(res, &xen_hypervisor_res);
- for (k = 0; k < xen_max_nr_phys_cpus; k++) {
+ for (k = 0; k < xen_nr_phys_cpus; k++) {
r = xen_phys_cpus + k;
if (r->parent == NULL) /* out of xen_hypervisor_res range */
request_resource(res, r);
--- head.orig/drivers/xen/core/pcpu.c 2012-02-10 13:22:33.000000000 +0100
+++ head/drivers/xen/core/pcpu.c 2012-02-10 13:25:08.000000000 +0100
@@ -2,7 +2,6 @@
* pcpu.c - management physical cpu in dom0 environment
*/
#include <linux/acpi.h>
-#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/interrupt.h>
@@ -12,6 +11,7 @@
#include <asm/hypervisor.h>
#include <xen/interface/platform.h>
#include <xen/evtchn.h>
+#include <xen/pcpu.h>
#include <acpi/processor.h>
struct pcpu {
@@ -35,6 +35,44 @@ static DEFINE_MUTEX(xen_pcpu_lock);
static LIST_HEAD(xen_pcpus);
+static BLOCKING_NOTIFIER_HEAD(pcpu_chain);
+
+/* Encode the pCPU's device id as the opaque data pointer handed to notifiers */
+static inline void *notifier_param(const struct pcpu *pcpu)
+{
+ return (void *)(unsigned long)pcpu->dev.id;
+}
+
+/*
+ * Add @nb to the pCPU notifier chain. On success @nb is immediately invoked
+ * with CPU_ONLINE for every entry of xen_pcpus that is currently flagged
+ * online; the return values of those calls are ignored.
+ * The whole operation runs between get_pcpu_lock() and put_pcpu_lock().
+ * Returns the result of blocking_notifier_chain_register().
+ */
+int register_pcpu_notifier(struct notifier_block *nb)
+{
+ int err;
+
+ get_pcpu_lock();
+
+ err = blocking_notifier_chain_register(&pcpu_chain, nb);
+
+ if (!err) {
+ struct pcpu *pcpu;
+
+ /* Replay the current state to the new listener only */
+ list_for_each_entry(pcpu, &xen_pcpus, pcpu_list)
+ if (xen_pcpu_online(pcpu->flags))
+ nb->notifier_call(nb, CPU_ONLINE,
+ notifier_param(pcpu));
+ }
+
+ put_pcpu_lock();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_pcpu_notifier);
+
+/*
+ * Remove @nb from the pCPU notifier chain, between get_pcpu_lock() and
+ * put_pcpu_lock(). The result of the chain removal is not reported.
+ */
+void unregister_pcpu_notifier(struct notifier_block *nb)
+{
+ get_pcpu_lock();
+ blocking_notifier_chain_unregister(&pcpu_chain, nb);
+ put_pcpu_lock();
+}
+EXPORT_SYMBOL_GPL(unregister_pcpu_notifier);
+
static int xen_pcpu_down(uint32_t xen_id)
{
xen_platform_op_t op;
@@ -150,12 +188,16 @@ static int xen_pcpu_online_check(struct
if (xen_pcpu_online(info->flags) && !xen_pcpu_online(pcpu->flags)) {
/* the pcpu is onlined */
pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+ blocking_notifier_call_chain(&pcpu_chain, CPU_ONLINE,
+ notifier_param(pcpu));
kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
result = 1;
} else if (!xen_pcpu_online(info->flags) &&
xen_pcpu_online(pcpu->flags)) {
/* The pcpu is offlined now */
pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+ blocking_notifier_call_chain(&pcpu_chain, CPU_DEAD,
+ notifier_param(pcpu));
kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
result = 1;
}
@@ -337,6 +379,8 @@ static irqreturn_t xen_pcpu_interrupt(in
return IRQ_HANDLED;
}
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+
int xen_pcpu_hotplug(int type)
{
schedule_work(&xen_pcpu_work);
@@ -372,6 +416,8 @@ int xen_pcpu_index(uint32_t id, bool is_
}
EXPORT_SYMBOL_GPL(xen_pcpu_index);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
static int __init xen_pcpu_init(void)
{
int err;
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head/include/xen/pcpu.h 2011-12-05 13:25:56.000000000 +0100
@@ -0,0 +1,15 @@
+#ifndef _XEN_PCPU_H
+#define _XEN_PCPU_H
+
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+/*
+ * Physical CPU (pCPU) hotplug notifications.
+ * Callbacks are invoked with CPU_ONLINE or CPU_DEAD as the action and the
+ * pCPU id cast to a pointer as the data argument.
+ */
+int register_pcpu_notifier(struct notifier_block *);
+void unregister_pcpu_notifier(struct notifier_block *);
+
+#endif /* _XEN_PCPU_H */