Blob Blame History Raw
From: jbeulich@suse.com
Subject: x86: pCPU handling
Patch-mainline: n/a

- make microcode loader pCPU hotplug aware
- introduce {rd,wr}msr_safe_on_pcpu() and add/enable users

--- head.orig/arch/x86/Kconfig	2012-04-11 17:04:26.000000000 +0200
+++ head/arch/x86/Kconfig	2012-04-11 17:14:18.000000000 +0200
@@ -1060,6 +1060,7 @@ config MICROCODE_OLD_INTERFACE
 
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
+	select XEN_DOMCTL if XEN_PRIVILEGED_GUEST
 	---help---
 	  This device gives privileged processes access to the x86
 	  Model-Specific Registers (MSRs).  It is a character device with
--- head.orig/arch/x86/kernel/microcode_core-xen.c	2012-04-11 13:26:23.000000000 +0200
+++ head/arch/x86/kernel/microcode_core-xen.c	2012-04-11 17:14:21.000000000 +0200
@@ -41,6 +41,8 @@
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
 
+#include <xen/pcpu.h>
+
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
@@ -184,6 +186,54 @@ static int request_microcode(const char 
 	return error;
 }
 
+static const char amd_uc_name[] = "amd-ucode/microcode_amd.bin";
+static const char amd_uc_fmt[] = "amd-ucode/microcode_amd_fam%x.bin";
+static const char intel_uc_fmt[] = "intel-ucode/%02x-%02x-%02x";
+
+static int ucode_cpu_callback(struct notifier_block *nfb,
+			      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct xen_platform_op op;
+	char buf[36];
+	const char *uc_name = buf;
+
+	switch (action) {
+	case CPU_ONLINE:
+		op.cmd = XENPF_get_cpu_version;
+		op.u.pcpu_version.xen_cpuid = cpu;
+		if (HYPERVISOR_platform_op(&op))
+			break;
+		if (op.u.pcpu_version.family == boot_cpu_data.x86
+		    && op.u.pcpu_version.model == boot_cpu_data.x86_model
+		    && op.u.pcpu_version.stepping == boot_cpu_data.x86_mask)
+			break;
+		if (strncmp(op.u.pcpu_version.vendor_id,
+			    "GenuineIntel", 12) == 0)
+			snprintf(buf, sizeof(buf), intel_uc_fmt,
+				 op.u.pcpu_version.family,
+				 op.u.pcpu_version.model,
+				 op.u.pcpu_version.stepping);
+		else if (strncmp(op.u.pcpu_version.vendor_id,
+				 "AuthenicAMD", 12) == 0) {
+			if (op.u.pcpu_version.family >= 0x15)
+				snprintf(buf, sizeof(buf), amd_uc_fmt,
+					 op.u.pcpu_version.family);
+			else
+				uc_name = amd_uc_name;
+		} else
+			break;
+		request_microcode(uc_name);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ucode_cpu_notifier = {
+	.notifier_call = ucode_cpu_callback
+};
+
 #ifdef MODULE
 /* Autoload on Intel and AMD systems */
 static const struct x86_cpu_id microcode_id[] = {
@@ -206,15 +256,13 @@ static int __init microcode_init(void)
 	int error;
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)
-		snprintf(buf, sizeof(buf), "intel-ucode/%02x-%02x-%02x",
+		snprintf(buf, sizeof(buf), intel_uc_fmt,
 			 c->x86, c->x86_model, c->x86_mask);
 	else if (c->x86_vendor == X86_VENDOR_AMD) {
 		if (c->x86 >= 0x15)
-			snprintf(buf, sizeof(buf),
-				 "amd-ucode/microcode_amd_fam%xh.bin",
-				 c->x86);
+			snprintf(buf, sizeof(buf), amd_uc_fmt, c->x86);
 		else
-			fw_name = "amd-ucode/microcode_amd.bin";
+			fw_name = amd_uc_name;
 	} else {
 		pr_err("no support for this CPU vendor\n");
 		return -ENODEV;
@@ -236,12 +284,17 @@ static int __init microcode_init(void)
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
 
+	error = register_pcpu_notifier(&ucode_cpu_notifier);
+	if (error)
+		pr_warn("pCPU notifier registration failed (%d)\n", error);
+
 	return 0;
 }
 module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
+	unregister_pcpu_notifier(&ucode_cpu_notifier);
 	microcode_dev_exit();
 	platform_device_unregister(microcode_pdev);
 
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/kernel/msr-xen.c	2011-02-03 14:42:26.000000000 +0100
@@ -0,0 +1,337 @@
+#ifndef CONFIG_XEN_PRIVILEGED_GUEST
+#include "msr.c"
+#else
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2010 Novell, Inc.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * x86 MSR access device
+ *
+ * This device is accessed by lseek() to the appropriate register number
+ * and then read/write in chunks of 8 bytes.  A larger size means multiple
+ * reads or writes of the same register.
+ *
+ * This driver uses /dev/xen/cpu/%d/msr where %d correlates to the minor
+ * number, and on an SMP box will direct the access to pCPU %d.
+ */
+
+static int msr_init(void);
+static void msr_exit(void);
+
+#define msr_init(args...) _msr_init(args)
+#define msr_exit(args...) _msr_exit(args)
+#include "msr.c"
+#undef msr_exit
+#undef msr_init
+
+#include <linux/slab.h>
+#include <xen/pcpu.h>
+
+static struct class *pmsr_class;
+static unsigned int minor_bias = 10;
+static unsigned int nr_xen_cpu_ids;
+static unsigned long *xen_cpu_online_map;
+
+#define PMSR_DEV(cpu) MKDEV(MSR_MAJOR, (cpu) + minor_bias)
+
+static unsigned int pmsr_minor(struct inode *inode)
+{
+	return iminor(inode) - minor_bias;
+}
+
+static ssize_t pmsr_read(struct file *file, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	u32 __user *tmp = (u32 __user *) buf;
+	u32 data[2];
+	u32 reg = *ppos;
+	unsigned int cpu = pmsr_minor(file->f_path.dentry->d_inode);
+	int err = 0;
+	ssize_t bytes = 0;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	for (; count; count -= 8) {
+		err = rdmsr_safe_on_pcpu(cpu, reg, &data[0], &data[1]);
+		if (err)
+			break;
+		if (copy_to_user(tmp, &data, 8)) {
+			err = -EFAULT;
+			break;
+		}
+		tmp += 2;
+		bytes += 8;
+	}
+
+	return bytes ? bytes : err;
+}
+
+static ssize_t pmsr_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	const u32 __user *tmp = (const u32 __user *)buf;
+	u32 data[2];
+	u32 reg = *ppos;
+	unsigned int cpu = pmsr_minor(file->f_path.dentry->d_inode);
+	int err = 0;
+	ssize_t bytes = 0;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	for (; count; count -= 8) {
+		if (copy_from_user(&data, tmp, 8)) {
+			err = -EFAULT;
+			break;
+		}
+		err = wrmsr_safe_on_pcpu(cpu, reg, data[0], data[1]);
+		if (err)
+			break;
+		tmp += 2;
+		bytes += 8;
+	}
+
+	return bytes ? bytes : err;
+}
+
+static long pmsr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
+{
+	u32 __user *uregs = (u32 __user *)arg;
+	u32 regs[8];
+	unsigned int cpu = pmsr_minor(file->f_path.dentry->d_inode);
+	int err;
+
+	switch (ioc) {
+	case X86_IOC_RDMSR_REGS:
+		if (!(file->f_mode & FMODE_READ)) {
+			err = -EBADF;
+			break;
+		}
+		if (copy_from_user(&regs, uregs, sizeof regs)) {
+			err = -EFAULT;
+			break;
+		}
+		err = rdmsr_safe_regs_on_pcpu(cpu, regs);
+		if (err)
+			break;
+		if (copy_to_user(uregs, &regs, sizeof regs))
+			err = -EFAULT;
+		break;
+
+	case X86_IOC_WRMSR_REGS:
+		if (!(file->f_mode & FMODE_WRITE)) {
+			err = -EBADF;
+			break;
+		}
+		if (copy_from_user(&regs, uregs, sizeof regs)) {
+			err = -EFAULT;
+			break;
+		}
+		err = wrmsr_safe_regs_on_pcpu(cpu, regs);
+		if (err)
+			break;
+		if (copy_to_user(uregs, &regs, sizeof regs))
+			err = -EFAULT;
+		break;
+
+	default:
+		err = -ENOTTY;
+		break;
+	}
+
+	return err;
+}
+
+static int pmsr_open(struct inode *inode, struct file *file)
+{
+	unsigned int cpu;
+
+	cpu = pmsr_minor(file->f_path.dentry->d_inode);
+	if (cpu >= nr_xen_cpu_ids || !test_bit(cpu, xen_cpu_online_map))
+		return -ENXIO;	/* No such CPU */
+
+	return 0;
+}
+
+/*
+ * File operations we support
+ */
+static const struct file_operations pmsr_fops = {
+	.owner = THIS_MODULE,
+	.llseek = msr_seek,
+	.read = pmsr_read,
+	.write = pmsr_write,
+	.open = pmsr_open,
+	.unlocked_ioctl = pmsr_ioctl,
+	.compat_ioctl = pmsr_ioctl,
+};
+
+static int pmsr_device_create(unsigned int cpu)
+{
+	struct device *dev;
+
+	if (cpu >= nr_xen_cpu_ids) {
+		static bool warned;
+		unsigned long *map;
+
+		if ((minor_bias + cpu) >> MINORBITS) {
+			if (!warned) {
+				warned = true;
+				pr_warn("Physical MSRs of CPUs beyond %u"
+					" will not be accessible\n",
+					MINORMASK - minor_bias);
+			}
+			return -EDOM;
+		}
+
+		map = kcalloc(BITS_TO_LONGS(cpu + 1), sizeof(*map),
+			      GFP_KERNEL);
+		if (!map) {
+			if (!warned) {
+				warned = true;
+				pr_warn("Physical MSRs of CPUs beyond %u"
+					" may not be accessible\n",
+					nr_xen_cpu_ids - 1);
+			}
+			return -ENOMEM;
+		}
+
+		memcpy(map, xen_cpu_online_map,
+		       BITS_TO_LONGS(nr_xen_cpu_ids)
+		       * sizeof(*xen_cpu_online_map));
+		nr_xen_cpu_ids = min_t(unsigned int,
+				     BITS_TO_LONGS(cpu + 1) * BITS_PER_LONG,
+				     MINORMASK + 1 - minor_bias);
+		kfree(xchg(&xen_cpu_online_map, map));
+	}
+	set_bit(cpu, xen_cpu_online_map);
+	dev = device_create(pmsr_class, NULL, PMSR_DEV(cpu), NULL,
+			    "pmsr%d", cpu);
+	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void pmsr_device_destroy(unsigned int cpu)
+{
+	clear_bit(cpu, xen_cpu_online_map);
+	device_destroy(pmsr_class, PMSR_DEV(cpu));
+}
+
+static int pmsr_cpu_callback(struct notifier_block *nfb,
+			     unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		pmsr_device_create(cpu);
+		break;
+	case CPU_DEAD:
+		pmsr_device_destroy(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block pmsr_cpu_notifier = {
+	.notifier_call = pmsr_cpu_callback,
+};
+
+static char *pmsr_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "xen/cpu/%u/msr",
+			 MINOR(dev->devt) - minor_bias);
+}
+
+static int __init msr_init(void)
+{
+	int err;
+	xen_platform_op_t op;
+
+	err = _msr_init();
+	if (err || !is_initial_xendomain())
+		return err;
+
+	op.cmd = XENPF_get_cpuinfo;
+	op.u.pcpu_info.xen_cpuid = 0;
+	do {
+		err = HYPERVISOR_platform_op(&op);
+	} while (err == -EBUSY);
+	if (err)
+		goto out;
+	nr_xen_cpu_ids = BITS_TO_LONGS(op.u.pcpu_info.max_present + 1)
+			 * BITS_PER_LONG;
+
+	while (minor_bias < NR_CPUS)
+		minor_bias *= 10;
+	if ((minor_bias + nr_xen_cpu_ids - 1) >> MINORBITS)
+		minor_bias = NR_CPUS;
+	if ((minor_bias + nr_xen_cpu_ids - 1) >> MINORBITS)
+		nr_xen_cpu_ids = MINORMASK + 1 - NR_CPUS;
+
+	xen_cpu_online_map = kcalloc(BITS_TO_LONGS(nr_xen_cpu_ids),
+				     sizeof(*xen_cpu_online_map),
+				     GFP_KERNEL);
+	if (!xen_cpu_online_map) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (__register_chrdev(MSR_MAJOR, minor_bias,
+			      MINORMASK + 1 - minor_bias,
+			      "pcpu/msr", &pmsr_fops)) {
+		pr_err("msr: unable to get minors for pmsr\n");
+		goto out;
+	}
+	pmsr_class = class_create(THIS_MODULE, "pmsr");
+	if (IS_ERR(pmsr_class)) {
+		err = PTR_ERR(pmsr_class);
+		goto out_chrdev;
+	}
+	pmsr_class->devnode = pmsr_devnode;
+	err = register_pcpu_notifier(&pmsr_cpu_notifier);
+
+	if (!err && !nr_xen_cpu_ids)
+		err = -ENODEV;
+	if (!err)
+		return 0;
+
+	class_destroy(pmsr_class);
+
+out_chrdev:
+	__unregister_chrdev(MSR_MAJOR, minor_bias,
+			    MINORMASK + 1 - minor_bias, "pcpu/msr");
+out:
+	if (err)
+		pr_warn("msr: can't initialize physical MSR access (%d)\n",
+			err);
+	nr_xen_cpu_ids = 0;
+	kfree(xen_cpu_online_map);
+	return 0;
+}
+
+static void __exit msr_exit(void)
+{
+	if (nr_xen_cpu_ids) {
+		unsigned int cpu = 0;
+
+		unregister_pcpu_notifier(&pmsr_cpu_notifier);
+		for_each_set_bit(cpu, xen_cpu_online_map, nr_xen_cpu_ids)
+			pmsr_device_destroy(cpu);
+		class_destroy(pmsr_class);
+		__unregister_chrdev(MSR_MAJOR, minor_bias,
+				    MINORMASK + 1 - minor_bias, "pcpu/msr");
+		kfree(xen_cpu_online_map);
+	}
+	_msr_exit();
+}
+#endif /* CONFIG_XEN_PRIVILEGED_GUEST */
--- head.orig/drivers/hwmon/Kconfig	2012-04-10 17:02:11.000000000 +0200
+++ head/drivers/hwmon/Kconfig	2012-04-11 17:14:08.000000000 +0200
@@ -435,7 +435,8 @@ config SENSORS_GPIO_FAN
 
 config SENSORS_CORETEMP
 	tristate "Intel Core/Core2/Atom temperature sensor"
-	depends on X86 && PCI && !XEN && EXPERIMENTAL
+	depends on X86 && PCI && !XEN_UNPRIVILEGED_GUEST && EXPERIMENTAL
+	select XEN_DOMCTL if XEN
 	help
 	  If you say yes here you get support for the temperature
 	  sensor inside your CPU. Most of the family 6 CPUs
@@ -1154,8 +1155,9 @@ config SENSORS_TWL4030_MADC
 
 config SENSORS_VIA_CPUTEMP
 	tristate "VIA CPU temperature sensor"
-	depends on X86 && !XEN
+	depends on X86 && !XEN_UNPRIVILEGED_GUEST
 	select HWMON_VID
+	select XEN_DOMCTL if XEN
 	help
 	  If you say yes here you get support for the temperature
 	  sensor inside your CPU. Supported are all known variants of
--- head.orig/drivers/xen/core/Makefile	2012-02-17 14:38:43.000000000 +0100
+++ head/drivers/xen/core/Makefile	2012-02-17 14:39:21.000000000 +0100
@@ -13,4 +13,4 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o
 obj-$(CONFIG_XEN_DOMCTL)	+= domctl.o
 CFLAGS_domctl.o			:= -D__XEN_PUBLIC_XEN_H__ -D__XEN_PUBLIC_GRANT_TABLE_H__
-CFLAGS_domctl.o			+= -D__XEN_TOOLS__ -imacros xen/interface/domctl.h
+CFLAGS_domctl.o			+= -D__XEN_TOOLS__ -imacros xen/interface/domctl.h -imacros xen/interface/sysctl.h
--- head.orig/drivers/xen/core/domctl.c	2011-10-04 17:03:34.000000000 +0200
+++ head/drivers/xen/core/domctl.c	2011-11-10 08:45:48.000000000 +0100
@@ -93,6 +93,113 @@ union xen_domctl {
 	} v5, v6, v7, v8;
 };
 
+struct xen_sysctl_physinfo_v6 {
+	uint32_t threads_per_core;
+	uint32_t cores_per_socket;
+	uint32_t nr_cpus;
+	uint32_t nr_nodes;
+	uint32_t cpu_khz;
+	uint64_aligned_t total_pages;
+	uint64_aligned_t free_pages;
+	uint64_aligned_t scrub_pages;
+	uint32_t hw_cap[8];
+	uint32_t max_cpu_id;
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_node;
+		uint64_aligned_t _ctn_align;
+	};
+	uint32_t capabilities;
+};
+
+struct xen_sysctl_physinfo_v7 {
+	uint32_t threads_per_core;
+	uint32_t cores_per_socket;
+	uint32_t nr_cpus;
+	uint32_t max_node_id;
+	uint32_t cpu_khz;
+	uint64_aligned_t total_pages;
+	uint64_aligned_t free_pages;
+	uint64_aligned_t scrub_pages;
+	uint32_t hw_cap[8];
+	uint32_t max_cpu_id;
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_node;
+		uint64_aligned_t _ctn_align;
+	};
+	uint32_t capabilities;
+};
+
+#define XEN_SYSCTL_pm_op_get_cputopo 0x20
+struct xen_get_cputopo_v6 {
+	uint32_t max_cpus;
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_core;
+		uint64_aligned_t _ctc_align;
+	};
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_socket;
+		uint64_aligned_t _cts_align;
+	};
+	uint32_t nr_cpus;
+};
+
+struct xen_sysctl_pm_op_v6 {
+	uint32_t cmd;
+	uint32_t cpuid;
+	union {
+		struct xen_get_cputopo_v6 get_topo;
+	};
+};
+#define xen_sysctl_pm_op_v7 xen_sysctl_pm_op_v6
+
+struct xen_sysctl_topologyinfo_v8 {
+	uint32_t max_cpu_index;
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_core;
+		uint64_aligned_t _ctc_align;
+	};
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_socket;
+		uint64_aligned_t _cts_align;
+	};
+	union {
+		XEN_GUEST_HANDLE(uint32) cpu_to_node;
+		uint64_aligned_t _ctn_align;
+	};
+};
+
+union xen_sysctl {
+	/* v6: Xen 3.4.x */
+	struct {
+		uint32_t cmd;
+		uint32_t interface_version;
+		union {
+			struct xen_sysctl_physinfo_v6 physinfo;
+			struct xen_sysctl_pm_op_v6 pm_op;
+		};
+	} v6;
+	/* v7: Xen 4.0.x */
+	struct {
+		uint32_t cmd;
+		uint32_t interface_version;
+		union {
+			struct xen_sysctl_physinfo_v7 physinfo;
+			struct xen_sysctl_pm_op_v7 pm_op;
+		};
+	} v7;
+	/*
+	 * v8: Xen 4.1.x
+	 * v9: Xen 4.2+
+	 */
+	struct {
+		uint32_t cmd;
+		uint32_t interface_version;
+		union {
+			struct xen_sysctl_topologyinfo_v8 topologyinfo;
+		};
+	} v8, v9;
+};
+
 /* The actual code comes here */
 
 static inline int hypervisor_domctl(void *domctl)
@@ -100,6 +207,11 @@ static inline int hypervisor_domctl(void
 	return _hypercall1(int, domctl, domctl);
 }
 
+static inline int hypervisor_sysctl(void *sysctl)
+{
+	return _hypercall1(int, sysctl, sysctl);
+}
+
 int xen_guest_address_size(int domid)
 {
 	union xen_domctl domctl;
@@ -275,6 +387,176 @@ int xen_set_physical_cpu_affinity(int pc
 }
 EXPORT_SYMBOL_GPL(xen_set_physical_cpu_affinity);
 
+int xen_get_topology_info(unsigned int cpu, u32 *core, u32 *sock, u32 *node)
+{
+	union xen_sysctl sysctl;
+	uint32_t *cores = NULL, *socks = NULL, *nodes = NULL;
+	unsigned int nr;
+	int rc;
+
+	if (core)
+		cores = kmalloc((cpu + 1) * sizeof(*cores), GFP_KERNEL);
+	if (sock)
+		socks = kmalloc((cpu + 1) * sizeof(*socks), GFP_KERNEL);
+	if (node)
+		nodes = kmalloc((cpu + 1) * sizeof(*nodes), GFP_KERNEL);
+	if ((core && !cores) || (sock && !socks) || (node && !nodes)) {
+		kfree(cores);
+		kfree(socks);
+		kfree(nodes);
+		return -ENOMEM;
+	}
+
+#define topologyinfo(ver) do {						\
+	memset(&sysctl, 0, sizeof(sysctl));				\
+	sysctl.v##ver.cmd = XEN_SYSCTL_topologyinfo;			\
+	sysctl.v##ver.interface_version = ver;				\
+	sysctl.v##ver.topologyinfo.max_cpu_index = cpu;			\
+	set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_core,	\
+			     cores);					\
+	set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_socket,	\
+			     socks);					\
+	set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_node,	\
+			     nodes);					\
+	rc = hypervisor_sysctl(&sysctl);				\
+	nr = sysctl.v##ver.topologyinfo.max_cpu_index + 1;		\
+} while (0)
+
+	BUILD_BUG_ON(XEN_SYSCTL_INTERFACE_VERSION > 9);
+	topologyinfo(9);
+#if CONFIG_XEN_COMPAT < 0x040200
+	if (rc)
+		topologyinfo(8);
+#endif
+
+#if CONFIG_XEN_COMPAT < 0x040100
+#define pm_op_cputopo(ver) do {						\
+	memset(&sysctl, 0, sizeof(sysctl));				\
+	sysctl.v##ver.cmd = XEN_SYSCTL_pm_op;				\
+	sysctl.v##ver.interface_version = ver;				\
+	sysctl.v##ver.pm_op.cmd = XEN_SYSCTL_pm_op_get_cputopo;		\
+	sysctl.v##ver.pm_op.cpuid = 0;					\
+	sysctl.v##ver.pm_op.get_topo.max_cpus = cpu + 1;		\
+	set_xen_guest_handle(sysctl.v##ver.pm_op.get_topo.cpu_to_core,	\
+			     cores);					\
+	set_xen_guest_handle(sysctl.v##ver.pm_op.get_topo.cpu_to_socket,\
+			     socks);					\
+	rc = hypervisor_sysctl(&sysctl);				\
+	memset(&sysctl, 0, sizeof(sysctl));				\
+	sysctl.v##ver.cmd = XEN_SYSCTL_physinfo;			\
+	sysctl.v##ver.interface_version = ver;				\
+	sysctl.v##ver.physinfo.max_cpu_id = cpu;			\
+	set_xen_guest_handle(sysctl.v##ver.physinfo.cpu_to_node, nodes);\
+	rc = hypervisor_sysctl(&sysctl) ?: rc;				\
+	nr = sysctl.v##ver.physinfo.max_cpu_id + 1;			\
+} while (0)
+
+	if (rc)
+		pm_op_cputopo(7);
+#endif
+#if CONFIG_XEN_COMPAT < 0x040000
+	if (rc)
+		pm_op_cputopo(6);
+#endif
+
+	if (!rc && cpu >= nr)
+		rc = -EDOM;
+
+	if (!rc && core && (*core = cores[cpu]) == INVALID_TOPOLOGY_ID)
+		rc = -ENOENT;
+	kfree(cores);
+
+	if (!rc && sock && (*sock = socks[cpu]) == INVALID_TOPOLOGY_ID)
+		rc = -ENOENT;
+	kfree(socks);
+
+	if (!rc && node && (*node = nodes[cpu]) == INVALID_TOPOLOGY_ID)
+		rc = -ENOENT;
+	kfree(nodes);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xen_get_topology_info);
+
+#include <xen/pcpu.h>
+#include <asm/msr.h>
+
+int rdmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err = xen_set_physical_cpu_affinity(pcpu);
+
+	switch (err) {
+	case 0:
+		err = rdmsr_safe(msr_no, l, h);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+		break;
+	case -EINVAL:
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		err = rdmsr_safe_on_cpu(pcpu, msr_no, l, h) ?: 1;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rdmsr_safe_on_pcpu);
+
+int wrmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no, u32 l, u32 h)
+{
+	int err = xen_set_physical_cpu_affinity(pcpu);
+
+	switch (err) {
+	case 0:
+		err = wrmsr_safe(msr_no, l, h);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+		break;
+	case -EINVAL:
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		err = wrmsr_safe_on_cpu(pcpu, msr_no, l, h) ?: 1;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(wrmsr_safe_on_pcpu);
+
+int rdmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs)
+{
+	int err = xen_set_physical_cpu_affinity(pcpu);
+
+	switch (err) {
+	case 0:
+		err = rdmsr_safe_regs(regs);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+		break;
+	case -EINVAL:
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		err = rdmsr_safe_regs_on_cpu(pcpu, regs) ?: 1;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rdmsr_safe_regs_on_pcpu);
+
+int wrmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs)
+{
+	int err = xen_set_physical_cpu_affinity(pcpu);
+
+	switch (err) {
+	case 0:
+		err = wrmsr_safe_regs(regs);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+		break;
+	case -EINVAL:
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		err = wrmsr_safe_regs_on_cpu(pcpu, regs) ?: 1;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(wrmsr_safe_regs_on_pcpu);
+
 #endif /* CONFIG_X86 */
 
 MODULE_LICENSE("GPL");
--- head.orig/drivers/xen/core/domctl.h	2010-11-23 16:20:20.000000000 +0100
+++ head/drivers/xen/core/domctl.h	2011-02-03 14:42:26.000000000 +0100
@@ -1,3 +1,4 @@
 int xen_guest_address_size(int domid);
 int xen_guest_blkif_protocol(int domid);
 int xen_set_physical_cpu_affinity(int pcpu);
+int xen_get_topology_info(unsigned int cpu, u32 *core, u32 *socket, u32 *node);
--- head.orig/include/xen/pcpu.h	2011-12-05 13:25:56.000000000 +0100
+++ head/include/xen/pcpu.h	2011-12-05 13:25:56.000000000 +0100
@@ -7,4 +7,13 @@
 int register_pcpu_notifier(struct notifier_block *);
 void unregister_pcpu_notifier(struct notifier_block *);
 
+#ifdef CONFIG_X86
+int __must_check rdmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no,
+				    u32 *l, u32 *h);
+int __must_check wrmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no,
+				    u32 l, u32 h);
+int __must_check rdmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs);
+int __must_check wrmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs);
+#endif
+
 #endif /* _XEN_SYSCTL_H */