| Subject: xen3 common |
| From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 931:f1a207ccb493) |
| Patch-mainline: obsolete |
| Acked-by: jbeulich@novell.com |
| |
| List of files that no longer require modification (and have hence been |
| removed from this patch), kept for reference and in case upstream wants |
| to take the forward-porting patches: |
| 2.6.22/include/linux/sched.h |
| 2.6.22/kernel/softlockup.c |
| 2.6.22/kernel/timer.c |
| 2.6.25/mm/highmem.c |
| 2.6.30/include/linux/pci_regs.h |
| |
| |
| |
| @@ -32,6 +32,7 @@ obj-$(CONFIG_PARPORT) += parport/ |
| obj-y += base/ block/ misc/ mfd/ |
| obj-$(CONFIG_NUBUS) += nubus/ |
| obj-y += macintosh/ |
| +obj-$(CONFIG_XEN) += xen/ |
| obj-$(CONFIG_SCSI) += scsi/ |
| obj-$(CONFIG_ATA) += ata/ |
| obj-$(CONFIG_IDE) += ide/ |
| |
| |
| @@ -61,3 +61,6 @@ obj-$(CONFIG_ACPI_SBS) += sbs.o |
| processor-y := processor_core.o processor_throttling.o |
| processor-y += processor_idle.o processor_thermal.o |
| processor-$(CONFIG_CPU_FREQ) += processor_perflib.o |
| +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +processor-objs += processor_perflib.o processor_extcntl.o |
| +endif |
| |
| |
| @@ -235,7 +235,11 @@ acpi_status asmlinkage acpi_enter_sleep_ |
| u32 pm1b_control; |
| struct acpi_bit_register_info *sleep_type_reg_info; |
| struct acpi_bit_register_info *sleep_enable_reg_info; |
| +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) |
| u32 in_value; |
| +#else |
| + int err; |
| +#endif |
| struct acpi_object_list arg_list; |
| union acpi_object arg; |
| acpi_status status; |
| @@ -344,6 +348,7 @@ acpi_status asmlinkage acpi_enter_sleep_ |
| |
| /* Write #2: Write both SLP_TYP + SLP_EN */ |
| |
| +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) |
| status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control); |
| if (ACPI_FAILURE(status)) { |
| return_ACPI_STATUS(status); |
| @@ -383,6 +388,16 @@ acpi_status asmlinkage acpi_enter_sleep_ |
| /* Spin until we wake */ |
| |
| } while (!in_value); |
| +#else |
| + /* PV ACPI just needs to check the hypercall return value */ |
| + err = acpi_notify_hypervisor_state(sleep_state, |
| + pm1a_control, pm1b_control); |
| + if (err) { |
| + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, |
| + "Hypervisor failure [%d]\n", err)); |
| + return_ACPI_STATUS(AE_ERROR); |
| + } |
| +#endif |
| |
| return_ACPI_STATUS(AE_OK); |
| } |
| |
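| A note on the Xen path above: instead of writing SLP_TYP/SLP_EN to the |
| PM1 control registers and spinning until wakeup, dom0 hands the sleep |
| request to the hypervisor. The sketch below shows one possible shape of |
| acpi_notify_hypervisor_state(); it assumes Xen's XENPF_enter_acpi_sleep |
| platform op, and the header paths and field names are assumptions, not |
| part of this patch. |
| |
| /* Sketch only - assumes the XENPF_enter_acpi_sleep platform op. */ |
| #include <xen/interface/platform.h> |
| #include <asm/hypervisor.h> |
| |
| int acpi_notify_hypervisor_state(u8 sleep_state, u32 pm1a_cnt, u32 pm1b_cnt) |
| { |
|         struct xen_platform_op op = { |
|                 .cmd = XENPF_enter_acpi_sleep, |
|                 .interface_version = XENPF_INTERFACE_VERSION, |
|                 .u.enter_acpi_sleep = { |
|                         .pm1a_cnt_val = (u16)pm1a_cnt, |
|                         .pm1b_cnt_val = (u16)pm1b_cnt, |
|                         .sleep_state  = sleep_state, |
|                 }, |
|         }; |
| |
|         /* Dom0 asks the hypervisor to perform the actual S-state entry. */ |
|         return HYPERVISOR_platform_op(&op); |
| } |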
| |
| @@ -645,7 +645,8 @@ static int acpi_processor_get_info(struc |
| */ |
| if (pr->id == -1) { |
| if (ACPI_FAILURE |
| - (acpi_processor_hotadd_init(pr->handle, &pr->id))) { |
| + (acpi_processor_hotadd_init(pr->handle, &pr->id)) && |
| + !processor_cntl_external()) { |
| return -ENODEV; |
| } |
| } |
| @@ -696,7 +697,11 @@ static int acpi_processor_get_info(struc |
| return 0; |
| } |
| |
| +#ifndef CONFIG_XEN |
| static DEFINE_PER_CPU(void *, processor_device_array); |
| +#else |
| +static void *processor_device_array[NR_ACPI_CPUS]; |
| +#endif |
| |
| static int __cpuinit acpi_processor_start(struct acpi_device *device) |
| { |
| @@ -704,30 +709,46 @@ static int __cpuinit acpi_processor_star |
| struct acpi_processor *pr; |
| struct sys_device *sysdev; |
| |
| + processor_extcntl_init(); |
| + |
| pr = acpi_driver_data(device); |
| |
| result = acpi_processor_get_info(device); |
| - if (result) { |
| + if (result || |
| + ((pr->id == -1) && !processor_cntl_external())) { |
| /* Processor is physically not present */ |
| return 0; |
| } |
| |
| - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); |
| + BUG_ON(!processor_cntl_external() && |
| + ((pr->id >= nr_cpu_ids) || (pr->id < 0))); |
| |
| /* |
| * Buggy BIOS check |
| * ACPI id of processors can be reported wrongly by the BIOS. |
| * Don't trust it blindly |
| */ |
| +#ifndef CONFIG_XEN |
| if (per_cpu(processor_device_array, pr->id) != NULL && |
| per_cpu(processor_device_array, pr->id) != device) { |
| +#else |
| + BUG_ON(pr->acpi_id >= NR_ACPI_CPUS); |
| + if (processor_device_array[pr->acpi_id] != NULL && |
| + processor_device_array[pr->acpi_id] != device) { |
| +#endif |
| printk(KERN_WARNING "BIOS reported wrong ACPI id " |
| "for the processor\n"); |
| return -ENODEV; |
| } |
| +#ifndef CONFIG_XEN |
| per_cpu(processor_device_array, pr->id) = device; |
| |
| per_cpu(processors, pr->id) = pr; |
| +#else |
| + processor_device_array[pr->acpi_id] = device; |
| + if (pr->id != -1) |
| + per_cpu(processors, pr->id) = pr; |
| +#endif |
| |
| result = acpi_processor_add_fs(device); |
| if (result) |
| @@ -742,15 +763,27 @@ static int __cpuinit acpi_processor_star |
| acpi_processor_set_pdc(pr); |
| arch_acpi_processor_cleanup_pdc(pr); |
| |
| -#ifdef CONFIG_CPU_FREQ |
| +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) |
| acpi_processor_ppc_has_changed(pr); |
| #endif |
| - acpi_processor_get_throttling_info(pr); |
| - acpi_processor_get_limit_info(pr); |
| |
| + /* |
| + * pr->id may be -1 when processor_cntl_external() is enabled. |
| + * The throttling and thermal modules don't support that case. |
| + * For now Tx only works when the dom0 vcpu count equals the |
| + * pcpu count, as we hand control to dom0. |
| + */ |
| + if (pr->id != -1) { |
| + acpi_processor_get_throttling_info(pr); |
| + acpi_processor_get_limit_info(pr); |
| + } |
| |
| acpi_processor_power_init(pr, device); |
| |
| + result = processor_extcntl_prepare(pr); |
| + if (result) |
| + goto end; |
| + |
| pr->cdev = thermal_cooling_device_register("Processor", device, |
| &processor_cooling_ops); |
| if (IS_ERR(pr->cdev)) { |
| @@ -878,7 +911,7 @@ static int acpi_processor_remove(struct |
| |
| pr = acpi_driver_data(device); |
| |
| - if (pr->id >= nr_cpu_ids) |
| + if (!processor_cntl_external() && pr->id >= nr_cpu_ids) |
| goto free; |
| |
| if (type == ACPI_BUS_REMOVAL_EJECT) { |
| @@ -899,8 +932,14 @@ static int acpi_processor_remove(struct |
| pr->cdev = NULL; |
| } |
| |
| +#ifndef CONFIG_XEN |
| per_cpu(processors, pr->id) = NULL; |
| per_cpu(processor_device_array, pr->id) = NULL; |
| +#else |
| + if (pr->id != -1) |
| + per_cpu(processors, pr->id) = NULL; |
| + processor_device_array[pr->acpi_id] = NULL; |
| +#endif |
| |
| free: |
| free_cpumask_var(pr->throttling.shared_cpu_map); |
| @@ -963,6 +1002,10 @@ int acpi_processor_device_add(acpi_handl |
| if (!pr) |
| return -ENODEV; |
| |
| + if (processor_cntl_external()) |
| + processor_notify_external(pr, |
| + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); |
| + |
| if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { |
| kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); |
| } |
| @@ -1002,6 +1045,10 @@ static void __ref acpi_processor_hotplug |
| break; |
| } |
| |
| + if (processor_cntl_external()) |
| + processor_notify_external(pr, |
| + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); |
| + |
| if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { |
| kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); |
| break; |
| @@ -1033,6 +1080,11 @@ static void __ref acpi_processor_hotplug |
| |
| if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) |
| kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); |
| + |
| + if (processor_cntl_external()) |
| + processor_notify_external(pr, PROCESSOR_HOTPLUG, |
| + HOTPLUG_TYPE_REMOVE); |
| + |
| break; |
| default: |
| ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
| @@ -1097,6 +1149,11 @@ static acpi_status acpi_processor_hotadd |
| |
| static int acpi_processor_handle_eject(struct acpi_processor *pr) |
| { |
| +#ifdef CONFIG_XEN |
| + if (pr->id == -1) |
| + return (0); |
| +#endif |
| + |
| if (cpu_online(pr->id)) |
| cpu_down(pr->id); |
| |
| |
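| The processor_core.c hunks above use predicates and notification hooks |
| (processor_cntl_external(), processor_pm_external(), |
| processor_notify_external(), processor_extcntl_prepare(), and the |
| PROCESSOR_*/PM_TYPE_*/HOTPLUG_TYPE_* constants) whose declarations live |
| in the processor headers, which are not part of this excerpt. The sketch |
| below shows the rough shape of that interface; names and values are |
| illustrative assumptions, not the authoritative header. |
| |
| /* Sketch only - illustrative interface, values are assumptions. */ |
| #define PROCESSOR_PM_INIT       1 |
| #define PROCESSOR_PM_CHANGE     2 |
| #define PROCESSOR_HOTPLUG       3 |
| |
| #define PM_TYPE_IDLE            0 |
| #define PM_TYPE_PERF            1 |
| #define PM_TYPE_MAX             2 |
| |
| #define HOTPLUG_TYPE_ADD        0 |
| #define HOTPLUG_TYPE_REMOVE     1 |
| |
| struct processor_extcntl_ops { |
|         /* Forward processor PM events (idle/perf) to external logic */ |
|         int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event); |
|         /* Forward physical processor hotplug events to external logic */ |
|         int (*hotplug)(struct acpi_processor *pr, int type); |
| }; |
| extern const struct processor_extcntl_ops *processor_extcntl_ops; |
| |
| /* Has any external control logic registered at all? */ |
| static inline int processor_cntl_external(void) |
| { |
|         return processor_extcntl_ops != NULL; |
| } |
| |
| /* Does it control idle (C-state) handling? */ |
| static inline int processor_pm_external(void) |
| { |
|         return processor_cntl_external() && |
|                processor_extcntl_ops->pm_ops[PM_TYPE_IDLE]; |
| } |
| |
| /* Does it control performance (P-state) handling? */ |
| static inline int processor_pmperf_external(void) |
| { |
|         return processor_cntl_external() && |
|                processor_extcntl_ops->pm_ops[PM_TYPE_PERF]; |
| } |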
| |
| @@ -0,0 +1,241 @@ |
| +/* |
| + * processor_extcntl.c - channel to external control logic |
| + * |
| + * Copyright (C) 2008, Intel corporation |
| + * |
| + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| + * |
| + * This program is free software; you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License as published by |
| + * the Free Software Foundation; either version 2 of the License, or (at |
| + * your option) any later version. |
| + * |
| + * This program is distributed in the hope that it will be useful, but |
| + * WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + * General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License along |
| + * with this program; if not, write to the Free Software Foundation, Inc., |
| + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. |
| + * |
| + */ |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/init.h> |
| +#include <linux/types.h> |
| +#include <linux/acpi.h> |
| +#include <linux/pm.h> |
| +#include <linux/cpu.h> |
| + |
| +#include <acpi/processor.h> |
| + |
| +#define ACPI_PROCESSOR_COMPONENT 0x01000000 |
| +#define ACPI_PROCESSOR_CLASS "processor" |
| +#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver" |
| +#define _COMPONENT ACPI_PROCESSOR_COMPONENT |
| +ACPI_MODULE_NAME("acpi_processor") |
| + |
| +static int processor_extcntl_parse_csd(struct acpi_processor *pr); |
| +static int processor_extcntl_get_performance(struct acpi_processor *pr); |
| +/* |
| + * External processor control logic may register with its own set of |
| + * ops to get ACPI related notification. One example is like VMM. |
| + */ |
| +const struct processor_extcntl_ops *processor_extcntl_ops; |
| +EXPORT_SYMBOL(processor_extcntl_ops); |
| + |
| +static int processor_notify_smm(void) |
| +{ |
| + acpi_status status; |
| + static int is_done = 0; |
| + |
| + /* We only need to successfully notify the BIOS once; */ |
| + /* avoid double notification, which may lead to unexpected results. */ |
| + if (is_done) |
| + return 0; |
| + |
| + /* Can't write pstate_cnt to smi_cmd if either value is zero */ |
| + if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) { |
| + ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n")); |
| + return 0; |
| + } |
| + |
| + ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
| + "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n", |
| + acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd)); |
| + |
| + /* FADT v1 doesn't define pstate_cnt, but many BIOS vendors use |
| + * it anyway, so we need to support it... */ |
| + if (acpi_fadt_is_v1) { |
| + ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
| + "Using v1.0 FADT reserved value for pstate_cnt\n")); |
| + } |
| + |
| + status = acpi_os_write_port(acpi_fadt.smi_cmd, |
| + (u32) acpi_fadt.pstate_cnt, 8); |
| + if (ACPI_FAILURE(status)) |
| + return status; |
| + |
| + is_done = 1; |
| + |
| + return 0; |
| +} |
| + |
| +int processor_notify_external(struct acpi_processor *pr, int event, int type) |
| +{ |
| + int ret = -EINVAL; |
| + |
| + if (!processor_cntl_external()) |
| + return -EINVAL; |
| + |
| + switch (event) { |
| + case PROCESSOR_PM_INIT: |
| + case PROCESSOR_PM_CHANGE: |
| + if ((type >= PM_TYPE_MAX) || |
| + !processor_extcntl_ops->pm_ops[type]) |
| + break; |
| + |
| + ret = processor_extcntl_ops->pm_ops[type](pr, event); |
| + break; |
| + case PROCESSOR_HOTPLUG: |
| + if (processor_extcntl_ops->hotplug) |
| + ret = processor_extcntl_ops->hotplug(pr, type); |
| + break; |
| + default: |
| + printk(KERN_ERR "Unsupport processor events %d.\n", event); |
| + break; |
| + } |
| + |
| + return ret; |
| +} |
| + |
| +/* |
| + * External control logic may take over all or part of the physical |
| + * processor control. Take a VMM for example: physical processors are |
| + * owned by the VMM, so existence information such as hotplug events |
| + * must always be forwarded to it, and processor idle states are |
| + * likewise necessarily under VMM control. For other control bits, |
| + * such as performance/throttle states, the VMM may or may not take |
| + * control, depending on its own policy. |
| + */ |
| +void processor_extcntl_init(void) |
| +{ |
| + if (!processor_extcntl_ops) |
| + arch_acpi_processor_init_extcntl(&processor_extcntl_ops); |
| +} |
| + |
| +/* |
| + * This is called from ACPI processor init and holds the housekeeping |
| + * needed to satisfy the external control model. For example, the |
| + * dependency parsing stubs for idle and performance states live here, |
| + * since that information may not be available to control logic that |
| + * is split out from dom0, such as the cpufreq driver. |
| + */ |
| +int processor_extcntl_prepare(struct acpi_processor *pr) |
| +{ |
| + /* parse cstate dependency information */ |
| + if (processor_pm_external()) |
| + processor_extcntl_parse_csd(pr); |
| + |
| + /* Initialize performance states */ |
| + if (processor_pmperf_external()) |
| + processor_extcntl_get_performance(pr); |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Currently no _CSD is implemented, which is why the existing ACPI |
| + * code doesn't parse _CSD at all. But to keep the interface to the |
| + * external control logic complete, we put a placeholder here for |
| + * future compatibility. |
| + */ |
| +static int processor_extcntl_parse_csd(struct acpi_processor *pr) |
| +{ |
| + int i; |
| + |
| + for (i = 0; i < pr->power.count; i++) { |
| + if (!pr->power.states[i].valid) |
| + continue; |
| + |
| + /* No dependency by default */ |
| + pr->power.states[i].domain_info = NULL; |
| + pr->power.states[i].csd_count = 0; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * The existing ACPI code parses performance states only when the |
| + * acpi-cpufreq driver is loaded, which is exactly what we'd like to |
| + * disable to avoid conflicts with the external control logic. So we |
| + * have to collect the raw performance information here, when the |
| + * ACPI processor object is found and started. |
| + */ |
| +static int processor_extcntl_get_performance(struct acpi_processor *pr) |
| +{ |
| + int ret; |
| + struct acpi_processor_performance *perf; |
| + struct acpi_psd_package *pdomain; |
| + |
| + if (pr->performance) |
| + return -EBUSY; |
| + |
| + perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); |
| + if (!perf) |
| + return -ENOMEM; |
| + |
| + pr->performance = perf; |
| + /* Get basic performance state information */ |
| + ret = acpi_processor_get_performance_info(pr); |
| + if (ret < 0) |
| + goto err_out; |
| + |
| + /* |
| + * Here we need to retrieve the performance dependency information |
| + * from the _PSD object. The existing interface is not used because |
| + * it builds its bitmaps around Linux cpu ids, whereas we want to |
| + * decouple ACPI processor objects from the Linux cpu id logic. For |
| + * example, even when Linux is configured as UP, we still want to |
| + * pass all ACPI processor objects to the external logic. In this |
| + * case it's preferable to use the ACPI ID instead, so the domain |
| + * is keyed off pr->acpi_id below. |
| + */ |
| + pdomain = &pr->performance->domain_info; |
| + pdomain->num_processors = 0; |
| + ret = acpi_processor_get_psd(pr); |
| + if (ret < 0) { |
| + /* |
| + * _PSD is optional - assume no coordination if absent (or |
| + * broken), matching native kernels' behavior. |
| + */ |
| + pdomain->num_entries = ACPI_PSD_REV0_ENTRIES; |
| + pdomain->revision = ACPI_PSD_REV0_REVISION; |
| + pdomain->domain = pr->acpi_id; |
| + pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL; |
| + pdomain->num_processors = 1; |
| + } |
| + |
| + /* Some sanity check */ |
| + if ((pdomain->revision != ACPI_PSD_REV0_REVISION) || |
| + (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) || |
| + ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) && |
| + (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) && |
| + (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) { |
| + ret = -EINVAL; |
| + goto err_out; |
| + } |
| + |
| + /* Last step is to notify BIOS that external logic exists */ |
| + processor_notify_smm(); |
| + |
| + processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF); |
| + |
| + return 0; |
| +err_out: |
| + pr->performance = NULL; |
| + kfree(perf); |
| + return ret; |
| +} |
| |
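| processor_extcntl_init() above defers to arch_acpi_processor_init_extcntl() |
| so the architecture can install its ops. The sketch below shows only the |
| registration pattern; the xen_* callbacks are hypothetical placeholders |
| (the real arch code forwards the collected data to the hypervisor via |
| platform hypercalls) and are not part of this patch. |
| |
| /* Sketch only - hypothetical arch-side registration. */ |
| static int xen_pm_notify(struct acpi_processor *pr, int event) |
| { |
|         /* Would translate pr->power / pr->performance into a hypercall. */ |
|         return 0; |
| } |
| |
| static int xen_hotplug_notify(struct acpi_processor *pr, int type) |
| { |
|         /* Would tell the hypervisor that a physical CPU was added/removed. */ |
|         return 0; |
| } |
| |
| static const struct processor_extcntl_ops xen_extcntl_ops = { |
|         .pm_ops = { |
|                 [PM_TYPE_IDLE] = xen_pm_notify, |
|                 [PM_TYPE_PERF] = xen_pm_notify, |
|         }, |
|         .hotplug = xen_hotplug_notify, |
| }; |
| |
| void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **ops) |
| { |
|         /* Only dom0 sees the real ACPI tables and drives the hardware. */ |
|         if (is_initial_xendomain()) |
|                 *ops = &xen_extcntl_ops; |
| } |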
| |
| @@ -417,7 +417,8 @@ static int acpi_processor_get_power_info |
| */ |
| cx.entry_method = ACPI_CSTATE_HALT; |
| snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); |
| - } else { |
| + /* This doesn't apply to external control case */ |
| + } else if (!processor_pm_external()) { |
| continue; |
| } |
| if (cx.type == ACPI_STATE_C1 && |
| @@ -456,6 +457,12 @@ static int acpi_processor_get_power_info |
| |
| cx.power = obj->integer.value; |
| |
| +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| + /* cache control methods to notify external logic */ |
| + if (processor_pm_external()) |
| + memcpy(&cx.reg, reg, sizeof(*reg)); |
| +#endif |
| + |
| current_count++; |
| memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx)); |
| |
| @@ -1225,6 +1232,11 @@ int __cpuinit acpi_processor_power_init( |
| acpi_driver_data(device)); |
| if (!entry) |
| return -EIO; |
| + |
| + if (processor_pm_external()) |
| + processor_notify_external(pr, |
| + PROCESSOR_PM_INIT, PM_TYPE_IDLE); |
| + |
| return 0; |
| } |
| |
| |
| |
| @@ -76,6 +76,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre |
| |
| static int acpi_processor_ppc_status; |
| |
| +#ifdef CONFIG_CPU_FREQ |
| static int acpi_processor_ppc_notifier(struct notifier_block *nb, |
| unsigned long event, void *data) |
| { |
| @@ -118,6 +119,7 @@ static int acpi_processor_ppc_notifier(s |
| static struct notifier_block acpi_ppc_notifier_block = { |
| .notifier_call = acpi_processor_ppc_notifier, |
| }; |
| +#endif /* CONFIG_CPU_FREQ */ |
| |
| static int acpi_processor_get_platform_limit(struct acpi_processor *pr) |
| { |
| @@ -162,9 +164,15 @@ int acpi_processor_ppc_has_changed(struc |
| if (ret < 0) |
| return (ret); |
| else |
| +#ifdef CONFIG_CPU_FREQ |
| return cpufreq_update_policy(pr->id); |
| +#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) |
| + return processor_notify_external(pr, |
| + PROCESSOR_PM_CHANGE, PM_TYPE_PERF); |
| +#endif |
| } |
| |
| +#ifdef CONFIG_CPU_FREQ |
| void acpi_processor_ppc_init(void) |
| { |
| if (!cpufreq_register_notifier |
| @@ -183,6 +191,7 @@ void acpi_processor_ppc_exit(void) |
| |
| acpi_processor_ppc_status &= ~PPC_REGISTERED; |
| } |
| +#endif /* CONFIG_CPU_FREQ */ |
| |
| static int acpi_processor_get_performance_control(struct acpi_processor *pr) |
| { |
| @@ -330,7 +339,10 @@ static int acpi_processor_get_performanc |
| return result; |
| } |
| |
| -static int acpi_processor_get_performance_info(struct acpi_processor *pr) |
| +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +static |
| +#endif |
| +int acpi_processor_get_performance_info(struct acpi_processor *pr) |
| { |
| int result = 0; |
| acpi_status status = AE_OK; |
| @@ -371,6 +383,7 @@ static int acpi_processor_get_performanc |
| return result; |
| } |
| |
| +#ifdef CONFIG_CPU_FREQ |
| int acpi_processor_notify_smm(struct module *calling_module) |
| { |
| acpi_status status; |
| @@ -431,8 +444,12 @@ int acpi_processor_notify_smm(struct mod |
| } |
| |
| EXPORT_SYMBOL(acpi_processor_notify_smm); |
| +#endif /* CONFIG_CPU_FREQ */ |
| |
| -static int acpi_processor_get_psd(struct acpi_processor *pr) |
| +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +static |
| +#endif |
| +int acpi_processor_get_psd(struct acpi_processor *pr) |
| { |
| int result = 0; |
| acpi_status status = AE_OK; |
| |
| |
| @@ -60,6 +60,7 @@ static struct notifier_block tts_notifie |
| static int acpi_sleep_prepare(u32 acpi_state) |
| { |
| #ifdef CONFIG_ACPI_SLEEP |
| +#ifndef CONFIG_ACPI_PV_SLEEP |
| /* do we have a wakeup address for S2 and S3? */ |
| if (acpi_state == ACPI_STATE_S3) { |
| if (!acpi_wakeup_address) { |
| @@ -69,6 +70,7 @@ static int acpi_sleep_prepare(u32 acpi_s |
| (acpi_physical_address)acpi_wakeup_address); |
| |
| } |
| +#endif |
| ACPI_FLUSH_CPU_CACHE(); |
| acpi_enable_wakeup_device_prep(acpi_state); |
| #endif |
| @@ -244,7 +246,14 @@ static int acpi_suspend_enter(suspend_st |
| break; |
| |
| case ACPI_STATE_S3: |
| +#ifdef CONFIG_ACPI_PV_SLEEP |
| + /* The hypervisor saves and restores the CPU context, |
| + * so we can skip the low-level housekeeping here. |
| + */ |
| + acpi_enter_sleep_state(acpi_state); |
| +#else |
| do_suspend_lowlevel(); |
| +#endif |
| break; |
| } |
| |
| |
| |
| @@ -265,6 +265,13 @@ static struct page *i8xx_alloc_pages(voi |
| if (page == NULL) |
| return NULL; |
| |
| +#ifdef CONFIG_XEN |
| + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) { |
| + __free_pages(page, 2); |
| + return NULL; |
| + } |
| +#endif |
| + |
| if (set_pages_uc(page, 4) < 0) { |
| set_pages_wb(page, 4); |
| __free_pages(page, 2); |
| @@ -281,6 +288,9 @@ static void i8xx_destroy_pages(struct pa |
| return; |
| |
| set_pages_wb(page, 4); |
| +#ifdef CONFIG_XEN |
| + xen_destroy_contiguous_region((unsigned long)page_address(page), 2); |
| +#endif |
| put_page(page); |
| __free_pages(page, 2); |
| atomic_dec(&agp_bridge->current_memory_agp); |
| |
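| In the intel-agp hunk above, the pages handed to the chipset must be |
| machine-contiguous and addressable below 4GiB under Xen, which is what |
| xen_create_contiguous_region() arranges (order 2, i.e. four pages, 32 |
| address bits). The helpers below are an illustrative sketch of that |
| allocate/free pattern, not part of the patch; the prototypes come from |
| the Xen headers of the respective tree. |
| |
| /* Sketch only - illustrative wrappers around the Xen contiguity helpers. */ |
| #include <linux/gfp.h> |
| #include <linux/mm.h> |
| |
| static void *alloc_xen_contig(unsigned int order, unsigned int address_bits) |
| { |
|         struct page *page = alloc_pages(GFP_KERNEL, order); |
| |
|         if (!page) |
|                 return NULL; |
| |
|         /* Exchange the 2^order frames for machine-contiguous ones. */ |
|         if (xen_create_contiguous_region((unsigned long)page_address(page), |
|                                          order, address_bits)) { |
|                 __free_pages(page, order); |
|                 return NULL; |
|         } |
|         return page_address(page); |
| } |
| |
| static void free_xen_contig(void *vaddr, unsigned int order) |
| { |
|         /* Hand the machine-contiguous frames back before freeing. */ |
|         xen_destroy_contiguous_region((unsigned long)vaddr, order); |
|         free_pages((unsigned long)vaddr, order); |
| } |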
| |
| @@ -110,6 +110,7 @@ void __attribute__((weak)) unxlate_dev_m |
| { |
| } |
| |
| +#ifndef ARCH_HAS_DEV_MEM |
| /* |
| * This funcion reads the *physical* memory. The f_pos points directly to the |
| * memory location. |
| @@ -254,6 +255,7 @@ static ssize_t write_mem(struct file * f |
| *ppos += written; |
| return written; |
| } |
| +#endif |
| |
| int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file, |
| unsigned long pfn, unsigned long size, pgprot_t *vma_prot) |
| @@ -345,6 +347,9 @@ static int mmap_mem(struct file * file, |
| static int mmap_kmem(struct file * file, struct vm_area_struct * vma) |
| { |
| unsigned long pfn; |
| +#ifdef CONFIG_XEN |
| + unsigned long i, count; |
| +#endif |
| |
| /* Turn a kernel-virtual address into a physical page frame */ |
| pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; |
| @@ -359,6 +364,13 @@ static int mmap_kmem(struct file * file, |
| if (!pfn_valid(pfn)) |
| return -EIO; |
| |
| +#ifdef CONFIG_XEN |
| + count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
| + for (i = 0; i < count; i++) |
| + if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i))) |
| + return -EIO; |
| +#endif |
| + |
| vma->vm_pgoff = pfn; |
| return mmap_mem(file, vma); |
| } |
| @@ -774,6 +786,7 @@ static int open_port(struct inode * inod |
| #define open_kmem open_mem |
| #define open_oldmem open_mem |
| |
| +#ifndef ARCH_HAS_DEV_MEM |
| static const struct file_operations mem_fops = { |
| .llseek = memory_lseek, |
| .read = read_mem, |
| @@ -782,6 +795,9 @@ static const struct file_operations mem_ |
| .open = open_mem, |
| .get_unmapped_area = get_unmapped_area_mem, |
| }; |
| +#else |
| +extern const struct file_operations mem_fops; |
| +#endif |
| |
| #ifdef CONFIG_DEVKMEM |
| static const struct file_operations kmem_fops = { |
| |
| |
| @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o |
| obj-$(CONFIG_TCG_NSC) += tpm_nsc.o |
| obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o |
| obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o |
| +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o |
| +tpm_xenu-y = tpm_xen.o tpm_vtpm.o |
| |
| |
| @@ -108,6 +108,9 @@ struct tpm_chip { |
| struct dentry **bios_dir; |
| |
| struct list_head list; |
| +#ifdef CONFIG_XEN |
| + void *priv; |
| +#endif |
| void (*release) (struct device *); |
| }; |
| |
| @@ -266,6 +269,18 @@ struct tpm_cmd_t { |
| |
| ssize_t tpm_getcap(struct device *, __be32, cap_t *, const char *); |
| |
| +#ifdef CONFIG_XEN |
| +static inline void *chip_get_private(const struct tpm_chip *chip) |
| +{ |
| + return chip->priv; |
| +} |
| + |
| +static inline void chip_set_private(struct tpm_chip *chip, void *priv) |
| +{ |
| + chip->priv = priv; |
| +} |
| +#endif |
| + |
| extern void tpm_get_timeouts(struct tpm_chip *); |
| extern void tpm_gen_interrupt(struct tpm_chip *); |
| extern void tpm_continue_selftest(struct tpm_chip *); |
| |
| |
| @@ -0,0 +1,542 @@ |
| +/* |
| + * Copyright (C) 2006 IBM Corporation |
| + * |
| + * Authors: |
| + * Stefan Berger <stefanb@us.ibm.com> |
| + * |
| + * Generic device driver part for device drivers in a virtualized |
| + * environment. |
| + * |
| + * This program is free software; you can redistribute it and/or |
| + * modify it under the terms of the GNU General Public License as |
| + * published by the Free Software Foundation, version 2 of the |
| + * License. |
| + * |
| + */ |
| + |
| +#include <asm/uaccess.h> |
| +#include <linux/list.h> |
| +#include <linux/device.h> |
| +#include <linux/interrupt.h> |
| +#include <linux/platform_device.h> |
| +#include "tpm.h" |
| +#include "tpm_vtpm.h" |
| + |
| +/* read status bits */ |
| +enum { |
| + STATUS_BUSY = 0x01, |
| + STATUS_DATA_AVAIL = 0x02, |
| + STATUS_READY = 0x04 |
| +}; |
| + |
| +struct transmission { |
| + struct list_head next; |
| + |
| + unsigned char *request; |
| + size_t request_len; |
| + size_t request_buflen; |
| + |
| + unsigned char *response; |
| + size_t response_len; |
| + size_t response_buflen; |
| + |
| + unsigned int flags; |
| +}; |
| + |
| +enum { |
| + TRANSMISSION_FLAG_WAS_QUEUED = 0x1 |
| +}; |
| + |
| + |
| +enum { |
| + DATAEX_FLAG_QUEUED_ONLY = 0x1 |
| +}; |
| + |
| + |
| +/* local variables */ |
| + |
| +/* local function prototypes */ |
| +static int _vtpm_send_queued(struct tpm_chip *chip); |
| + |
| + |
| +/* ============================================================= |
| + * Some utility functions |
| + * ============================================================= |
| + */ |
| +static void vtpm_state_init(struct vtpm_state *vtpms) |
| +{ |
| + vtpms->current_request = NULL; |
| + spin_lock_init(&vtpms->req_list_lock); |
| + init_waitqueue_head(&vtpms->req_wait_queue); |
| + INIT_LIST_HEAD(&vtpms->queued_requests); |
| + |
| + vtpms->current_response = NULL; |
| + spin_lock_init(&vtpms->resp_list_lock); |
| + init_waitqueue_head(&vtpms->resp_wait_queue); |
| + |
| + vtpms->disconnect_time = jiffies; |
| +} |
| + |
| + |
| +static inline struct transmission *transmission_alloc(void) |
| +{ |
| + return kzalloc(sizeof(struct transmission), GFP_ATOMIC); |
| +} |
| + |
| +static unsigned char * |
| +transmission_set_req_buffer(struct transmission *t, |
| + unsigned char *buffer, size_t len) |
| +{ |
| + if (t->request_buflen < len) { |
| + kfree(t->request); |
| + t->request = kmalloc(len, GFP_KERNEL); |
| + if (!t->request) { |
| + t->request_buflen = 0; |
| + return NULL; |
| + } |
| + t->request_buflen = len; |
| + } |
| + |
| + memcpy(t->request, buffer, len); |
| + t->request_len = len; |
| + |
| + return t->request; |
| +} |
| + |
| +static unsigned char * |
| +transmission_set_res_buffer(struct transmission *t, |
| + const unsigned char *buffer, size_t len) |
| +{ |
| + if (t->response_buflen < len) { |
| + kfree(t->response); |
| + t->response = kmalloc(len, GFP_ATOMIC); |
| + if (!t->response) { |
| + t->response_buflen = 0; |
| + return NULL; |
| + } |
| + t->response_buflen = len; |
| + } |
| + |
| + memcpy(t->response, buffer, len); |
| + t->response_len = len; |
| + |
| + return t->response; |
| +} |
| + |
| +static inline void transmission_free(struct transmission *t) |
| +{ |
| + kfree(t->request); |
| + kfree(t->response); |
| + kfree(t); |
| +} |
| + |
| +/* ============================================================= |
| + * Interface with the lower layer driver |
| + * ============================================================= |
| + */ |
| +/* |
| + * Lower layer uses this function to make a response available. |
| + */ |
| +int vtpm_vd_recv(const struct tpm_chip *chip, |
| + const unsigned char *buffer, size_t count, |
| + void *ptr) |
| +{ |
| + unsigned long flags; |
| + int ret_size = 0; |
| + struct transmission *t; |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + /* |
| + * The request list must contain exactly one request, |
| + * and that element must be the one that was passed |
| + * in from the front-end. |
| + */ |
| + spin_lock_irqsave(&vtpms->resp_list_lock, flags); |
| + if (vtpms->current_request != ptr) { |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + return 0; |
| + } |
| + |
| + if ((t = vtpms->current_request)) { |
| + transmission_free(t); |
| + vtpms->current_request = NULL; |
| + } |
| + |
| + t = transmission_alloc(); |
| + if (t) { |
| + if (!transmission_set_res_buffer(t, buffer, count)) { |
| + transmission_free(t); |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + return -ENOMEM; |
| + } |
| + ret_size = count; |
| + vtpms->current_response = t; |
| + wake_up_interruptible(&vtpms->resp_wait_queue); |
| + } |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + |
| + return ret_size; |
| +} |
| + |
| + |
| +/* |
| + * Lower layer indicates its status (connected/disconnected) |
| + */ |
| +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status) |
| +{ |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + vtpms->vd_status = vd_status; |
| + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { |
| + vtpms->disconnect_time = jiffies; |
| + } |
| +} |
| + |
| +/* ============================================================= |
| + * Interface with the generic TPM driver |
| + * ============================================================= |
| + */ |
| +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) |
| +{ |
| + int rc = 0; |
| + unsigned long flags; |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + /* |
| + * Check whether the previous operation only queued the command. |
| + * In that case there won't be a response, so just reset the |
| + * flag and return from here. In any other case a response |
| + * should arrive from the back-end. |
| + */ |
| + spin_lock_irqsave(&vtpms->resp_list_lock, flags); |
| + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) { |
| + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY; |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + /* |
| + * The first few commands (measurements) must be |
| + * queued since it might not be possible to talk to the |
| + * TPM, yet. |
| + * Return a response of up to 30 '0's. |
| + */ |
| + |
| + count = min_t(size_t, count, 30); |
| + memset(buf, 0x0, count); |
| + return count; |
| + } |
| + /* |
| + * Check whether something is in the responselist and if |
| + * there's nothing in the list wait for something to appear. |
| + */ |
| + |
| + if (!vtpms->current_response) { |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue, |
| + 1000); |
| + spin_lock_irqsave(&vtpms->resp_list_lock ,flags); |
| + } |
| + |
| + if (vtpms->current_response) { |
| + struct transmission *t = vtpms->current_response; |
| + vtpms->current_response = NULL; |
| + rc = min(count, t->response_len); |
| + memcpy(buf, t->response, rc); |
| + transmission_free(t); |
| + } |
| + |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + return rc; |
| +} |
| + |
| +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) |
| +{ |
| + int rc = 0; |
| + unsigned long flags; |
| + struct transmission *t = transmission_alloc(); |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + if (!t) |
| + return -ENOMEM; |
| + /* |
| + * If there's a current request, it must be the |
| + * previous request that has timed out. |
| + */ |
| + spin_lock_irqsave(&vtpms->req_list_lock, flags); |
| + if (vtpms->current_request != NULL) { |
| + printk("WARNING: Sending although there is a request outstanding.\n" |
| + " Previous request must have timed out.\n"); |
| + transmission_free(vtpms->current_request); |
| + vtpms->current_request = NULL; |
| + } |
| + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); |
| + |
| + /* |
| + * Queue the packet if the driver below is not ready yet, |
| + * or if there is already a packet waiting in the queue. |
| + * If the driver below is ready, first unqueue and send |
| + * all previously queued packets, and only then send the |
| + * current packet. |
| + * For each unqueued packet, except for the last |
| + * (= current) one, the receive path is used to wait |
| + * for the corresponding response to come back before |
| + * the next queued packet is sent; only then is the |
| + * current request handed to the driver below. |
| + */ |
| + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { |
| + if (time_after(jiffies, |
| + vtpms->disconnect_time + HZ * 10)) { |
| + rc = -ENOENT; |
| + } else { |
| + goto queue_it; |
| + } |
| + } else { |
| + /* |
| + * Send all queued packets. |
| + */ |
| + if (_vtpm_send_queued(chip) == 0) { |
| + |
| + vtpms->current_request = t; |
| + |
| + rc = vtpm_vd_send(vtpms->tpm_private, |
| + buf, |
| + count, |
| + t); |
| + /* |
| + * The generic TPM driver will call |
| + * the function to receive the response. |
| + */ |
| + if (rc < 0) { |
| + vtpms->current_request = NULL; |
| + goto queue_it; |
| + } |
| + } else { |
| +queue_it: |
| + if (!transmission_set_req_buffer(t, buf, count)) { |
| + transmission_free(t); |
| + rc = -ENOMEM; |
| + goto exit; |
| + } |
| + /* |
| + * An error occurred. Don't even try |
| + * to send the current request. Just |
| + * queue it. |
| + */ |
| + spin_lock_irqsave(&vtpms->req_list_lock, flags); |
| + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY; |
| + list_add_tail(&t->next, &vtpms->queued_requests); |
| + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); |
| + } |
| + } |
| + |
| +exit: |
| + return rc; |
| +} |
| + |
| + |
| +/* |
| + * Send all queued requests. |
| + */ |
| +static int _vtpm_send_queued(struct tpm_chip *chip) |
| +{ |
| + int rc; |
| + int error = 0; |
| + long flags; |
| + unsigned char buffer[1]; |
| + struct vtpm_state *vtpms; |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + spin_lock_irqsave(&vtpms->req_list_lock, flags); |
| + |
| + while (!list_empty(&vtpms->queued_requests)) { |
| + /* |
| + * Need to dequeue them. |
| + * Read the result into a dummy buffer. |
| + */ |
| + struct transmission *qt = (struct transmission *) |
| + vtpms->queued_requests.next; |
| + list_del(&qt->next); |
| + vtpms->current_request = qt; |
| + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); |
| + |
| + rc = vtpm_vd_send(vtpms->tpm_private, |
| + qt->request, |
| + qt->request_len, |
| + qt); |
| + |
| + if (rc < 0) { |
| + spin_lock_irqsave(&vtpms->req_list_lock, flags); |
| + if ((qt = vtpms->current_request) != NULL) { |
| + /* |
| + * requeue it at the beginning |
| + * of the list |
| + */ |
| + list_add(&qt->next, |
| + &vtpms->queued_requests); |
| + } |
| + vtpms->current_request = NULL; |
| + error = 1; |
| + break; |
| + } |
| + /* |
| + * After this point qt is not valid anymore! |
| + * It is freed when the front-end is delivering |
| + * the data by calling tpm_recv |
| + */ |
| + /* |
| + * Receive response into provided dummy buffer |
| + */ |
| + rc = vtpm_recv(chip, buffer, sizeof(buffer)); |
| + spin_lock_irqsave(&vtpms->req_list_lock, flags); |
| + } |
| + |
| + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); |
| + |
| + return error; |
| +} |
| + |
| +static void vtpm_cancel(struct tpm_chip *chip) |
| +{ |
| + unsigned long flags; |
| + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + spin_lock_irqsave(&vtpms->resp_list_lock,flags); |
| + |
| + if (!vtpms->current_response && vtpms->current_request) { |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + interruptible_sleep_on(&vtpms->resp_wait_queue); |
| + spin_lock_irqsave(&vtpms->resp_list_lock,flags); |
| + } |
| + |
| + if (vtpms->current_response) { |
| + struct transmission *t = vtpms->current_response; |
| + vtpms->current_response = NULL; |
| + transmission_free(t); |
| + } |
| + |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock,flags); |
| +} |
| + |
| +static u8 vtpm_status(struct tpm_chip *chip) |
| +{ |
| + u8 rc = 0; |
| + unsigned long flags; |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = (struct vtpm_state *)chip_get_private(chip); |
| + |
| + spin_lock_irqsave(&vtpms->resp_list_lock, flags); |
| + /* |
| + * Data are available if: |
| + * - there's a current response |
| + * - the last packet was queued only (this is fake, but necessary to |
| + * get the generic TPM layer to call the receive function.) |
| + */ |
| + if (vtpms->current_response || |
| + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) { |
| + rc = STATUS_DATA_AVAIL; |
| + } else if (!vtpms->current_response && !vtpms->current_request) { |
| + rc = STATUS_READY; |
| + } |
| + |
| + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); |
| + return rc; |
| +} |
| + |
| +static struct file_operations vtpm_ops = { |
| + .owner = THIS_MODULE, |
| + .llseek = no_llseek, |
| + .open = tpm_open, |
| + .read = tpm_read, |
| + .write = tpm_write, |
| + .release = tpm_release, |
| +}; |
| + |
| +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); |
| +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); |
| +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); |
| +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); |
| +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); |
| +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, |
| + NULL); |
| +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); |
| +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel); |
| + |
| +static struct attribute *vtpm_attrs[] = { |
| + &dev_attr_pubek.attr, |
| + &dev_attr_pcrs.attr, |
| + &dev_attr_enabled.attr, |
| + &dev_attr_active.attr, |
| + &dev_attr_owned.attr, |
| + &dev_attr_temp_deactivated.attr, |
| + &dev_attr_caps.attr, |
| + &dev_attr_cancel.attr, |
| + NULL, |
| +}; |
| + |
| +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs }; |
| + |
| +#define TPM_LONG_TIMEOUT (10 * 60 * HZ) |
| + |
| +static struct tpm_vendor_specific tpm_vtpm = { |
| + .recv = vtpm_recv, |
| + .send = vtpm_send, |
| + .cancel = vtpm_cancel, |
| + .status = vtpm_status, |
| + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL, |
| + .req_complete_val = STATUS_DATA_AVAIL, |
| + .req_canceled = STATUS_READY, |
| + .attr_group = &vtpm_attr_grp, |
| + .miscdev = { |
| + .fops = &vtpm_ops, |
| + }, |
| + .duration = { |
| + TPM_LONG_TIMEOUT, |
| + TPM_LONG_TIMEOUT, |
| + TPM_LONG_TIMEOUT, |
| + }, |
| +}; |
| + |
| +struct tpm_chip *init_vtpm(struct device *dev, |
| + struct tpm_private *tp) |
| +{ |
| + long rc; |
| + struct tpm_chip *chip; |
| + struct vtpm_state *vtpms; |
| + |
| + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL); |
| + if (!vtpms) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + vtpm_state_init(vtpms); |
| + vtpms->tpm_private = tp; |
| + |
| + chip = tpm_register_hardware(dev, &tpm_vtpm); |
| + if (!chip) { |
| + rc = -ENODEV; |
| + goto err_free_mem; |
| + } |
| + |
| + chip_set_private(chip, vtpms); |
| + |
| + return chip; |
| + |
| +err_free_mem: |
| + kfree(vtpms); |
| + |
| + return ERR_PTR(rc); |
| +} |
| + |
| +void cleanup_vtpm(struct device *dev) |
| +{ |
| + struct tpm_chip *chip = dev_get_drvdata(dev); |
| + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip); |
| + tpm_remove_hardware(dev); |
| + kfree(vtpms); |
| +} |
| |
| |
| @@ -0,0 +1,55 @@ |
| +#ifndef TPM_VTPM_H |
| +#define TPM_VTPM_H |
| + |
| +struct tpm_chip; |
| +struct tpm_private; |
| + |
| +struct vtpm_state { |
| + struct transmission *current_request; |
| + spinlock_t req_list_lock; |
| + wait_queue_head_t req_wait_queue; |
| + |
| + struct list_head queued_requests; |
| + |
| + struct transmission *current_response; |
| + spinlock_t resp_list_lock; |
| + wait_queue_head_t resp_wait_queue; // processes waiting for responses |
| + |
| + u8 vd_status; |
| + u8 flags; |
| + |
| + unsigned long disconnect_time; |
| + |
| + /* |
| + * The following is a private structure of the underlying |
| + * driver. It is passed as parameter in the send function. |
| + */ |
| + struct tpm_private *tpm_private; |
| +}; |
| + |
| + |
| +enum vdev_status { |
| + TPM_VD_STATUS_DISCONNECTED = 0x0, |
| + TPM_VD_STATUS_CONNECTED = 0x1 |
| +}; |
| + |
| +/* this function is called from tpm_vtpm.c */ |
| +int vtpm_vd_send(struct tpm_private * tp, |
| + const u8 * buf, size_t count, void *ptr); |
| + |
| +/* these functions are offered by tpm_vtpm.c */ |
| +struct tpm_chip *init_vtpm(struct device *, |
| + struct tpm_private *); |
| +void cleanup_vtpm(struct device *); |
| +int vtpm_vd_recv(const struct tpm_chip* chip, |
| + const unsigned char *buffer, size_t count, void *ptr); |
| +void vtpm_vd_status(const struct tpm_chip *, u8 status); |
| + |
| +static inline struct tpm_private *tpm_private_from_dev(struct device *dev) |
| +{ |
| + struct tpm_chip *chip = dev_get_drvdata(dev); |
| + struct vtpm_state *vtpms = chip_get_private(chip); |
| + return vtpms->tpm_private; |
| +} |
| + |
| +#endif |
| |
| |
| @@ -0,0 +1,722 @@ |
| +/* |
| + * Copyright (c) 2005, IBM Corporation |
| + * |
| + * Author: Stefan Berger, stefanb@us.ibm.com |
| + * Grant table support: Mahadevan Gomathisankaran |
| + * |
| + * This code has been derived from drivers/xen/netfront/netfront.c |
| + * |
| + * Copyright (c) 2002-2004, K A Fraser |
| + * |
| + * This program is free software; you can redistribute it and/or |
| + * modify it under the terms of the GNU General Public License version 2 |
| + * as published by the Free Software Foundation; or, when distributed |
| + * separately from the Linux kernel or incorporated into other |
| + * software packages, subject to the following license: |
| + * |
| + * Permission is hereby granted, free of charge, to any person obtaining a copy |
| + * of this source file (the "Software"), to deal in the Software without |
| + * restriction, including without limitation the rights to use, copy, modify, |
| + * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
| + * and to permit persons to whom the Software is furnished to do so, subject to |
| + * the following conditions: |
| + * |
| + * The above copyright notice and this permission notice shall be included in |
| + * all copies or substantial portions of the Software. |
| + * |
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| + * IN THE SOFTWARE. |
| + */ |
| + |
| +#include <linux/errno.h> |
| +#include <linux/err.h> |
| +#include <linux/interrupt.h> |
| +#include <linux/mutex.h> |
| +#include <asm/uaccess.h> |
| +#include <xen/evtchn.h> |
| +#include <xen/interface/grant_table.h> |
| +#include <xen/interface/io/tpmif.h> |
| +#include <xen/gnttab.h> |
| +#include <xen/xenbus.h> |
| +#include "tpm.h" |
| +#include "tpm_vtpm.h" |
| + |
| +#undef DEBUG |
| + |
| +/* local structures */ |
| +struct tpm_private { |
| + struct tpm_chip *chip; |
| + |
| + tpmif_tx_interface_t *tx; |
| + atomic_t refcnt; |
| + unsigned int irq; |
| + u8 is_connected; |
| + u8 is_suspended; |
| + |
| + spinlock_t tx_lock; |
| + |
| + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE]; |
| + |
| + atomic_t tx_busy; |
| + void *tx_remember; |
| + |
| + domid_t backend_id; |
| + wait_queue_head_t wait_q; |
| + |
| + struct xenbus_device *dev; |
| + int ring_ref; |
| +}; |
| + |
| +struct tx_buffer { |
| + unsigned int size; // available space in data |
| + unsigned int len; // used space in data |
| + unsigned char *data; // pointer to a page |
| +}; |
| + |
| + |
| +/* locally visible variables */ |
| +static grant_ref_t gref_head; |
| +static struct tpm_private *my_priv; |
| + |
| +/* local function prototypes */ |
| +static irqreturn_t tpmif_int(int irq, |
| + void *tpm_priv, |
| + struct pt_regs *ptregs); |
| +static void tpmif_rx_action(unsigned long unused); |
| +static int tpmif_connect(struct xenbus_device *dev, |
| + struct tpm_private *tp, |
| + domid_t domid); |
| +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); |
| +static int tpmif_allocate_tx_buffers(struct tpm_private *tp); |
| +static void tpmif_free_tx_buffers(struct tpm_private *tp); |
| +static void tpmif_set_connected_state(struct tpm_private *tp, |
| + u8 newstate); |
| +static int tpm_xmit(struct tpm_private *tp, |
| + const u8 * buf, size_t count, int userbuffer, |
| + void *remember); |
| +static void destroy_tpmring(struct tpm_private *tp); |
| +void __exit tpmif_exit(void); |
| + |
| +#define DPRINTK(fmt, args...) \ |
| + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) |
| +#define IPRINTK(fmt, args...) \ |
| + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) |
| +#define WPRINTK(fmt, args...) \ |
| + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) |
| + |
| +#define GRANT_INVALID_REF 0 |
| + |
| + |
| +static inline int |
| +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len, |
| + int isuserbuffer) |
| +{ |
| + int copied = len; |
| + |
| + if (len > txb->size) |
| + copied = txb->size; |
| + if (isuserbuffer) { |
| + if (copy_from_user(txb->data, src, copied)) |
| + return -EFAULT; |
| + } else { |
| + memcpy(txb->data, src, copied); |
| + } |
| + txb->len = len; |
| + return copied; |
| +} |
| + |
| +static inline struct tx_buffer *tx_buffer_alloc(void) |
| +{ |
| + struct tx_buffer *txb; |
| + |
| + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL); |
| + if (!txb) |
| + return NULL; |
| + |
| + txb->len = 0; |
| + txb->size = PAGE_SIZE; |
| + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); |
| + if (txb->data == NULL) { |
| + kfree(txb); |
| + txb = NULL; |
| + } |
| + |
| + return txb; |
| +} |
| + |
| + |
| +static inline void tx_buffer_free(struct tx_buffer *txb) |
| +{ |
| + if (txb) { |
| + free_page((long)txb->data); |
| + kfree(txb); |
| + } |
| +} |
| + |
| +/************************************************************** |
| + Utility function for the tpm_private structure |
| +**************************************************************/ |
| +static void tpm_private_init(struct tpm_private *tp) |
| +{ |
| + spin_lock_init(&tp->tx_lock); |
| + init_waitqueue_head(&tp->wait_q); |
| + atomic_set(&tp->refcnt, 1); |
| +} |
| + |
| +static void tpm_private_put(void) |
| +{ |
| + if (!atomic_dec_and_test(&my_priv->refcnt)) |
| + return; |
| + |
| + tpmif_free_tx_buffers(my_priv); |
| + kfree(my_priv); |
| + my_priv = NULL; |
| +} |
| + |
| +static struct tpm_private *tpm_private_get(void) |
| +{ |
| + int err; |
| + |
| + if (my_priv) { |
| + atomic_inc(&my_priv->refcnt); |
| + return my_priv; |
| + } |
| + |
| + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); |
| + if (!my_priv) |
| + return NULL; |
| + |
| + tpm_private_init(my_priv); |
| + err = tpmif_allocate_tx_buffers(my_priv); |
| + if (err < 0) |
| + tpm_private_put(); |
| + |
| + return my_priv; |
| +} |
| + |
| +/************************************************************** |
| + |
| + The interface to let the tpm plugin register its callback |
| + function and send data to another partition using this module |
| + |
| +**************************************************************/ |
| + |
| +static DEFINE_MUTEX(suspend_lock); |
| +/* |
| + * Send data via this module by calling this function |
| + */ |
| +int vtpm_vd_send(struct tpm_private *tp, |
| + const u8 * buf, size_t count, void *ptr) |
| +{ |
| + int sent; |
| + |
| + mutex_lock(&suspend_lock); |
| + sent = tpm_xmit(tp, buf, count, 0, ptr); |
| + mutex_unlock(&suspend_lock); |
| + |
| + return sent; |
| +} |
| + |
| +/************************************************************** |
| + XENBUS support code |
| +**************************************************************/ |
| + |
| +static int setup_tpmring(struct xenbus_device *dev, |
| + struct tpm_private *tp) |
| +{ |
| + tpmif_tx_interface_t *sring; |
| + int err; |
| + |
| + tp->ring_ref = GRANT_INVALID_REF; |
| + |
| + sring = (void *)__get_free_page(GFP_KERNEL); |
| + if (!sring) { |
| + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); |
| + return -ENOMEM; |
| + } |
| + tp->tx = sring; |
| + |
| + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx)); |
| + if (err < 0) { |
| + free_page((unsigned long)sring); |
| + tp->tx = NULL; |
| + xenbus_dev_fatal(dev, err, "allocating grant reference"); |
| + goto fail; |
| + } |
| + tp->ring_ref = err; |
| + |
| + err = tpmif_connect(dev, tp, dev->otherend_id); |
| + if (err) |
| + goto fail; |
| + |
| + return 0; |
| +fail: |
| + destroy_tpmring(tp); |
| + return err; |
| +} |
| + |
| + |
| +static void destroy_tpmring(struct tpm_private *tp) |
| +{ |
| + tpmif_set_connected_state(tp, 0); |
| + |
| + if (tp->ring_ref != GRANT_INVALID_REF) { |
| + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx); |
| + tp->ring_ref = GRANT_INVALID_REF; |
| + tp->tx = NULL; |
| + } |
| + |
| + if (tp->irq) |
| + unbind_from_irqhandler(tp->irq, tp); |
| + |
| + tp->irq = 0; |
| +} |
| + |
| + |
| +static int talk_to_backend(struct xenbus_device *dev, |
| + struct tpm_private *tp) |
| +{ |
| + const char *message = NULL; |
| + int err; |
| + struct xenbus_transaction xbt; |
| + |
| + err = setup_tpmring(dev, tp); |
| + if (err) { |
| + xenbus_dev_fatal(dev, err, "setting up ring"); |
| + goto out; |
| + } |
| + |
| +again: |
| + err = xenbus_transaction_start(&xbt); |
| + if (err) { |
| + xenbus_dev_fatal(dev, err, "starting transaction"); |
| + goto destroy_tpmring; |
| + } |
| + |
| + err = xenbus_printf(xbt, dev->nodename, |
| + "ring-ref","%u", tp->ring_ref); |
| + if (err) { |
| + message = "writing ring-ref"; |
| + goto abort_transaction; |
| + } |
| + |
| + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", |
| + irq_to_evtchn_port(tp->irq)); |
| + if (err) { |
| + message = "writing event-channel"; |
| + goto abort_transaction; |
| + } |
| + |
| + err = xenbus_transaction_end(xbt, 0); |
| + if (err == -EAGAIN) |
| + goto again; |
| + if (err) { |
| + xenbus_dev_fatal(dev, err, "completing transaction"); |
| + goto destroy_tpmring; |
| + } |
| + |
| + xenbus_switch_state(dev, XenbusStateConnected); |
| + |
| + return 0; |
| + |
| +abort_transaction: |
| + xenbus_transaction_end(xbt, 1); |
| + if (message) |
| + xenbus_dev_error(dev, err, "%s", message); |
| +destroy_tpmring: |
| + destroy_tpmring(tp); |
| +out: |
| + return err; |
| +} |
| + |
| +/** |
| + * Callback received when the backend's state changes. |
| + */ |
| +static void backend_changed(struct xenbus_device *dev, |
| + enum xenbus_state backend_state) |
| +{ |
| + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); |
| + DPRINTK("\n"); |
| + |
| + switch (backend_state) { |
| + case XenbusStateInitialising: |
| + case XenbusStateInitWait: |
| + case XenbusStateInitialised: |
| + case XenbusStateReconfiguring: |
| + case XenbusStateReconfigured: |
| + case XenbusStateUnknown: |
| + break; |
| + |
| + case XenbusStateConnected: |
| + tpmif_set_connected_state(tp, 1); |
| + break; |
| + |
| + case XenbusStateClosing: |
| + tpmif_set_connected_state(tp, 0); |
| + xenbus_frontend_closed(dev); |
| + break; |
| + |
| + case XenbusStateClosed: |
| + tpmif_set_connected_state(tp, 0); |
| + if (tp->is_suspended == 0) |
| + device_unregister(&dev->dev); |
| + xenbus_frontend_closed(dev); |
| + break; |
| + } |
| +} |
| + |
| +static int tpmfront_probe(struct xenbus_device *dev, |
| + const struct xenbus_device_id *id) |
| +{ |
| + int err; |
| + int handle; |
| + struct tpm_private *tp = tpm_private_get(); |
| + |
| + if (!tp) |
| + return -ENOMEM; |
| + |
| + tp->chip = init_vtpm(&dev->dev, tp); |
| + if (IS_ERR(tp->chip)) |
| + return PTR_ERR(tp->chip); |
| + |
| + err = xenbus_scanf(XBT_NIL, dev->nodename, |
| + "handle", "%i", &handle); |
| + if (XENBUS_EXIST_ERR(err)) |
| + return err; |
| + |
| + if (err < 0) { |
| + xenbus_dev_fatal(dev,err,"reading virtual-device"); |
| + return err; |
| + } |
| + |
| + tp->dev = dev; |
| + |
| + err = talk_to_backend(dev, tp); |
| + if (err) { |
| + tpm_private_put(); |
| + return err; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| + |
| +static int tpmfront_remove(struct xenbus_device *dev) |
| +{ |
| + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); |
| + destroy_tpmring(tp); |
| + cleanup_vtpm(&dev->dev); |
| + return 0; |
| +} |
| + |
| +static int tpmfront_suspend(struct xenbus_device *dev) |
| +{ |
| + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); |
| + u32 ctr; |
| + |
| + /* Take the lock, preventing any application from sending. */ |
| + mutex_lock(&suspend_lock); |
| + tp->is_suspended = 1; |
| + |
| + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) { |
| + if ((ctr % 10) == 0) |
| + printk("TPM-FE [INFO]: Waiting for outstanding " |
| + "request.\n"); |
| + /* Wait for a request to be responded to. */ |
| + interruptible_sleep_on_timeout(&tp->wait_q, 100); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int tpmfront_suspend_finish(struct tpm_private *tp) |
| +{ |
| + tp->is_suspended = 0; |
| + /* Allow applications to send again. */ |
| + mutex_unlock(&suspend_lock); |
| + return 0; |
| +} |
| + |
| +static int tpmfront_suspend_cancel(struct xenbus_device *dev) |
| +{ |
| + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); |
| + return tpmfront_suspend_finish(tp); |
| +} |
| + |
| +static int tpmfront_resume(struct xenbus_device *dev) |
| +{ |
| + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); |
| + destroy_tpmring(tp); |
| + return talk_to_backend(dev, tp); |
| +} |
| + |
| +static int tpmif_connect(struct xenbus_device *dev, |
| + struct tpm_private *tp, |
| + domid_t domid) |
| +{ |
| + int err; |
| + |
| + tp->backend_id = domid; |
| + |
| + err = bind_listening_port_to_irqhandler( |
| + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); |
| + if (err <= 0) { |
| + WPRINTK("bind_listening_port_to_irqhandler failed " |
| + "(err=%d)\n", err); |
| + return err; |
| + } |
| + tp->irq = err; |
| + |
| + return 0; |
| +} |
| + |
| +static struct xenbus_device_id tpmfront_ids[] = { |
| + { "vtpm" }, |
| + { "" } |
| +}; |
| + |
| +static struct xenbus_driver tpmfront = { |
| + .name = "vtpm", |
| + .owner = THIS_MODULE, |
| + .ids = tpmfront_ids, |
| + .probe = tpmfront_probe, |
| + .remove = tpmfront_remove, |
| + .resume = tpmfront_resume, |
| + .otherend_changed = backend_changed, |
| + .suspend = tpmfront_suspend, |
| + .suspend_cancel = tpmfront_suspend_cancel, |
| +}; |
| + |
| +static void __init init_tpm_xenbus(void) |
| +{ |
| + xenbus_register_frontend(&tpmfront); |
| +} |
| + |
| +static int tpmif_allocate_tx_buffers(struct tpm_private *tp) |
| +{ |
| + unsigned int i; |
| + |
| + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { |
| + tp->tx_buffers[i] = tx_buffer_alloc(); |
| + if (!tp->tx_buffers[i]) { |
| + tpmif_free_tx_buffers(tp); |
| + return -ENOMEM; |
| + } |
| + } |
| + return 0; |
| +} |
| + |
| +static void tpmif_free_tx_buffers(struct tpm_private *tp) |
| +{ |
| + unsigned int i; |
| + |
| + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) |
| + tx_buffer_free(tp->tx_buffers[i]); |
| +} |
| + |
| +static void tpmif_rx_action(unsigned long priv) |
| +{ |
| + struct tpm_private *tp = (struct tpm_private *)priv; |
| + int i = 0; |
| + unsigned int received; |
| + unsigned int offset = 0; |
| + u8 *buffer; |
| + tpmif_tx_request_t *tx = &tp->tx->ring[i].req; |
| + |
| + atomic_set(&tp->tx_busy, 0); |
| + wake_up_interruptible(&tp->wait_q); |
| + |
| + received = tx->size; |
| + |
| + buffer = kmalloc(received, GFP_ATOMIC); |
| + if (!buffer) |
| + return; |
| + |
| + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) { |
| + struct tx_buffer *txb = tp->tx_buffers[i]; |
| + tpmif_tx_request_t *tx; |
| + unsigned int tocopy; |
| + |
| + tx = &tp->tx->ring[i].req; |
| + tocopy = tx->size; |
| + if (tocopy > PAGE_SIZE) |
| + tocopy = PAGE_SIZE; |
| + |
| + memcpy(&buffer[offset], txb->data, tocopy); |
| + |
| + gnttab_release_grant_reference(&gref_head, tx->ref); |
| + |
| + offset += tocopy; |
| + } |
| + |
| + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember); |
| + kfree(buffer); |
| +} |
| + |
| + |
| +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) |
| +{ |
| + struct tpm_private *tp = tpm_priv; |
| + unsigned long flags; |
| + |
| + spin_lock_irqsave(&tp->tx_lock, flags); |
| + tpmif_rx_tasklet.data = (unsigned long)tp; |
| + tasklet_schedule(&tpmif_rx_tasklet); |
| + spin_unlock_irqrestore(&tp->tx_lock, flags); |
| + |
| + return IRQ_HANDLED; |
| +} |
| + |
| + |
| +static int tpm_xmit(struct tpm_private *tp, |
| + const u8 * buf, size_t count, int isuserbuffer, |
| + void *remember) |
| +{ |
| + tpmif_tx_request_t *tx; |
| + TPMIF_RING_IDX i; |
| + unsigned int offset = 0; |
| + |
| + spin_lock_irq(&tp->tx_lock); |
| + |
| + if (unlikely(atomic_read(&tp->tx_busy))) { |
| +		printk(KERN_WARNING "tpm_xmit: There's an outstanding request/response " |
| + "on the way!\n"); |
| + spin_unlock_irq(&tp->tx_lock); |
| + return -EBUSY; |
| + } |
| + |
| + if (tp->is_connected != 1) { |
| + spin_unlock_irq(&tp->tx_lock); |
| + return -EIO; |
| + } |
| + |
| + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) { |
| + struct tx_buffer *txb = tp->tx_buffers[i]; |
| + int copied; |
| + |
| + if (!txb) { |
| +			DPRINTK("txb (i=%d) is NULL. buffers initialized?\n" |
| + "Not transmitting anything!\n", i); |
| + spin_unlock_irq(&tp->tx_lock); |
| + return -EFAULT; |
| + } |
| + |
| + copied = tx_buffer_copy(txb, &buf[offset], count, |
| + isuserbuffer); |
| + if (copied < 0) { |
| + /* An error occurred */ |
| + spin_unlock_irq(&tp->tx_lock); |
| + return copied; |
| + } |
| + count -= copied; |
| + offset += copied; |
| + |
| + tx = &tp->tx->ring[i].req; |
| + tx->addr = virt_to_machine(txb->data); |
| + tx->size = txb->len; |
| + tx->unused = 0; |
| + |
| + DPRINTK("First 4 characters sent by TPM-FE are " |
| + "0x%02x 0x%02x 0x%02x 0x%02x\n", |
| + txb->data[0],txb->data[1],txb->data[2],txb->data[3]); |
| + |
| +		/* Get the grant table reference for this page. */ |
| + tx->ref = gnttab_claim_grant_reference(&gref_head); |
| + if (tx->ref == -ENOSPC) { |
| + spin_unlock_irq(&tp->tx_lock); |
| + DPRINTK("Grant table claim reference failed in " |
| + "func:%s line:%d file:%s\n", |
| + __FUNCTION__, __LINE__, __FILE__); |
| + return -ENOSPC; |
| + } |
| + gnttab_grant_foreign_access_ref(tx->ref, |
| + tp->backend_id, |
| + virt_to_mfn(txb->data), |
| + 0 /*RW*/); |
| + wmb(); |
| + } |
| + |
| + atomic_set(&tp->tx_busy, 1); |
| + tp->tx_remember = remember; |
| + |
| + mb(); |
| + |
| + notify_remote_via_irq(tp->irq); |
| + |
| + spin_unlock_irq(&tp->tx_lock); |
| + return offset; |
| +} |
| + |
| + |
| +static void tpmif_notify_upperlayer(struct tpm_private *tp) |
| +{ |
| + /* Notify upper layer about the state of the connection to the BE. */ |
| + vtpm_vd_status(tp->chip, (tp->is_connected |
| + ? TPM_VD_STATUS_CONNECTED |
| + : TPM_VD_STATUS_DISCONNECTED)); |
| +} |
| + |
| + |
| +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected) |
| +{ |
| + /* |
| + * Don't notify upper layer if we are in suspend mode and |
| +	 * should disconnect - assumption is that we will resume. |
| + * The mutex keeps apps from sending. |
| + */ |
| + if (is_connected == 0 && tp->is_suspended == 1) |
| + return; |
| + |
| + /* |
| + * Unlock the mutex if we are connected again |
| + * after being suspended - now resuming. |
| + * This also removes the suspend state. |
| + */ |
| + if (is_connected == 1 && tp->is_suspended == 1) |
| + tpmfront_suspend_finish(tp); |
| + |
| + if (is_connected != tp->is_connected) { |
| + tp->is_connected = is_connected; |
| + tpmif_notify_upperlayer(tp); |
| + } |
| +} |
| + |
| + |
| + |
| +/* ================================================================= |
| + * Initialization function. |
| + * ================================================================= |
| + */ |
| + |
| + |
| +static int __init tpmif_init(void) |
| +{ |
| + struct tpm_private *tp; |
| + |
| + if (is_initial_xendomain()) |
| + return -EPERM; |
| + |
| + tp = tpm_private_get(); |
| + if (!tp) |
| + return -ENOMEM; |
| + |
| + IPRINTK("Initialising the vTPM driver.\n"); |
| + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE, |
| + &gref_head) < 0) { |
| + tpm_private_put(); |
| + return -EFAULT; |
| + } |
| + |
| + init_tpm_xenbus(); |
| + return 0; |
| +} |
| + |
| + |
| +module_init(tpmif_init); |
| + |
| +MODULE_LICENSE("Dual BSD/GPL"); |
| |
| |
| @@ -18,12 +18,12 @@ void ide_toggle_bounce(ide_drive_t *driv |
| { |
| u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ |
| |
| - if (!PCI_DMA_BUS_IS_PHYS) { |
| - addr = BLK_BOUNCE_ANY; |
| - } else if (on && drive->media == ide_disk) { |
| + if (on && drive->media == ide_disk) { |
| struct device *dev = drive->hwif->dev; |
| |
| - if (dev && dev->dma_mask) |
| + if (!PCI_DMA_BUS_IS_PHYS) |
| + addr = BLK_BOUNCE_ANY; |
| + else if (dev && dev->dma_mask) |
| addr = *dev->dma_mask; |
| } |
| |
| |
| |
| @@ -8,6 +8,10 @@ |
| * @author Barry Kasindorf |
| * @author Robert Richter <robert.richter@amd.com> |
| * |
| + * Modified by Aravind Menon for Xen |
| + * These modifications are: |
| + * Copyright (C) 2005 Hewlett-Packard Co. |
| + * |
| * This is the core of the buffer management. Each |
| * CPU buffer is processed and entered into the |
| * global event buffer. Such processing is necessary |
| @@ -42,6 +46,8 @@ static cpumask_var_t marked_cpus; |
| static DEFINE_SPINLOCK(task_mortuary); |
| static void process_task_mortuary(void); |
| |
| +static int cpu_current_domain[NR_CPUS]; |
| + |
| /* Take ownership of the task struct and place it on the |
| * list for processing. Only after two full buffer syncs |
| * does the task eventually get freed, because by then |
| @@ -60,7 +66,6 @@ task_free_notify(struct notifier_block * |
| return NOTIFY_OK; |
| } |
| |
| - |
| /* The task is on its way out. A sync of the buffer means we can catch |
| * any remaining samples for this task. |
| */ |
| @@ -153,6 +158,11 @@ static void end_sync(void) |
| int sync_start(void) |
| { |
| int err; |
| + int i; |
| + |
| + for (i = 0; i < NR_CPUS; i++) { |
| + cpu_current_domain[i] = COORDINATOR_DOMAIN; |
| + } |
| |
| if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) |
| return -ENOMEM; |
| @@ -285,13 +295,29 @@ static void add_cpu_switch(int i) |
| last_cookie = INVALID_COOKIE; |
| } |
| |
| -static void add_kernel_ctx_switch(unsigned int in_kernel) |
| +static void add_cpu_mode_switch(unsigned int cpu_mode) |
| { |
| add_event_entry(ESCAPE_CODE); |
| - if (in_kernel) |
| + switch (cpu_mode) { |
| + case CPU_MODE_USER: |
| + add_event_entry(USER_ENTER_SWITCH_CODE); |
| + break; |
| + case CPU_MODE_KERNEL: |
| add_event_entry(KERNEL_ENTER_SWITCH_CODE); |
| - else |
| - add_event_entry(KERNEL_EXIT_SWITCH_CODE); |
| + break; |
| + case CPU_MODE_XEN: |
| + add_event_entry(XEN_ENTER_SWITCH_CODE); |
| + break; |
| + default: |
| + break; |
| + } |
| +} |
| + |
| +static void add_domain_switch(unsigned long domain_id) |
| +{ |
| + add_event_entry(ESCAPE_CODE); |
| + add_event_entry(DOMAIN_SWITCH_CODE); |
| + add_event_entry(domain_id); |
| } |
| |
| static void |
| @@ -372,12 +398,12 @@ static inline void add_sample_entry(unsi |
| * for later lookup from userspace. Return 0 on failure. |
| */ |
| static int |
| -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) |
| +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode) |
| { |
| unsigned long cookie; |
| off_t offset; |
| |
| - if (in_kernel) { |
| + if (cpu_mode >= CPU_MODE_KERNEL) { |
| add_sample_entry(s->eip, s->event); |
| return 1; |
| } |
| @@ -502,9 +528,10 @@ void sync_buffer(int cpu) |
| unsigned long val; |
| struct task_struct *new; |
| unsigned long cookie = 0; |
| - int in_kernel = 1; |
| + int cpu_mode = CPU_MODE_KERNEL; |
| sync_buffer_state state = sb_buffer_start; |
| unsigned int i; |
| + int domain_switch = 0; |
| unsigned long available; |
| unsigned long flags; |
| struct op_entry entry; |
| @@ -514,6 +541,11 @@ void sync_buffer(int cpu) |
| |
| add_cpu_switch(cpu); |
| |
| + /* We need to assign the first samples in this CPU buffer to the |
| + same domain that we were processing at the last sync_buffer */ |
| + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) |
| + add_domain_switch(cpu_current_domain[cpu]); |
| + |
| op_cpu_buffer_reset(cpu); |
| available = op_cpu_buffer_entries(cpu); |
| |
| @@ -522,6 +554,13 @@ void sync_buffer(int cpu) |
| if (!sample) |
| break; |
| |
| + if (domain_switch) { |
| + cpu_current_domain[cpu] = sample->eip; |
| + add_domain_switch(sample->eip); |
| + domain_switch = 0; |
| + continue; |
| + } |
| + |
| if (is_code(sample->eip)) { |
| flags = sample->event; |
| if (flags & TRACE_BEGIN) { |
| @@ -530,10 +569,10 @@ void sync_buffer(int cpu) |
| } |
| if (flags & KERNEL_CTX_SWITCH) { |
| /* kernel/userspace switch */ |
| - in_kernel = flags & IS_KERNEL; |
| + cpu_mode = flags & CPU_MODE_MASK; |
| if (state == sb_buffer_start) |
| state = sb_sample_start; |
| - add_kernel_ctx_switch(flags & IS_KERNEL); |
| + add_cpu_mode_switch(cpu_mode); |
| } |
| if (flags & USER_CTX_SWITCH |
| && op_cpu_buffer_get_data(&entry, &val)) { |
| @@ -546,16 +585,23 @@ void sync_buffer(int cpu) |
| cookie = get_exec_dcookie(mm); |
| add_user_ctx_switch(new, cookie); |
| } |
| + if (flags & DOMAIN_SWITCH) |
| + domain_switch = 1; |
| if (op_cpu_buffer_get_size(&entry)) |
| add_data(&entry, mm); |
| continue; |
| } |
| |
| + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { |
| + add_sample_entry(sample->eip, sample->event); |
| + continue; |
| + } |
| + |
| if (state < sb_bt_start) |
| /* ignore sample */ |
| continue; |
| |
| - if (add_sample(mm, sample, in_kernel)) |
| + if (add_sample(mm, sample, cpu_mode)) |
| continue; |
| |
| /* ignore backtraces if failed to add a sample */ |
| @@ -566,6 +612,10 @@ void sync_buffer(int cpu) |
| } |
| release_mm(mm); |
| |
| + /* We reset domain to COORDINATOR at each CPU switch */ |
| + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) |
| + add_domain_switch(COORDINATOR_DOMAIN); |
| + |
| mark_done(cpu); |
| |
| mutex_unlock(&buffer_mutex); |
| |
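| add_domain_switch() above emits a three-word escape sequence into the |
| event buffer: ESCAPE_CODE, DOMAIN_SWITCH_CODE, then the domain ID.  A |
| minimal userspace decoder sketch for just those records (assuming the |
| usual ~0UL value for ESCAPE_CODE and the DOMAIN_SWITCH_CODE value this |
| patch assigns under CONFIG_XEN; the profiled kernel's word size and |
| endianness are also assumed): |
| |
| /* Sketch only: report domain-switch records from an oprofile event |
|  * stream piped to stdin.  Other escape sequences are not length- |
|  * decoded, so this can mis-sync on their payloads. */ |
| #include <stdio.h> |
| |
| #define ESCAPE_CODE		(~0UL)	/* assumed, as in linux/oprofile.h */ |
| #define DOMAIN_SWITCH_CODE	11	/* value set by this patch (CONFIG_XEN) */ |
| |
| int main(void) |
| { |
| 	unsigned long v, domain; |
| |
| 	while (fread(&v, sizeof(v), 1, stdin) == 1) { |
| 		if (v != ESCAPE_CODE) |
| 			continue;		/* ordinary sample word */ |
| 		if (fread(&v, sizeof(v), 1, stdin) != 1) |
| 			break; |
| 		if (v != DOMAIN_SWITCH_CODE) |
| 			continue;		/* some other escape record */ |
| 		if (fread(&domain, sizeof(domain), 1, stdin) != 1) |
| 			break; |
| 		printf("domain switch -> %ld\n", (long)domain); |
| 	} |
| 	return 0; |
| } |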
| |
| @@ -8,6 +8,10 @@ |
| * @author Barry Kasindorf <barry.kasindorf@amd.com> |
| * @author Robert Richter <robert.richter@amd.com> |
| * |
| + * Modified by Aravind Menon for Xen |
| + * These modifications are: |
| + * Copyright (C) 2005 Hewlett-Packard Co. |
| + * |
| * Each CPU has a local buffer that stores PC value/event |
| * pairs. We also log context switches when we notice them. |
| * Eventually each CPU's buffer is processed into the global |
| @@ -55,6 +59,8 @@ static void wq_sync_buffer(struct work_s |
| #define DEFAULT_TIMER_EXPIRE (HZ / 10) |
| static int work_enabled; |
| |
| +static int32_t current_domain = COORDINATOR_DOMAIN; |
| + |
| unsigned long oprofile_get_cpu_buffer_size(void) |
| { |
| return oprofile_cpu_buffer_size; |
| @@ -99,7 +105,7 @@ int alloc_cpu_buffers(void) |
| struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); |
| |
| b->last_task = NULL; |
| - b->last_is_kernel = -1; |
| + b->last_cpu_mode = -1; |
| b->tracing = 0; |
| b->buffer_size = buffer_size; |
| b->sample_received = 0; |
| @@ -217,7 +223,7 @@ unsigned long op_cpu_buffer_entries(int |
| |
| static int |
| op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, |
| - int is_kernel, struct task_struct *task) |
| + int cpu_mode, struct task_struct *task) |
| { |
| struct op_entry entry; |
| struct op_sample *sample; |
| @@ -230,16 +236,15 @@ op_add_code(struct oprofile_cpu_buffer * |
| flags |= TRACE_BEGIN; |
| |
| /* notice a switch from user->kernel or vice versa */ |
| - is_kernel = !!is_kernel; |
| - if (cpu_buf->last_is_kernel != is_kernel) { |
| - cpu_buf->last_is_kernel = is_kernel; |
| - flags |= KERNEL_CTX_SWITCH; |
| - if (is_kernel) |
| - flags |= IS_KERNEL; |
| + if (cpu_buf->last_cpu_mode != cpu_mode) { |
| + cpu_buf->last_cpu_mode = cpu_mode; |
| + flags |= KERNEL_CTX_SWITCH | cpu_mode; |
| } |
| |
| /* notice a task switch */ |
| - if (cpu_buf->last_task != task) { |
| + /* if not processing other domain samples */ |
| + if (cpu_buf->last_task != task && |
| + current_domain == COORDINATOR_DOMAIN) { |
| cpu_buf->last_task = task; |
| flags |= USER_CTX_SWITCH; |
| } |
| @@ -288,14 +293,14 @@ op_add_sample(struct oprofile_cpu_buffer |
| /* |
| * This must be safe from any context. |
| * |
| - * is_kernel is needed because on some architectures you cannot |
| + * cpu_mode is needed because on some architectures you cannot |
| * tell if you are in kernel or user space simply by looking at |
| - * pc. We tag this in the buffer by generating kernel enter/exit |
| - * events whenever is_kernel changes |
| + * pc. We tag this in the buffer by generating kernel/user (and |
| + * xen) enter events whenever cpu_mode changes |
| */ |
| static int |
| log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, |
| - unsigned long backtrace, int is_kernel, unsigned long event) |
| + unsigned long backtrace, int cpu_mode, unsigned long event) |
| { |
| cpu_buf->sample_received++; |
| |
| @@ -304,7 +309,7 @@ log_sample(struct oprofile_cpu_buffer *c |
| return 0; |
| } |
| |
| - if (op_add_code(cpu_buf, backtrace, is_kernel, current)) |
| + if (op_add_code(cpu_buf, backtrace, cpu_mode, current)) |
| goto fail; |
| |
| if (op_add_sample(cpu_buf, pc, event)) |
| @@ -444,6 +449,25 @@ fail: |
| return; |
| } |
| |
| +int oprofile_add_domain_switch(int32_t domain_id) |
| +{ |
| +	struct oprofile_cpu_buffer *cpu_buf = &cpu_buffer[smp_processor_id()]; |
| + |
| + /* should have space for switching into and out of domain |
| + (2 slots each) plus one sample and one cpu mode switch */ |
| + if (((nr_available_slots(cpu_buf) < 6) && |
| + (domain_id != COORDINATOR_DOMAIN)) || |
| + (nr_available_slots(cpu_buf) < 2)) |
| + return 0; |
| + |
| + add_code(cpu_buf, DOMAIN_SWITCH); |
| + add_sample(cpu_buf, domain_id, 0); |
| + |
| + current_domain = domain_id; |
| + |
| + return 1; |
| +} |
| + |
| /* |
| * This serves to avoid cpu buffer overflow, and makes sure |
| * the task mortuary progresses |
| |
| |
| @@ -40,7 +40,7 @@ struct op_entry; |
| struct oprofile_cpu_buffer { |
| unsigned long buffer_size; |
| struct task_struct *last_task; |
| - int last_is_kernel; |
| + int last_cpu_mode; |
| int tracing; |
| unsigned long sample_received; |
| unsigned long sample_lost_overflow; |
| @@ -62,7 +62,7 @@ static inline void op_cpu_buffer_reset(i |
| { |
| struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); |
| |
| - cpu_buf->last_is_kernel = -1; |
| + cpu_buf->last_cpu_mode = -1; |
| cpu_buf->last_task = NULL; |
| } |
| |
| @@ -112,9 +112,13 @@ int op_cpu_buffer_get_data(struct op_ent |
| } |
| |
| /* extra data flags */ |
| -#define KERNEL_CTX_SWITCH (1UL << 0) |
| -#define IS_KERNEL (1UL << 1) |
| +#define CPU_MODE_USER 0 |
| +#define CPU_MODE_KERNEL 1 |
| +#define CPU_MODE_XEN 2 |
| +#define CPU_MODE_MASK 3 |
| #define TRACE_BEGIN (1UL << 2) |
| #define USER_CTX_SWITCH (1UL << 3) |
| +#define KERNEL_CTX_SWITCH (1UL << 4) |
| +#define DOMAIN_SWITCH (1UL << 5) |
| |
| #endif /* OPROFILE_CPU_BUFFER_H */ |
| |
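| With KERNEL_CTX_SWITCH moved up to bit 4, bits 0-1 of the flags word |
| now carry the CPU mode itself: op_add_code() packs it with |
| KERNEL_CTX_SWITCH | cpu_mode and sync_buffer() recovers it with |
| flags & CPU_MODE_MASK.  A standalone illustration of that layout, |
| reusing the values defined above: |
| |
| /* Illustration of the new flag layout (values copied from above). */ |
| #include <stdio.h> |
| |
| #define CPU_MODE_USER		0 |
| #define CPU_MODE_KERNEL		1 |
| #define CPU_MODE_XEN		2 |
| #define CPU_MODE_MASK		3 |
| #define TRACE_BEGIN		(1UL << 2) |
| #define USER_CTX_SWITCH		(1UL << 3) |
| #define KERNEL_CTX_SWITCH	(1UL << 4) |
| #define DOMAIN_SWITCH		(1UL << 5) |
| |
| int main(void) |
| { |
| 	/* What op_add_code() records for a switch into Xen mode ... */ |
| 	unsigned long flags = KERNEL_CTX_SWITCH | CPU_MODE_XEN; |
| |
| 	/* ... and what sync_buffer() extracts on the other side. */ |
| 	if (flags & KERNEL_CTX_SWITCH) |
| 		printf("cpu_mode = %lu (CPU_MODE_XEN)\n", |
| 		       flags & CPU_MODE_MASK); |
| 	return 0; |
| } |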
| |
| @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void); |
| #define INVALID_COOKIE ~0UL |
| #define NO_COOKIE 0UL |
| |
| +/* Constant used to refer to coordinator domain (Xen) */ |
| +#define COORDINATOR_DOMAIN -1 |
| + |
| extern const struct file_operations event_buffer_fops; |
| |
| /* mutex between sync_cpu_buffers() and the |
| |
| |
| @@ -5,6 +5,10 @@ |
| * @remark Read the file COPYING |
| * |
| * @author John Levon <levon@movementarian.org> |
| + * |
| + * Modified by Aravind Menon for Xen |
| + * These modifications are: |
| + * Copyright (C) 2005 Hewlett-Packard Co. |
| */ |
| |
| #include <linux/kernel.h> |
| @@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex); |
| */ |
| static int timer = 0; |
| |
| +int oprofile_set_active(int active_domains[], unsigned int adomains) |
| +{ |
| + int err; |
| + |
| + if (!oprofile_ops.set_active) |
| + return -EINVAL; |
| + |
| + mutex_lock(&start_mutex); |
| + err = oprofile_ops.set_active(active_domains, adomains); |
| + mutex_unlock(&start_mutex); |
| + return err; |
| +} |
| + |
| +int oprofile_set_passive(int passive_domains[], unsigned int pdomains) |
| +{ |
| + int err; |
| + |
| + if (!oprofile_ops.set_passive) |
| + return -EINVAL; |
| + |
| + mutex_lock(&start_mutex); |
| + err = oprofile_ops.set_passive(passive_domains, pdomains); |
| + mutex_unlock(&start_mutex); |
| + return err; |
| +} |
| + |
| int oprofile_setup(void) |
| { |
| int err; |
| |
| |
| @@ -36,4 +36,7 @@ void oprofile_timer_init(struct oprofile |
| |
| int oprofile_set_backtrace(unsigned long depth); |
| |
| +int oprofile_set_active(int active_domains[], unsigned int adomains); |
| +int oprofile_set_passive(int passive_domains[], unsigned int pdomains); |
| + |
| #endif /* OPROF_H */ |
| |
| |
| @@ -5,10 +5,16 @@ |
| * @remark Read the file COPYING |
| * |
| * @author John Levon <levon@movementarian.org> |
| + * |
| + * Modified by Aravind Menon for Xen |
| + * These modifications are: |
| + * Copyright (C) 2005 Hewlett-Packard Co. |
| */ |
| |
| #include <linux/fs.h> |
| #include <linux/oprofile.h> |
| +#include <asm/uaccess.h> |
| +#include <linux/ctype.h> |
| |
| #include "event_buffer.h" |
| #include "oprofile_stats.h" |
| @@ -123,6 +129,195 @@ static const struct file_operations dump |
| .write = dump_write, |
| }; |
| |
| +#define TMPBUFSIZE 512 |
| + |
| +static unsigned int adomains = 0; |
| +static int active_domains[MAX_OPROF_DOMAINS + 1]; |
| +static DEFINE_MUTEX(adom_mutex); |
| + |
| +static ssize_t adomain_write(struct file * file, char const __user * buf, |
| + size_t count, loff_t * offset) |
| +{ |
| + char *tmpbuf; |
| + char *startp, *endp; |
| + int i; |
| + unsigned long val; |
| + ssize_t retval = count; |
| + |
| + if (*offset) |
| + return -EINVAL; |
| + if (count > TMPBUFSIZE - 1) |
| + return -EINVAL; |
| + |
| + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) |
| + return -ENOMEM; |
| + |
| + if (copy_from_user(tmpbuf, buf, count)) { |
| + kfree(tmpbuf); |
| + return -EFAULT; |
| + } |
| + tmpbuf[count] = 0; |
| + |
| + mutex_lock(&adom_mutex); |
| + |
| + startp = tmpbuf; |
| + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ |
| + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { |
| + val = simple_strtoul(startp, &endp, 0); |
| + if (endp == startp) |
| + break; |
| + while (ispunct(*endp) || isspace(*endp)) |
| + endp++; |
| + active_domains[i] = val; |
| + if (active_domains[i] != val) |
| + /* Overflow, force error below */ |
| + i = MAX_OPROF_DOMAINS + 1; |
| + startp = endp; |
| + } |
| + /* Force error on trailing junk */ |
| + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; |
| + |
| + kfree(tmpbuf); |
| + |
| + if (adomains > MAX_OPROF_DOMAINS |
| + || oprofile_set_active(active_domains, adomains)) { |
| + adomains = 0; |
| + retval = -EINVAL; |
| + } |
| + |
| + mutex_unlock(&adom_mutex); |
| + return retval; |
| +} |
| + |
| +static ssize_t adomain_read(struct file * file, char __user * buf, |
| + size_t count, loff_t * offset) |
| +{ |
| + char * tmpbuf; |
| + size_t len; |
| + int i; |
| + ssize_t retval; |
| + |
| + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) |
| + return -ENOMEM; |
| + |
| + mutex_lock(&adom_mutex); |
| + |
| + len = 0; |
| + for (i = 0; i < adomains; i++) |
| + len += snprintf(tmpbuf + len, |
| + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, |
| + "%u ", active_domains[i]); |
| + WARN_ON(len > TMPBUFSIZE); |
| + if (len != 0 && len <= TMPBUFSIZE) |
| + tmpbuf[len-1] = '\n'; |
| + |
| + mutex_unlock(&adom_mutex); |
| + |
| + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); |
| + |
| + kfree(tmpbuf); |
| + return retval; |
| +} |
| + |
| + |
| +static const struct file_operations active_domain_ops = { |
| + .read = adomain_read, |
| + .write = adomain_write, |
| +}; |
| + |
| +static unsigned int pdomains = 0; |
| +static int passive_domains[MAX_OPROF_DOMAINS]; |
| +static DEFINE_MUTEX(pdom_mutex); |
| + |
| +static ssize_t pdomain_write(struct file * file, char const __user * buf, |
| + size_t count, loff_t * offset) |
| +{ |
| + char *tmpbuf; |
| + char *startp, *endp; |
| + int i; |
| + unsigned long val; |
| + ssize_t retval = count; |
| + |
| + if (*offset) |
| + return -EINVAL; |
| + if (count > TMPBUFSIZE - 1) |
| + return -EINVAL; |
| + |
| + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) |
| + return -ENOMEM; |
| + |
| + if (copy_from_user(tmpbuf, buf, count)) { |
| + kfree(tmpbuf); |
| + return -EFAULT; |
| + } |
| + tmpbuf[count] = 0; |
| + |
| + mutex_lock(&pdom_mutex); |
| + |
| + startp = tmpbuf; |
| + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ |
| + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { |
| + val = simple_strtoul(startp, &endp, 0); |
| + if (endp == startp) |
| + break; |
| + while (ispunct(*endp) || isspace(*endp)) |
| + endp++; |
| + passive_domains[i] = val; |
| + if (passive_domains[i] != val) |
| + /* Overflow, force error below */ |
| + i = MAX_OPROF_DOMAINS + 1; |
| + startp = endp; |
| + } |
| + /* Force error on trailing junk */ |
| + pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; |
| + |
| + kfree(tmpbuf); |
| + |
| + if (pdomains > MAX_OPROF_DOMAINS |
| + || oprofile_set_passive(passive_domains, pdomains)) { |
| + pdomains = 0; |
| + retval = -EINVAL; |
| + } |
| + |
| + mutex_unlock(&pdom_mutex); |
| + return retval; |
| +} |
| + |
| +static ssize_t pdomain_read(struct file * file, char __user * buf, |
| + size_t count, loff_t * offset) |
| +{ |
| + char * tmpbuf; |
| + size_t len; |
| + int i; |
| + ssize_t retval; |
| + |
| + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) |
| + return -ENOMEM; |
| + |
| + mutex_lock(&pdom_mutex); |
| + |
| + len = 0; |
| + for (i = 0; i < pdomains; i++) |
| + len += snprintf(tmpbuf + len, |
| + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, |
| + "%u ", passive_domains[i]); |
| + WARN_ON(len > TMPBUFSIZE); |
| + if (len != 0 && len <= TMPBUFSIZE) |
| + tmpbuf[len-1] = '\n'; |
| + |
| + mutex_unlock(&pdom_mutex); |
| + |
| + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); |
| + |
| + kfree(tmpbuf); |
| + return retval; |
| +} |
| + |
| +static const struct file_operations passive_domain_ops = { |
| + .read = pdomain_read, |
| + .write = pdomain_write, |
| +}; |
| + |
| void oprofile_create_files(struct super_block *sb, struct dentry *root) |
| { |
| /* reinitialize default values */ |
| @@ -132,6 +327,8 @@ void oprofile_create_files(struct super_ |
| |
| oprofilefs_create_file(sb, root, "enable", &enable_fops); |
| oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); |
| + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); |
| + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); |
| oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); |
| oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); |
| oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed); |
| |
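| The two new oprofilefs nodes accept a list of domain IDs separated by |
| whitespace or punctuation, as parsed above, and forward it to |
| oprofile_set_active()/oprofile_set_passive().  A minimal sketch of |
| driving them from userspace, assuming oprofilefs is mounted at its |
| conventional /dev/oprofile location: |
| |
| /* Sketch only: program the Xen domain lists via oprofilefs. */ |
| #include <stdio.h> |
| |
| static int write_domains(const char *path, const char *list) |
| { |
| 	FILE *f = fopen(path, "w"); |
| |
| 	if (!f) { |
| 		perror(path); |
| 		return -1; |
| 	} |
| 	fprintf(f, "%s\n", list);	/* e.g. "0 1" or "0,1" */ |
| 	return fclose(f); |
| } |
| |
| int main(void) |
| { |
| 	/* Profile domain 1 actively, collect domain 2 passively. */ |
| 	write_domains("/dev/oprofile/active_domains", "1"); |
| 	write_domains("/dev/oprofile/passive_domains", "2"); |
| 	return 0; |
| } |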
| |
| @@ -36,6 +36,11 @@ |
| #include <asm/uaccess.h> |
| #include <asm/mmu_context.h> |
| |
| +#ifdef CONFIG_EPOLL |
| +#include <linux/poll.h> |
| +#include <linux/eventpoll.h> |
| +#endif |
| + |
| #if DEBUG > 1 |
| #define dprintk printk |
| #else |
| @@ -1034,6 +1039,11 @@ put_rq: |
| if (waitqueue_active(&ctx->wait)) |
| wake_up(&ctx->wait); |
| |
| +#ifdef CONFIG_EPOLL |
| + if (ctx->file && waitqueue_active(&ctx->poll_wait)) |
| + wake_up(&ctx->poll_wait); |
| +#endif |
| + |
| spin_unlock_irqrestore(&ctx->ctx_lock, flags); |
| return ret; |
| } |
| @@ -1041,6 +1051,8 @@ put_rq: |
| /* aio_read_evt |
| * Pull an event off of the ioctx's event ring. Returns the number of |
| * events fetched (0 or 1 ;-) |
| + *	If the ent parameter is NULL, just returns whether an event is |
| + *	available (0 or 1), without consuming it. |
| * FIXME: make this use cmpxchg. |
| * TODO: make the ringbuffer user mmap()able (requires FIXME). |
| */ |
| @@ -1063,13 +1075,18 @@ static int aio_read_evt(struct kioctx *i |
| |
| head = ring->head % info->nr; |
| if (head != ring->tail) { |
| - struct io_event *evp = aio_ring_event(info, head, KM_USER1); |
| - *ent = *evp; |
| - head = (head + 1) % info->nr; |
| - smp_mb(); /* finish reading the event before updatng the head */ |
| - ring->head = head; |
| - ret = 1; |
| - put_aio_ring_event(evp, KM_USER1); |
| + if (ent) { /* event requested */ |
| + struct io_event *evp = |
| + aio_ring_event(info, head, KM_USER1); |
| + *ent = *evp; |
| + head = (head + 1) % info->nr; |
| +			/* finish reading the event before updating the head */ |
| + smp_mb(); |
| + ring->head = head; |
| + ret = 1; |
| + put_aio_ring_event(evp, KM_USER1); |
| + } else /* only need to know availability */ |
| + ret = 1; |
| } |
| spin_unlock(&info->ring_lock); |
| |
| @@ -1254,6 +1271,13 @@ static void io_destroy(struct kioctx *io |
| |
| aio_cancel_all(ioctx); |
| wait_for_all_aios(ioctx); |
| +#ifdef CONFIG_EPOLL |
| + /* forget the poll file, but it's up to the user to close it */ |
| + if (ioctx->file) { |
| + ioctx->file->private_data = 0; |
| + ioctx->file = 0; |
| + } |
| +#endif |
| |
| /* |
| * Wake up any waiters. The setting of ctx->dead must be seen |
| @@ -1264,6 +1288,67 @@ static void io_destroy(struct kioctx *io |
| put_ioctx(ioctx); /* once for the lookup */ |
| } |
| |
| +#ifdef CONFIG_EPOLL |
| + |
| +static int aio_queue_fd_close(struct inode *inode, struct file *file) |
| +{ |
| + struct kioctx *ioctx = file->private_data; |
| + if (ioctx) { |
| + file->private_data = 0; |
| + spin_lock_irq(&ioctx->ctx_lock); |
| + ioctx->file = 0; |
| + spin_unlock_irq(&ioctx->ctx_lock); |
| + } |
| + return 0; |
| +} |
| + |
| +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) |
| +{ unsigned int pollflags = 0; |
| + struct kioctx *ioctx = file->private_data; |
| + |
| + if (ioctx) { |
| + |
| + spin_lock_irq(&ioctx->ctx_lock); |
| + /* Insert inside our poll wait queue */ |
| + poll_wait(file, &ioctx->poll_wait, wait); |
| + |
| + /* Check our condition */ |
| + if (aio_read_evt(ioctx, 0)) |
| + pollflags = POLLIN | POLLRDNORM; |
| + spin_unlock_irq(&ioctx->ctx_lock); |
| + } |
| + |
| + return pollflags; |
| +} |
| + |
| +static const struct file_operations aioq_fops = { |
| + .release = aio_queue_fd_close, |
| + .poll = aio_queue_fd_poll |
| +}; |
| + |
| +/* make_aio_fd: |
| + * Create a file descriptor that can be used to poll the event queue. |
| + * Based and piggybacked on the excellent epoll code. |
| + */ |
| + |
| +static int make_aio_fd(struct kioctx *ioctx) |
| +{ |
| + int error, fd; |
| + struct inode *inode; |
| + struct file *file; |
| + |
| + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); |
| + if (error) |
| + return error; |
| + |
| + /* associate the file with the IO context */ |
| + file->private_data = ioctx; |
| + ioctx->file = file; |
| + init_waitqueue_head(&ioctx->poll_wait); |
| + return fd; |
| +} |
| +#endif |
| + |
| /* sys_io_setup: |
| * Create an aio_context capable of receiving at least nr_events. |
| * ctxp must not point to an aio_context that already exists, and |
| @@ -1276,18 +1361,30 @@ static void io_destroy(struct kioctx *io |
| * resources are available. May fail with -EFAULT if an invalid |
| * pointer is passed for ctxp. Will fail with -ENOSYS if not |
| * implemented. |
| + * |
| + * To request a selectable fd, the user context has to be initialized |
| + * to 1, instead of 0, and the return value is the fd. |
| + * This keeps the system call compatible, since a non-zero value |
| + * was not allowed so far. |
| */ |
| SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) |
| { |
| struct kioctx *ioctx = NULL; |
| unsigned long ctx; |
| long ret; |
| + int make_fd = 0; |
| |
| ret = get_user(ctx, ctxp); |
| if (unlikely(ret)) |
| goto out; |
| |
| ret = -EINVAL; |
| +#ifdef CONFIG_EPOLL |
| + if (ctx == 1) { |
| + make_fd = 1; |
| + ctx = 0; |
| + } |
| +#endif |
| if (unlikely(ctx || nr_events == 0)) { |
| pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", |
| ctx, nr_events); |
| @@ -1298,8 +1395,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e |
| ret = PTR_ERR(ioctx); |
| if (!IS_ERR(ioctx)) { |
| ret = put_user(ioctx->user_id, ctxp); |
| - if (!ret) |
| - return 0; |
| +#ifdef CONFIG_EPOLL |
| + if (make_fd && ret >= 0) |
| + ret = make_aio_fd(ioctx); |
| +#endif |
| + if (ret >= 0) |
| + return ret; |
| |
| get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ |
| io_destroy(ioctx); |
| |
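| As the updated sys_io_setup() comment describes, initializing the |
| context word to 1 asks a CONFIG_EPOLL kernel carrying this patch for a |
| pollable file descriptor: the fd comes back as the syscall's return |
| value while the real context ID is still stored through ctxp.  A |
| hedged userspace sketch, using the raw syscall so no libaio wrapper |
| gets in the way: |
| |
| /* Sketch of the selectable-fd io_setup() extension.  On an unpatched |
|  * kernel the ctx==1 request simply fails with EINVAL. */ |
| #include <stdio.h> |
| #include <poll.h> |
| #include <unistd.h> |
| #include <sys/syscall.h> |
| #include <linux/aio_abi.h> |
| |
| int main(void) |
| { |
| 	aio_context_t ctx = 1;		/* 1 == "give me a pollable fd" */ |
| 	struct pollfd pfd; |
| 	long fd; |
| |
| 	fd = syscall(__NR_io_setup, 128, &ctx); |
| 	if (fd < 0) { |
| 		perror("io_setup"); |
| 		return 1; |
| 	} |
| |
| 	/* ctx now holds the real AIO context; the fd signals POLLIN when |
| 	 * completions are queued, instead of blocking in io_getevents(). */ |
| 	pfd.fd = (int)fd; |
| 	pfd.events = POLLIN; |
| 	poll(&pfd, 1, 0); |
| 	printf("ctx=%#llx fd=%ld revents=%#x\n", |
| 	       (unsigned long long)ctx, fd, pfd.revents); |
| 	return 0; |
| } |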
| |
| @@ -115,6 +115,13 @@ |
| #include <asm/fbio.h> |
| #endif |
| |
| +#ifdef CONFIG_XEN |
| +#include <xen/interface/xen.h> |
| +#include <xen/public/evtchn.h> |
| +#include <xen/public/privcmd.h> |
| +#include <xen/compat_ioctl.h> |
| +#endif |
| + |
| static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, |
| unsigned long arg, struct file *f) |
| { |
| @@ -2726,6 +2733,18 @@ IGNORE_IOCTL(FBIOGETCMAP32) |
| IGNORE_IOCTL(FBIOSCURSOR32) |
| IGNORE_IOCTL(FBIOGCURSOR32) |
| #endif |
| + |
| +#ifdef CONFIG_XEN |
| +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32) |
| +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32) |
| +COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY) |
| +COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET) |
| +#endif |
| }; |
| |
| #define IOCTL_HASHSIZE 256 |
| |
| |
| @@ -17,6 +17,12 @@ |
| #define ACPI_PROCESSOR_MAX_THROTTLE 250 /* 25% */ |
| #define ACPI_PROCESSOR_MAX_DUTY_WIDTH 4 |
| |
| +#ifdef CONFIG_XEN |
| +#define NR_ACPI_CPUS (NR_CPUS < 256 ? 256 : NR_CPUS) |
| +#else |
| +#define NR_ACPI_CPUS NR_CPUS |
| +#endif /* CONFIG_XEN */ |
| + |
| #define ACPI_PDC_REVISION_ID 0x1 |
| |
| #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */ |
| @@ -42,6 +48,17 @@ |
| |
| struct acpi_processor_cx; |
| |
| +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +struct acpi_csd_package { |
| + acpi_integer num_entries; |
| + acpi_integer revision; |
| + acpi_integer domain; |
| + acpi_integer coord_type; |
| + acpi_integer num_processors; |
| + acpi_integer index; |
| +} __attribute__ ((packed)); |
| +#endif |
| + |
| struct acpi_power_register { |
| u8 descriptor; |
| u16 length; |
| @@ -74,6 +91,12 @@ struct acpi_processor_cx { |
| u32 power; |
| u32 usage; |
| u64 time; |
| +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| + /* Require raw information for external control logic */ |
| + struct acpi_power_register reg; |
| + u32 csd_count; |
| + struct acpi_csd_package *domain_info; |
| +#endif |
| struct acpi_processor_cx_policy promotion; |
| struct acpi_processor_cx_policy demotion; |
| char desc[ACPI_CX_DESC_LEN]; |
| @@ -304,6 +327,9 @@ static inline void acpi_processor_ppc_ex |
| { |
| return; |
| } |
| +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +int acpi_processor_ppc_has_changed(struct acpi_processor *pr); |
| +#else |
| static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) |
| { |
| static unsigned int printout = 1; |
| @@ -316,6 +342,7 @@ static inline int acpi_processor_ppc_has |
| } |
| return 0; |
| } |
| +#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ |
| #endif /* CONFIG_CPU_FREQ */ |
| |
| /* in processor_throttling.c */ |
| @@ -353,4 +380,120 @@ static inline void acpi_thermal_cpufreq_ |
| } |
| #endif |
| |
| +/* |
| + * The following interfaces are geared toward external processor PM |
| + * control logic such as a VMM. |
| + */ |
| +/* Events notified to external control logic */ |
| +#define PROCESSOR_PM_INIT 1 |
| +#define PROCESSOR_PM_CHANGE 2 |
| +#define PROCESSOR_HOTPLUG 3 |
| + |
| +/* Objects for the PM events */ |
| +#define PM_TYPE_IDLE 0 |
| +#define PM_TYPE_PERF 1 |
| +#define PM_TYPE_THR 2 |
| +#define PM_TYPE_MAX 3 |
| + |
| +/* Processor hotplug events */ |
| +#define HOTPLUG_TYPE_ADD 0 |
| +#define HOTPLUG_TYPE_REMOVE 1 |
| + |
| +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL |
| +struct processor_extcntl_ops { |
| + /* Transfer processor PM events to external control logic */ |
| + int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event); |
| + /* Notify physical processor status to external control logic */ |
| + int (*hotplug)(struct acpi_processor *pr, int type); |
| +}; |
| +extern const struct processor_extcntl_ops *processor_extcntl_ops; |
| + |
| +static inline int processor_cntl_external(void) |
| +{ |
| + return (processor_extcntl_ops != NULL); |
| +} |
| + |
| +static inline int processor_pm_external(void) |
| +{ |
| + return processor_cntl_external() && |
| + (processor_extcntl_ops->pm_ops[PM_TYPE_IDLE] != NULL); |
| +} |
| + |
| +static inline int processor_pmperf_external(void) |
| +{ |
| + return processor_cntl_external() && |
| + (processor_extcntl_ops->pm_ops[PM_TYPE_PERF] != NULL); |
| +} |
| + |
| +static inline int processor_pmthr_external(void) |
| +{ |
| + return processor_cntl_external() && |
| + (processor_extcntl_ops->pm_ops[PM_TYPE_THR] != NULL); |
| +} |
| + |
| +extern int processor_notify_external(struct acpi_processor *pr, |
| + int event, int type); |
| +extern void processor_extcntl_init(void); |
| +extern int processor_extcntl_prepare(struct acpi_processor *pr); |
| +extern int acpi_processor_get_performance_info(struct acpi_processor *pr); |
| +extern int acpi_processor_get_psd(struct acpi_processor *pr); |
| +void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **); |
| +#else |
| +static inline int processor_cntl_external(void) {return 0;} |
| +static inline int processor_pm_external(void) {return 0;} |
| +static inline int processor_pmperf_external(void) {return 0;} |
| +static inline int processor_pmthr_external(void) {return 0;} |
| +static inline int processor_notify_external(struct acpi_processor *pr, |
| + int event, int type) |
| +{ |
| + return 0; |
| +} |
| +static inline void processor_extcntl_init(void) {} |
| +static inline int processor_extcntl_prepare(struct acpi_processor *pr) |
| +{ |
| + return 0; |
| +} |
| +#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ |
| + |
| +#ifdef CONFIG_XEN |
| +static inline void xen_convert_pct_reg(struct xen_pct_register *xpct, |
| + struct acpi_pct_register *apct) |
| +{ |
| + xpct->descriptor = apct->descriptor; |
| + xpct->length = apct->length; |
| + xpct->space_id = apct->space_id; |
| + xpct->bit_width = apct->bit_width; |
| + xpct->bit_offset = apct->bit_offset; |
| + xpct->reserved = apct->reserved; |
| + xpct->address = apct->address; |
| +} |
| + |
| +static inline void xen_convert_pss_states(struct xen_processor_px *xpss, |
| + struct acpi_processor_px *apss, int state_count) |
| +{ |
| + int i; |
| +	for (i = 0; i < state_count; i++) { |
| + xpss->core_frequency = apss->core_frequency; |
| + xpss->power = apss->power; |
| + xpss->transition_latency = apss->transition_latency; |
| + xpss->bus_master_latency = apss->bus_master_latency; |
| + xpss->control = apss->control; |
| + xpss->status = apss->status; |
| + xpss++; |
| + apss++; |
| + } |
| +} |
| + |
| +static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd, |
| + struct acpi_psd_package *apsd) |
| +{ |
| + xpsd->num_entries = apsd->num_entries; |
| + xpsd->revision = apsd->revision; |
| + xpsd->domain = apsd->domain; |
| + xpsd->coord_type = apsd->coord_type; |
| + xpsd->num_processors = apsd->num_processors; |
| +} |
| + |
| +#endif /* CONFIG_XEN */ |
| + |
| #endif |
| |
| |
| @@ -30,7 +30,9 @@ pcibios_bus_to_resource(struct pci_dev * |
| res->end = region->end; |
| } |
| |
| +#ifndef pcibios_scan_all_fns |
| #define pcibios_scan_all_fns(a, b) 0 |
| +#endif |
| |
| #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ |
| static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) |
| |
| |
| @@ -99,6 +99,10 @@ static inline void ptep_set_wrprotect(st |
| } |
| #endif |
| |
| +#ifndef arch_change_pte_range |
| +#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0 |
| +#endif |
| + |
| #ifndef __HAVE_ARCH_PTE_SAME |
| #define pte_same(A,B) (pte_val(A) == pte_val(B)) |
| #endif |
| |
| |
| @@ -201,6 +201,12 @@ struct kioctx { |
| |
| struct delayed_work wq; |
| |
| +#ifdef CONFIG_EPOLL |
| + /* poll integration */ |
| + wait_queue_head_t poll_wait; |
| + struct file *file; |
| +#endif |
| + |
| struct rcu_head rcu_head; |
| }; |
| |
| |
| |
| @@ -130,12 +130,14 @@ alloc_zeroed_user_highpage_movable(struc |
| return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); |
| } |
| |
| +#ifndef __HAVE_ARCH_CLEAR_HIGHPAGE |
| static inline void clear_highpage(struct page *page) |
| { |
| void *kaddr = kmap_atomic(page, KM_USER0); |
| clear_page(kaddr); |
| kunmap_atomic(kaddr, KM_USER0); |
| } |
| +#endif |
| |
| static inline void zero_user_segments(struct page *page, |
| unsigned start1, unsigned end1, |
| @@ -189,6 +191,8 @@ static inline void copy_user_highpage(st |
| |
| #endif |
| |
| +#ifndef __HAVE_ARCH_COPY_HIGHPAGE |
| + |
| static inline void copy_highpage(struct page *to, struct page *from) |
| { |
| char *vfrom, *vto; |
| @@ -200,4 +204,6 @@ static inline void copy_highpage(struct |
| kunmap_atomic(vto, KM_USER1); |
| } |
| |
| +#endif |
| + |
| #endif /* _LINUX_HIGHMEM_H */ |
| |
| |
| @@ -316,6 +316,12 @@ static inline int disable_irq_wake(unsig |
| } |
| #endif /* CONFIG_GENERIC_HARDIRQS */ |
| |
| +#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED |
| +int irq_ignore_unhandled(unsigned int irq); |
| +#else |
| +#define irq_ignore_unhandled(irq) 0 |
| +#endif |
| + |
| #ifndef __ARCH_SET_SOFTIRQ_PENDING |
| #define set_softirq_pending(x) (local_softirq_pending() = (x)) |
| #define or_softirq_pending(x) (local_softirq_pending() |= (x)) |
| |
| |
| @@ -46,6 +46,13 @@ |
| KEXEC_CORE_NOTE_NAME_BYTES + \ |
| KEXEC_CORE_NOTE_DESC_BYTES ) |
| |
| +#ifndef KEXEC_ARCH_HAS_PAGE_MACROS |
| +#define kexec_page_to_pfn(page) page_to_pfn(page) |
| +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) |
| +#define kexec_virt_to_phys(addr) virt_to_phys(addr) |
| +#define kexec_phys_to_virt(addr) phys_to_virt(addr) |
| +#endif |
| + |
| /* |
| * This structure is used to hold the arguments that are used when loading |
| * kernel binaries. |
| @@ -112,6 +119,12 @@ struct kimage { |
| extern void machine_kexec(struct kimage *image); |
| extern int machine_kexec_prepare(struct kimage *image); |
| extern void machine_kexec_cleanup(struct kimage *image); |
| +#ifdef CONFIG_XEN |
| +extern int xen_machine_kexec_load(struct kimage *image); |
| +extern void xen_machine_kexec_unload(struct kimage *image); |
| +extern void xen_machine_kexec_setup_resources(void); |
| +extern void xen_machine_kexec_register_resources(struct resource *res); |
| +#endif |
| extern asmlinkage long sys_kexec_load(unsigned long entry, |
| unsigned long nr_segments, |
| struct kexec_segment __user *segments, |
| |
| |
| @@ -104,6 +104,12 @@ extern unsigned int kobjsize(const void |
| #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
| #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ |
| #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ |
| +#ifdef CONFIG_XEN |
| +#define VM_FOREIGN 0x80000000 /* Has pages belonging to another VM */ |
| +struct vm_foreign_map { |
| + struct page **map; |
| +}; |
| +#endif |
| |
| #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ |
| #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS |
| @@ -197,6 +203,15 @@ struct vm_operations_struct { |
| */ |
| int (*access)(struct vm_area_struct *vma, unsigned long addr, |
| void *buf, int len, int write); |
| + |
| + /* Area-specific function for clearing the PTE at @ptep. Returns the |
| + * original value of @ptep. */ |
| + pte_t (*zap_pte)(struct vm_area_struct *vma, |
| + unsigned long addr, pte_t *ptep, int is_fullmm); |
| + |
| + /* called before close() to indicate no more pages should be mapped */ |
| + void (*unmap)(struct vm_area_struct *area); |
| + |
| #ifdef CONFIG_NUMA |
| /* |
| * set_policy() op must add a reference to any non-NULL @new mempolicy |
| |
| |
| @@ -16,6 +16,8 @@ |
| #include <linux/types.h> |
| #include <linux/spinlock.h> |
| #include <asm/atomic.h> |
| + |
| +#include <xen/interface/xenoprof.h> |
| |
| /* Each escaped entry is prefixed by ESCAPE_CODE |
| * then one of the following codes, then the |
| @@ -28,14 +30,18 @@ |
| #define CPU_SWITCH_CODE 2 |
| #define COOKIE_SWITCH_CODE 3 |
| #define KERNEL_ENTER_SWITCH_CODE 4 |
| -#define KERNEL_EXIT_SWITCH_CODE 5 |
| +#define USER_ENTER_SWITCH_CODE 5 |
| #define MODULE_LOADED_CODE 6 |
| #define CTX_TGID_CODE 7 |
| #define TRACE_BEGIN_CODE 8 |
| #define TRACE_END_CODE 9 |
| #define XEN_ENTER_SWITCH_CODE 10 |
| +#ifndef CONFIG_XEN |
| #define SPU_PROFILING_CODE 11 |
| #define SPU_CTX_SWITCH_CODE 12 |
| +#else |
| +#define DOMAIN_SWITCH_CODE 11 |
| +#endif |
| #define IBS_FETCH_CODE 13 |
| #define IBS_OP_CODE 14 |
| |
| @@ -49,6 +55,11 @@ struct oprofile_operations { |
| /* create any necessary configuration files in the oprofile fs. |
| * Optional. */ |
| int (*create_files)(struct super_block * sb, struct dentry * root); |
| + /* setup active domains with Xen */ |
| + int (*set_active)(int *active_domains, unsigned int adomains); |
| + /* setup passive domains with Xen */ |
| + int (*set_passive)(int *passive_domains, unsigned int pdomains); |
| + |
| /* Do any necessary interrupt setup. Optional. */ |
| int (*setup)(void); |
| /* Do any necessary interrupt shutdown. Optional. */ |
| @@ -107,6 +118,9 @@ void oprofile_add_pc(unsigned long pc, i |
| /* add a backtrace entry, to be called from the ->backtrace callback */ |
| void oprofile_add_trace(unsigned long eip); |
| |
| +/* add a domain switch entry */ |
| +int oprofile_add_domain_switch(int32_t domain_id); |
| + |
| |
| /** |
| * Create a file of the given name as a child of the given root, with |
| |
| |
| @@ -104,6 +104,11 @@ enum pageflags { |
| #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR |
| PG_uncached, /* Page has been mapped as uncached */ |
| #endif |
| +#ifdef CONFIG_XEN |
| + PG_foreign, /* Page is owned by foreign allocator. */ |
| + PG_netback, /* Page is owned by netback */ |
| + PG_blkback, /* Page is owned by blkback */ |
| +#endif |
| __NR_PAGEFLAGS, |
| |
| /* Filesystems */ |
| @@ -317,6 +322,27 @@ static inline void SetPageUptodate(struc |
| |
| CLEARPAGEFLAG(Uptodate, uptodate) |
| |
| +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) |
| +#define SetPageForeign(_page, dtor) do { \ |
| + set_bit(PG_foreign, &(_page)->flags); \ |
| + BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \ |
| + (_page)->index = (long)(dtor); \ |
| +} while (0) |
| +#define ClearPageForeign(page) do { \ |
| + clear_bit(PG_foreign, &(page)->flags); \ |
| + (page)->index = 0; \ |
| +} while (0) |
| +#define PageForeignDestructor(_page, order) \ |
| + ((void (*)(struct page *, unsigned int))(_page)->index)(_page, order) |
| + |
| +#define PageNetback(page) test_bit(PG_netback, &(page)->flags) |
| +#define SetPageNetback(page) set_bit(PG_netback, &(page)->flags) |
| +#define ClearPageNetback(page) clear_bit(PG_netback, &(page)->flags) |
| + |
| +#define PageBlkback(page) test_bit(PG_blkback, &(page)->flags) |
| +#define SetPageBlkback(page) set_bit(PG_blkback, &(page)->flags) |
| +#define ClearPageBlkback(page) clear_bit(PG_blkback, &(page)->flags) |
| + |
| extern void cancel_dirty_page(struct page *page, unsigned int account_size); |
| |
| int test_clear_page_writeback(struct page *page); |
| @@ -393,6 +419,14 @@ PAGEFLAG_FALSE(MemError) |
| #define __PG_MLOCKED 0 |
| #endif |
| |
| +#if !defined(CONFIG_XEN) |
| +# define __PG_XEN 0 |
| +#elif defined(CONFIG_X86) |
| +# define __PG_XEN ((1 << PG_pinned) | (1 << PG_foreign)) |
| +#else |
| +# define __PG_XEN (1 << PG_foreign) |
| +#endif |
| + |
| /* |
| * Flags checked when a page is freed. Pages being freed should not have |
| * these flags set. It they are, there is a problem. |
| @@ -402,7 +436,7 @@ PAGEFLAG_FALSE(MemError) |
| 1 << PG_private | 1 << PG_private_2 | \ |
| 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ |
| 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
| - 1 << PG_unevictable | __PG_MLOCKED | 1 << PG_waiters) |
| + 1 << PG_unevictable | __PG_MLOCKED | __PG_XEN | 1 << PG_waiters) |
| |
| /* |
| * Flags checked when a page is prepped for return by the page allocator. |
| |
| |
| @@ -866,6 +866,11 @@ static inline int pci_msi_enabled(void) |
| { |
| return 0; |
| } |
| + |
| +#ifdef CONFIG_XEN |
| +#define register_msi_get_owner(func) 0 |
| +#define unregister_msi_get_owner(func) 0 |
| +#endif |
| #else |
| extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); |
| extern void pci_msi_shutdown(struct pci_dev *dev); |
| @@ -878,6 +883,10 @@ extern void pci_disable_msix(struct pci_ |
| extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); |
| extern void pci_restore_msi_state(struct pci_dev *dev); |
| extern int pci_msi_enabled(void); |
| +#ifdef CONFIG_XEN |
| +extern int register_msi_get_owner(int (*func)(struct pci_dev *dev)); |
| +extern int unregister_msi_get_owner(int (*func)(struct pci_dev *dev)); |
| +#endif |
| #endif |
| |
| #ifndef CONFIG_PCIEASPM |
| |
| |
| @@ -278,6 +278,8 @@ typedef unsigned char *sk_buff_data_t; |
| * @local_df: allow local fragmentation |
| * @cloned: Head may be cloned (check refcnt to be sure) |
| * @nohdr: Payload reference only, must not modify header |
| + * @proto_data_valid: Protocol data validated since arriving at localhost |
| + * @proto_csum_blank: Protocol csum must be added before leaving localhost |
| * @pkt_type: Packet class |
| * @fclone: skbuff clone status |
| * @ip_summed: Driver fed us an IP checksum |
| @@ -383,9 +385,13 @@ struct sk_buff { |
| #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) |
| __u8 do_not_encrypt:1; |
| #endif |
| +#ifdef CONFIG_XEN |
| + __u8 proto_data_valid:1, |
| + proto_csum_blank:1; |
| +#endif |
| kmemcheck_bitfield_end(flags2); |
| |
| - /* 0/13/14 bit hole */ |
| + /* 0...15 bit hole */ |
| |
| #ifdef CONFIG_NET_DMA |
| dma_cookie_t dma_cookie; |
| |
| |
| @@ -22,6 +22,11 @@ |
| #else |
| #define MODULE_VERMAGIC_MODVERSIONS "" |
| #endif |
| +#ifdef CONFIG_XEN |
| +#define MODULE_VERMAGIC_XEN "Xen " |
| +#else |
| +#define MODULE_VERMAGIC_XEN |
| +#endif |
| #ifndef MODULE_ARCH_VERMAGIC |
| #define MODULE_ARCH_VERMAGIC "" |
| #endif |
| @@ -30,5 +35,5 @@ |
| UTS_RELEASE " " \ |
| MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ |
| MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ |
| - MODULE_ARCH_VERMAGIC |
| + MODULE_VERMAGIC_XEN MODULE_ARCH_VERMAGIC |
| |
| |
| |
| @@ -235,7 +235,7 @@ void note_interrupt(unsigned int irq, st |
| */ |
| if (time_after(jiffies, desc->last_unhandled + HZ/10)) |
| desc->irqs_unhandled = 1; |
| - else |
| + else if (!irq_ignore_unhandled(irq)) |
| desc->irqs_unhandled++; |
| desc->last_unhandled = jiffies; |
| if (unlikely(action_ret != IRQ_NONE)) |
| |
| |
| @@ -360,13 +360,26 @@ static int kimage_is_destination_range(s |
| return 0; |
| } |
| |
| -static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) |
| +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit) |
| { |
| struct page *pages; |
| |
| pages = alloc_pages(gfp_mask, order); |
| if (pages) { |
| unsigned int count, i; |
| +#ifdef CONFIG_XEN |
| + int address_bits; |
| + |
| + if (limit == ~0UL) |
| + address_bits = BITS_PER_LONG; |
| + else |
| + address_bits = long_log2(limit); |
| + |
| + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { |
| + __free_pages(pages, order); |
| + return NULL; |
| + } |
| +#endif |
| pages->mapping = NULL; |
| set_page_private(pages, order); |
| count = 1 << order; |
| @@ -430,10 +443,10 @@ static struct page *kimage_alloc_normal_ |
| do { |
| unsigned long pfn, epfn, addr, eaddr; |
| |
| - pages = kimage_alloc_pages(GFP_KERNEL, order); |
| + pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT); |
| if (!pages) |
| break; |
| - pfn = page_to_pfn(pages); |
| + pfn = kexec_page_to_pfn(pages); |
| epfn = pfn + count; |
| addr = pfn << PAGE_SHIFT; |
| eaddr = epfn << PAGE_SHIFT; |
| @@ -467,6 +480,7 @@ static struct page *kimage_alloc_normal_ |
| return pages; |
| } |
| |
| +#ifndef CONFIG_XEN |
| static struct page *kimage_alloc_crash_control_pages(struct kimage *image, |
| unsigned int order) |
| { |
| @@ -520,7 +534,7 @@ static struct page *kimage_alloc_crash_c |
| } |
| /* If I don't overlap any segments I have found my hole! */ |
| if (i == image->nr_segments) { |
| - pages = pfn_to_page(hole_start >> PAGE_SHIFT); |
| + pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); |
| break; |
| } |
| } |
| @@ -547,6 +561,13 @@ struct page *kimage_alloc_control_pages( |
| |
| return pages; |
| } |
| +#else /* !CONFIG_XEN */ |
| +struct page *kimage_alloc_control_pages(struct kimage *image, |
| + unsigned int order) |
| +{ |
| + return kimage_alloc_normal_control_pages(image, order); |
| +} |
| +#endif |
| |
| static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) |
| { |
| @@ -562,7 +583,7 @@ static int kimage_add_entry(struct kimag |
| return -ENOMEM; |
| |
| ind_page = page_address(page); |
| - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; |
| + *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; |
| image->entry = ind_page; |
| image->last_entry = ind_page + |
| ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); |
| @@ -621,13 +642,13 @@ static void kimage_terminate(struct kima |
| #define for_each_kimage_entry(image, ptr, entry) \ |
| for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ |
| ptr = (entry & IND_INDIRECTION)? \ |
| - phys_to_virt((entry & PAGE_MASK)): ptr +1) |
| + kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) |
| |
| static void kimage_free_entry(kimage_entry_t entry) |
| { |
| struct page *page; |
| |
| - page = pfn_to_page(entry >> PAGE_SHIFT); |
| + page = kexec_pfn_to_page(entry >> PAGE_SHIFT); |
| kimage_free_pages(page); |
| } |
| |
| @@ -639,6 +660,10 @@ static void kimage_free(struct kimage *i |
| if (!image) |
| return; |
| |
| +#ifdef CONFIG_XEN |
| + xen_machine_kexec_unload(image); |
| +#endif |
| + |
| kimage_free_extra_pages(image); |
| for_each_kimage_entry(image, ptr, entry) { |
| if (entry & IND_INDIRECTION) { |
| @@ -714,7 +739,7 @@ static struct page *kimage_alloc_page(st |
| * have a match. |
| */ |
| list_for_each_entry(page, &image->dest_pages, lru) { |
| - addr = page_to_pfn(page) << PAGE_SHIFT; |
| + addr = kexec_page_to_pfn(page) << PAGE_SHIFT; |
| if (addr == destination) { |
| list_del(&page->lru); |
| return page; |
| @@ -725,16 +750,16 @@ static struct page *kimage_alloc_page(st |
| kimage_entry_t *old; |
| |
| /* Allocate a page, if we run out of memory give up */ |
| - page = kimage_alloc_pages(gfp_mask, 0); |
| + page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT); |
| if (!page) |
| return NULL; |
| /* If the page cannot be used file it away */ |
| - if (page_to_pfn(page) > |
| + if (kexec_page_to_pfn(page) > |
| (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { |
| list_add(&page->lru, &image->unuseable_pages); |
| continue; |
| } |
| - addr = page_to_pfn(page) << PAGE_SHIFT; |
| + addr = kexec_page_to_pfn(page) << PAGE_SHIFT; |
| |
| /* If it is the destination page we want use it */ |
| if (addr == destination) |
| @@ -757,7 +782,7 @@ static struct page *kimage_alloc_page(st |
| struct page *old_page; |
| |
| old_addr = *old & PAGE_MASK; |
| - old_page = pfn_to_page(old_addr >> PAGE_SHIFT); |
| + old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); |
| copy_highpage(page, old_page); |
| *old = addr | (*old & ~PAGE_MASK); |
| |
| @@ -813,7 +838,7 @@ static int kimage_load_normal_segment(st |
| result = -ENOMEM; |
| goto out; |
| } |
| - result = kimage_add_page(image, page_to_pfn(page) |
| + result = kimage_add_page(image, kexec_page_to_pfn(page) |
| << PAGE_SHIFT); |
| if (result < 0) |
| goto out; |
| @@ -845,6 +870,7 @@ out: |
| return result; |
| } |
| |
| +#ifndef CONFIG_XEN |
| static int kimage_load_crash_segment(struct kimage *image, |
| struct kexec_segment *segment) |
| { |
| @@ -867,7 +893,7 @@ static int kimage_load_crash_segment(str |
| char *ptr; |
| size_t uchunk, mchunk; |
| |
| - page = pfn_to_page(maddr >> PAGE_SHIFT); |
| + page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); |
| if (!page) { |
| result = -ENOMEM; |
| goto out; |
| @@ -916,6 +942,13 @@ static int kimage_load_segment(struct ki |
| |
| return result; |
| } |
| +#else /* CONFIG_XEN */ |
| +static int kimage_load_segment(struct kimage *image, |
| + struct kexec_segment *segment) |
| +{ |
| + return kimage_load_normal_segment(image, segment); |
| +} |
| +#endif |
| |
| /* |
| * Exec Kernel system call: for obvious reasons only root may call it. |
| @@ -1019,6 +1052,13 @@ SYSCALL_DEFINE4(kexec_load, unsigned lon |
| } |
| kimage_terminate(image); |
| } |
| +#ifdef CONFIG_XEN |
| + if (image) { |
| + result = xen_machine_kexec_load(image); |
| + if (result) |
| + goto out; |
| + } |
| +#endif |
| /* Install the new kernel, and Uninstall the old */ |
| image = xchg(dest_image, image); |
| |
| |
| |
| @@ -828,7 +828,7 @@ static struct ctl_table kern_table[] = { |
| .proc_handler = &proc_dointvec, |
| }, |
| #endif |
| -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) |
| +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP) |
| { |
| .procname = "acpi_video_flags", |
| .data = &acpi_realmode_flags, |
| |
| |
| @@ -494,6 +494,12 @@ struct page *vm_normal_page(struct vm_ar |
| { |
| unsigned long pfn = pte_pfn(pte); |
| |
| +#if defined(CONFIG_XEN) && defined(CONFIG_X86) |
| + /* XEN: Covers user-space grant mappings (even of local pages). */ |
| + if (unlikely(vma->vm_flags & VM_FOREIGN)) |
| + return NULL; |
| +#endif |
| + |
| if (HAVE_PTE_SPECIAL) { |
| if (likely(!pte_special(pte))) |
| goto check_pfn; |
| @@ -521,6 +527,9 @@ struct page *vm_normal_page(struct vm_ar |
| |
| check_pfn: |
| if (unlikely(pfn > highest_memmap_pfn)) { |
| +#ifdef CONFIG_XEN |
| + if (!(vma->vm_flags & VM_RESERVED)) |
| +#endif |
| print_bad_pte(vma, addr, pte, NULL); |
| return NULL; |
| } |
| @@ -803,8 +812,12 @@ static unsigned long zap_pte_range(struc |
| page->index > details->last_index)) |
| continue; |
| } |
| - ptent = ptep_get_and_clear_full(mm, addr, pte, |
| - tlb->fullmm); |
| + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte)) |
| + ptent = vma->vm_ops->zap_pte(vma, addr, pte, |
| + tlb->fullmm); |
| + else |
| + ptent = ptep_get_and_clear_full(mm, addr, pte, |
| + tlb->fullmm); |
| tlb_remove_tlb_entry(tlb, pte, addr); |
| if (unlikely(!page)) |
| continue; |
| @@ -1064,6 +1077,7 @@ unsigned long zap_page_range(struct vm_a |
| tlb_finish_mmu(tlb, address, end); |
| return end; |
| } |
| +EXPORT_SYMBOL(zap_page_range); |
| |
| /** |
| * zap_vma_ptes - remove ptes mapping the vma |
| @@ -1274,6 +1288,28 @@ int __get_user_pages(struct task_struct |
| continue; |
| } |
| |
| +#ifdef CONFIG_XEN |
| + if (vma && (vma->vm_flags & VM_FOREIGN)) { |
| + struct vm_foreign_map *foreign_map = |
| + vma->vm_private_data; |
| + struct page **map = foreign_map->map; |
| + int offset = (start - vma->vm_start) >> PAGE_SHIFT; |
| + if (map[offset] != NULL) { |
| + if (pages) { |
| + struct page *page = map[offset]; |
| + |
| + pages[i] = page; |
| + get_page(page); |
| + } |
| + if (vmas) |
| + vmas[i] = vma; |
| + i++; |
| + start += PAGE_SIZE; |
| + len--; |
| + continue; |
| + } |
| + } |
| +#endif |
| if (!vma || |
| (vma->vm_flags & (VM_IO | VM_PFNMAP)) || |
| (!ignore && !(vm_flags & vma->vm_flags))) |
| |
| |
| @@ -1842,6 +1842,12 @@ static void unmap_region(struct mm_struc |
| tlb_finish_mmu(tlb, start, end); |
| } |
| |
| +static inline void unmap_vma(struct vm_area_struct *vma) |
| +{ |
| + if (unlikely(vma->vm_ops && vma->vm_ops->unmap)) |
| + vma->vm_ops->unmap(vma); |
| +} |
| + |
| /* |
| * Create a list of vma's touched by the unmap, removing them from the mm's |
| * vma list as we go.. |
| @@ -1857,6 +1863,7 @@ detach_vmas_to_be_unmapped(struct mm_str |
| insertion_point = (prev ? &prev->vm_next : &mm->mmap); |
| do { |
| rb_erase(&vma->vm_rb, &mm->mm_rb); |
| + unmap_vma(vma); |
| mm->map_count--; |
| tail_vma = vma; |
| vma = vma->vm_next; |
| @@ -2157,6 +2164,9 @@ void exit_mmap(struct mm_struct *mm) |
| |
| arch_exit_mmap(mm); |
| |
| + for (vma = mm->mmap; vma; vma = vma->vm_next) |
| + unmap_vma(vma); |
| + |
| vma = mm->mmap; |
| if (!vma) /* Can happen if dup_mmap() received an OOM */ |
| return; |
| |
| |
| @@ -91,6 +91,8 @@ static inline void change_pmd_range(stru |
| next = pmd_addr_end(addr, end); |
| if (pmd_none_or_clear_bad(pmd)) |
| continue; |
| + if (arch_change_pte_range(mm, pmd, addr, next, newprot)) |
| + continue; |
| change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); |
| } while (pmd++, addr = next, addr != end); |
| } |
| |
| |
| @@ -559,6 +559,13 @@ static void __free_pages_ok(struct page |
| int bad = 0; |
| int wasMlocked = TestClearPageMlocked(page); |
| |
| +#ifdef CONFIG_XEN |
| + if (PageForeign(page)) { |
| + PageForeignDestructor(page, order); |
| + return; |
| + } |
| +#endif |
| + |
| kmemcheck_free_shadow(page, order); |
| |
| for (i = 0 ; i < (1 << order) ; ++i) |
| @@ -1026,6 +1033,13 @@ static void free_hot_cold_page(struct pa |
| unsigned long flags; |
| int wasMlocked = TestClearPageMlocked(page); |
| |
| +#ifdef CONFIG_XEN |
| + if (PageForeign(page)) { |
| + PageForeignDestructor(page, 0); |
| + return; |
| + } |
| +#endif |
| + |
| kmemcheck_free_shadow(page, 0); |
| |
| if (PageAnon(page)) |
| |
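| Both freeing paths above divert "foreign" pages, i.e. frames only borrowed |
| from another domain (as backend drivers such as netback do), into a per-page |
| destructor instead of returning them to the buddy allocator. A hedged sketch |
| of the driver side, assuming the SetPageForeign(page, destructor) and |
| ClearPageForeign(page) helpers provided by the Xen page-flags additions that |
| are not part of this excerpt; the reclaim body is illustrative: |
| |
|     static void foreign_page_release(struct page *page, unsigned int order) |
|     { |
|         /* order is 0 for pages handed out one frame at a time */ |
|         ClearPageForeign(page); |
|         /* ...unmap the grant reference and recycle the frame... */ |
|     } |
| |
|     static void example_make_page_foreign(struct page *page) |
|     { |
|         SetPageForeign(page, foreign_page_release); |
|         /* __free_pages_ok()/free_hot_cold_page() will now invoke |
|          * foreign_page_release() instead of freeing the page. */ |
|     } |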
| |
| @@ -136,6 +136,12 @@ |
| /* This should be increased if a protocol with a bigger head is added. */ |
| #define GRO_MAX_HEAD (MAX_HEADER + 128) |
| |
| +#ifdef CONFIG_XEN |
| +#include <net/ip.h> |
| +#include <linux/tcp.h> |
| +#include <linux/udp.h> |
| +#endif |
| + |
| /* |
| * The list of packet types we will receive (as opposed to discard) |
| * and the routines to invoke. |
| @@ -1791,6 +1797,42 @@ static struct netdev_queue *dev_pick_tx( |
| return netdev_get_tx_queue(dev, queue_index); |
| } |
| |
| +#ifdef CONFIG_XEN |
| +inline int skb_checksum_setup(struct sk_buff *skb) |
| +{ |
| + if (skb->proto_csum_blank) { |
| + if (skb->protocol != htons(ETH_P_IP)) |
| + goto out; |
| + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; |
| + if (skb->h.raw >= skb->tail) |
| + goto out; |
| + switch (skb->nh.iph->protocol) { |
| + case IPPROTO_TCP: |
| + skb->csum = offsetof(struct tcphdr, check); |
| + break; |
| + case IPPROTO_UDP: |
| + skb->csum = offsetof(struct udphdr, check); |
| + break; |
| + default: |
| + if (net_ratelimit()) |
| + printk(KERN_ERR "Attempting to checksum a non-" |
| + "TCP/UDP packet, dropping a protocol" |
| +				       " %d packet\n", skb->nh.iph->protocol); |
| + goto out; |
| + } |
| + if ((skb->h.raw + skb->csum + 2) > skb->tail) |
| + goto out; |
| + skb->ip_summed = CHECKSUM_HW; |
| + skb->proto_csum_blank = 0; |
| + } |
| + return 0; |
| +out: |
| + return -EPROTO; |
| +} |
| +#else |
| +inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } |
| +#endif |
| + |
| /** |
| * dev_queue_xmit - transmit a buffer |
| * @skb: buffer to transmit |
| @@ -1823,6 +1865,12 @@ int dev_queue_xmit(struct sk_buff *skb) |
| struct Qdisc *q; |
| int rc = -ENOMEM; |
| |
| + /* If a checksum-deferred packet is forwarded to a device that needs a |
| + * checksum, correct the pointers and force checksumming. |
| + */ |
| + if (skb_checksum_setup(skb)) |
| + goto out_kfree_skb; |
| + |
| /* GSO will handle the following emulations directly. */ |
| if (netif_needs_gso(dev, skb)) |
| goto gso; |
| @@ -2292,6 +2340,19 @@ int netif_receive_skb(struct sk_buff *sk |
| } |
| #endif |
| |
| +#ifdef CONFIG_XEN |
| + switch (skb->ip_summed) { |
| + case CHECKSUM_UNNECESSARY: |
| + skb->proto_data_valid = 1; |
| + break; |
| + case CHECKSUM_HW: |
| + /* XXX Implement me. */ |
| + default: |
| + skb->proto_data_valid = 0; |
| + break; |
| + } |
| +#endif |
| + |
| list_for_each_entry_rcu(ptype, &ptype_all, list) { |
| if (ptype->dev == null_or_orig || ptype->dev == skb->dev || |
| ptype->dev == orig_dev) { |
| @@ -5672,6 +5733,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif |
| EXPORT_SYMBOL(net_enable_timestamp); |
| EXPORT_SYMBOL(net_disable_timestamp); |
| EXPORT_SYMBOL(dev_get_flags); |
| +EXPORT_SYMBOL(skb_checksum_setup); |
| |
| EXPORT_SYMBOL(dev_load); |
| |
| |
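| skb_checksum_setup(), added above, converts a packet whose checksum the |
| paravirtualised sender left blank into the 2.6.18-era CHECKSUM_HW |
| convention: skb->h.raw points at the transport header and skb->csum holds |
| the offset of the checksum field within it. A hedged sketch of how such a |
| checksum would then be completed in software, using the same old field names |
| as the hunk (mainline's skb_checksum_help() is the real counterpart): |
| |
|     #include <linux/skbuff.h> |
|     #include <net/checksum.h> |
| |
|     static void example_complete_checksum(struct sk_buff *skb) |
|     { |
|         unsigned int off = skb->h.raw - skb->data;  /* transport header */ |
|         __wsum sum = skb_checksum(skb, off, skb->len - off, 0); |
| |
|         /* skb->csum is the field offset set by skb_checksum_setup() */ |
|         *(__sum16 *)(skb->h.raw + skb->csum) = csum_fold(sum); |
|         skb->ip_summed = CHECKSUM_NONE; |
|     } |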
| |
| @@ -584,6 +584,10 @@ static struct sk_buff *__skb_clone(struc |
| n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; |
| n->cloned = 1; |
| n->nohdr = 0; |
| +#ifdef CONFIG_XEN |
| + C(proto_data_valid); |
| + C(proto_csum_blank); |
| +#endif |
| n->destructor = NULL; |
| C(tail); |
| C(end); |
| |
| |
| @@ -75,6 +75,9 @@ tcp_manip_pkt(struct sk_buff *skb, |
| if (hdrsize < sizeof(*hdr)) |
| return true; |
| |
| + if (skb_checksum_setup(skb)) |
| + return false; |
| + |
| inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); |
| inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); |
| return true; |
| |
| |
| @@ -60,6 +60,10 @@ udp_manip_pkt(struct sk_buff *skb, |
| newport = tuple->dst.u.udp.port; |
| portptr = &hdr->dest; |
| } |
| + |
| + if (skb_checksum_setup(skb)) |
| + return false; |
| + |
| if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); |
| inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, |
| |
| |
| @@ -81,7 +81,7 @@ static int xfrm4_output_finish(struct sk |
| #endif |
| |
| skb->protocol = htons(ETH_P_IP); |
| - return xfrm_output(skb); |
| + return skb_checksum_setup(skb) ?: xfrm_output(skb); |
| } |
| |
| int xfrm4_output(struct sk_buff *skb) |
| |
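| The NAT and xfrm hunks above follow one rule: any path about to read or |
| rewrite a TCP/UDP checksum first materialises the deferred checksum state |
| via skb_checksum_setup(). The "?:" in xfrm4_output_finish() is GCC's |
| conditional operator with the middle operand omitted; spelled out it is: |
| |
|     int err = skb_checksum_setup(skb); |
|     return err ? err : xfrm_output(skb); |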
| |
| @@ -76,6 +76,21 @@ ifndef obj |
| $(warning kbuild: Makefile.build is included improperly) |
| endif |
| |
| +ifeq ($(CONFIG_XEN),y) |
| +Makefile.xen := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD),$(objtree)/scripts)/Makefile.xen |
| +$(Makefile.xen): $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build |
| + @echo ' Updating $@' |
| + $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\ |
| + ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub')) |
| + @$(AWK) -f $< $(filter-out $<,$^) >$@ |
| + |
| +xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c)))) |
| +xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o) |
| +single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m)) |
| + |
| +-include $(Makefile.xen) |
| +endif |
| + |
| # |
| |
| ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),) |
| |
| |
| @@ -22,6 +22,12 @@ obj-m := $(filter-out $(obj-y),$(obj-m)) |
| |
| lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m))) |
| |
| +# Remove objects that have been forcibly disabled (listed in $(disabled-obj-y)) |
| + |
| +obj-y := $(filter-out $(disabled-obj-y),$(obj-y)) |
| +obj-m := $(filter-out $(disabled-obj-y),$(obj-m)) |
| +lib-y := $(filter-out $(disabled-obj-y),$(lib-y)) |
| + |
| |
| # Handle objects in subdirs |
| # --------------------------------------------------------------------------- |
| |