From: www.kernel.org
Subject: Update to 2.6.22
Patch-mainline: 2.6.22

Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py
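
Summary of the visible changes in this forward port: the i386 PDA is replaced by the per-CPU gdt_page (with %fs now loading __KERNEL_PERCPU and switch_to_new_gdt() registering the per-CPU GDT with the hypervisor), identify_cpu() is split into identify_boot_cpu()/identify_secondary_cpu(), the open-coded TLB-flush and smp_call_function paths move to the mask-based native_flush_tlb_others()/xen_smp_call_function_mask() variants, sched_clock() is reworked to report unstolen time from a runstate snapshot, the timer sysdev resume hook becomes a clocksource resume callback, and obsolete <linux/smp_lock.h> includes are dropped along with Kconfig/Makefile adjustments (!XEN for RELOCATABLE, COMPAT_VDSO, MSI support, head_64 build rules).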

Acked-by: jbeulich@novell.com

--- 12.2.orig/arch/x86/Kconfig	2012-02-08 11:31:09.000000000 +0100
+++ 12.2/arch/x86/Kconfig	2012-04-10 16:15:32.000000000 +0200
@@ -1670,7 +1670,7 @@ config PHYSICAL_START
 
 config RELOCATABLE
 	bool "Build a relocatable kernel"
-	depends on !X86_XEN
+	depends on !XEN
 	default y
 	---help---
 	  This builds a kernel image that retains relocation information
@@ -1729,7 +1729,6 @@ config COMPAT_VDSO
 	def_bool y
 	prompt "Compat VDSO support"
 	depends on X86_32 || IA32_EMULATION
-	depends on !X86_XEN
 	---help---
 	  Map the 32-bit VDSO to the predictable old-style address too.
 
@@ -1949,6 +1948,7 @@ config PCI
 	bool "PCI support"
 	default y
 	select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
+	select ARCH_SUPPORTS_MSI if (XEN_UNPRIVILEGED_GUEST && XEN_PCIDEV_FRONTEND)
 	---help---
 	  Find out whether you have a PCI motherboard. PCI is the name of a
 	  bus system, i.e. the way the CPU talks to the other stuff inside
--- 12.2.orig/arch/x86/kernel/Makefile	2012-04-10 16:14:45.000000000 +0200
+++ 12.2/arch/x86/kernel/Makefile	2012-04-10 16:15:27.000000000 +0200
@@ -123,4 +123,4 @@ endif
 disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8237.o i8253.o i8259_$(BITS).o \
 	reboot.o smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
-%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
+%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
--- 12.2.orig/arch/x86/kernel/apic/apic-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/apic/apic-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -19,7 +19,6 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
--- 12.2.orig/arch/x86/kernel/asm-offsets_32.c	2012-02-08 11:30:59.000000000 +0100
+++ 12.2/arch/x86/kernel/asm-offsets_32.c	2012-02-08 11:31:30.000000000 +0100
@@ -69,11 +69,6 @@ void foo(void)
 	DEFINE(SYSENTER_stack_sp0, 0);
 #endif
 
-#ifdef CONFIG_XEN
-	BLANK();
-	OFFSET(XEN_START_mfn_list, start_info, mfn_list);
-#endif
-
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
--- 12.2.orig/arch/x86/kernel/cpu/common-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/common-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -22,16 +22,40 @@
 #define phys_pkg_id(a,b) a
 #endif
 #endif
-#include <asm/pda.h>
 #include <asm/hypervisor.h>
 
 #include "cpu.h"
 
-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
+	[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
+	[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
+	[GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+#ifndef CONFIG_XEN
+	/*
+	 * Segments used for calling PnP BIOS have byte granularity.
+	 * They code segments and data segments have fixed 64k limits,
+	 * the transfer segment sizes are set at run time.
+	 */
+	[GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	[GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
+	[GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	/*
+	 * The APM segments have byte granularity and their bases
+	 * are set at run time.  All have 64k limits.
+	 */
+	[GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	/* 16-bit code */
+	[GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
+	[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
 
-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
+	[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
+#endif
+	[GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
@@ -375,7 +399,7 @@ __setup("serialnumber", x86_serial_nr_se
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
@@ -486,15 +510,22 @@ void __cpuinit identify_cpu(struct cpuin
 
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
+}
 
-	if (c == &boot_cpu_data)
-		sysenter_setup();
+void __init identify_boot_cpu(void)
+{
+	identify_cpu(&boot_cpu_data);
+	sysenter_setup();
 	enable_sep_cpu();
+	mtrr_bp_init();
+}
 
-	if (c == &boot_cpu_data)
-		mtrr_bp_init();
-	else
-		mtrr_ap_init();
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+	BUG_ON(c == &boot_cpu_data);
+	identify_cpu(c);
+	enable_sep_cpu();
+	mtrr_ap_init();
 }
 
 #ifdef CONFIG_X86_HT
@@ -608,136 +639,47 @@ void __init early_cpu_init(void)
 #endif
 }
 
-/* Make sure %gs is initialized properly in idle threads */
+/* Make sure %fs is initialized properly in idle threads */
 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
-	regs->xfs = __KERNEL_PDA;
+	regs->xfs = __KERNEL_PERCPU;
 	return regs;
 }
 
-static __cpuinit int alloc_gdt(int cpu)
-{
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-	struct desc_struct *gdt;
-	struct i386_pda *pda;
-
-	gdt = (struct desc_struct *)cpu_gdt_descr->address;
-	pda = cpu_pda(cpu);
-
-	/*
-	 * This is a horrible hack to allocate the GDT.  The problem
-	 * is that cpu_init() is called really early for the boot CPU
-	 * (and hence needs bootmem) but much later for the secondary
-	 * CPUs, when bootmem will have gone away
-	 */
-	if (NODE_DATA(0)->bdata->node_bootmem_map) {
-		BUG_ON(gdt != NULL || pda != NULL);
-
-		gdt = alloc_bootmem_pages(PAGE_SIZE);
-		pda = alloc_bootmem(sizeof(*pda));
-		/* alloc_bootmem(_pages) panics on failure, so no check */
-
-		memset(gdt, 0, PAGE_SIZE);
-		memset(pda, 0, sizeof(*pda));
-	} else {
-		/* GDT and PDA might already have been allocated if
-		   this is a CPU hotplug re-insertion. */
-		if (gdt == NULL)
-			gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
-
-		if (pda == NULL)
-			pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
-
-		if (unlikely(!gdt || !pda)) {
-			free_pages((unsigned long)gdt, 0);
-			kfree(pda);
-			return 0;
-		}
-	}
-
- 	cpu_gdt_descr->address = (unsigned long)gdt;
-	cpu_pda(cpu) = pda;
-
-	return 1;
-}
-
-/* Initial PDA used by boot CPU */
-struct i386_pda boot_pda = {
-	._pda = &boot_pda,
-	.cpu_number = 0,
-	.pcurrent = &init_task,
-};
-
-static inline void set_kernel_fs(void)
-{
-	/* Set %fs for this CPU's PDA.  Memory clobber is to create a
-	   barrier with respect to any PDA operations, so the compiler
-	   doesn't move any before here. */
-	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
-}
-
-/* Initialize the CPU's GDT and PDA.  The boot CPU does this for
-   itself, but secondaries find this done for them. */
-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
-{
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-	struct desc_struct *gdt;
-	struct i386_pda *pda;
-
-	/* For non-boot CPUs, the GDT and PDA should already have been
-	   allocated. */
-	if (!alloc_gdt(cpu)) {
-		printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
-		return 0;
-	}
-
-	gdt = (struct desc_struct *)cpu_gdt_descr->address;
-	pda = cpu_pda(cpu);
-
-	BUG_ON(gdt == NULL || pda == NULL);
-
-	/*
-	 * Initialize the per-CPU GDT with the boot GDT,
-	 * and set up the GDT descriptor:
-	 */
- 	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-	cpu_gdt_descr->size = GDT_SIZE - 1;
-
-	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
-			(u32 *)&gdt[GDT_ENTRY_PDA].b,
-			(unsigned long)pda, sizeof(*pda) - 1,
-			0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
-
-	memset(pda, 0, sizeof(*pda));
-	pda->_pda = pda;
-	pda->cpu_number = cpu;
-	pda->pcurrent = idle;
-
-	return 1;
-}
-
-void __cpuinit cpu_set_gdt(int cpu)
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
 {
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+	struct Xgt_desc_struct gdt_descr;
 	unsigned long va, frames[16];
 	int f;
 
-	for (va = cpu_gdt_descr->address, f = 0;
-	     va < cpu_gdt_descr->address + cpu_gdt_descr->size;
+	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.size = GDT_SIZE - 1;
+
+	for (va = gdt_descr.address, f = 0;
+	     va < gdt_descr.address + gdt_descr.size;
 	     va += PAGE_SIZE, f++) {
 		frames[f] = virt_to_mfn(va);
 		make_lowmem_page_readonly(
 			(void *)va, XENFEAT_writable_descriptor_tables);
 	}
-	BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
-
-	set_kernel_fs();
+	if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
+		BUG();
+	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
 }
 
-/* Common CPU init for both boot and secondary CPUs */
-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
 {
+	int cpu = smp_processor_id();
+	struct task_struct *curr = current;
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 #endif
@@ -759,6 +701,8 @@ static void __cpuinit _cpu_init(int cpu,
 		set_in_cr4(X86_CR4_TSD);
 	}
 
+	switch_to_new_gdt();
+
 	/*
 	 * Set up and load the per-CPU TSS and LDT
 	 */
@@ -796,38 +740,6 @@ static void __cpuinit _cpu_init(int cpu,
 	mxcsr_feature_mask_init();
 }
 
-/* Entrypoint to initialize secondary CPU */
-void __cpuinit secondary_cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-
-	_cpu_init(cpu, curr);
-}
-
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-
-	/* Set up the real GDT and PDA, so we can transition from the
-	   boot versions. */
-	if (!init_gdt(cpu, curr)) {
-		/* failed to allocate something; not much we can do... */
-		for (;;)
-			local_irq_enable();
-	}
-
-	cpu_set_gdt(cpu);
-	_cpu_init(cpu, curr);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 void __cpuinit cpu_uninit(void)
 {
--- 12.2.orig/arch/x86/kernel/cpu/mtrr/main-xen.c	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/mtrr/main-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -167,7 +167,7 @@ mtrr_del(int reg, unsigned long base, un
 EXPORT_SYMBOL(mtrr_add);
 EXPORT_SYMBOL(mtrr_del);
 
-void __init mtrr_bp_init(void)
+__init void mtrr_bp_init(void)
 {
 }
 
--- 12.2.orig/arch/x86/kernel/e820_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/e820_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -162,26 +162,27 @@ static struct resource standard_io_resou
 
 static int __init romsignature(const unsigned char *rom)
 {
+	const unsigned short * const ptr = (const unsigned short *)rom;
 	unsigned short sig;
 
-	return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
-	       sig == ROMSIGNATURE;
+	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
 }
 
-static int __init romchecksum(unsigned char *rom, unsigned long length)
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
 {
-	unsigned char sum;
+	unsigned char sum, c;
 
-	for (sum = 0; length; length--)
-		sum += *rom++;
-	return sum == 0;
+	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+		sum += c;
+	return !length && !sum;
 }
 
 static void __init probe_roms(void)
 {
+	const unsigned char *rom;
 	unsigned long start, length, upper;
-	unsigned char *rom;
-	int	      i;
+	unsigned char c;
+	int i;
 
 #ifdef CONFIG_XEN
 	/* Nothing to do if not running in dom0. */
@@ -198,8 +199,11 @@ static void __init probe_roms(void)
 
 		video_rom_resource.start = start;
 
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
 		/* 0 < length <= 0x7f * 512, historically */
-		length = rom[2] * 512;
+		length = c * 512;
 
 		/* if checksum okay, trust length byte */
 		if (length && romchecksum(rom, length))
@@ -233,8 +237,11 @@ static void __init probe_roms(void)
 		if (!romsignature(rom))
 			continue;
 
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
 		/* 0 < length <= 0x7f * 512, historically */
-		length = rom[2] * 512;
+		length = c * 512;
 
 		/* but accept any length that fits if checksum okay */
 		if (!length || start + length > upper || !romchecksum(rom, length))
@@ -249,7 +256,7 @@ static void __init probe_roms(void)
 }
 
 #ifdef CONFIG_XEN
-static struct e820map machine_e820 __initdata;
+static struct e820map machine_e820;
 #define e820 machine_e820
 #endif
 
@@ -409,10 +416,8 @@ int __init sanitize_e820_map(struct e820
 		   ____________________33__
 		   ______________________4_
 	*/
-	printk("sanitize start\n");
 	/* if there's only one memory region, don't bother */
 	if (*pnr_map < 2) {
-		printk("sanitize bail 0\n");
 		return -1;
 	}
 
@@ -421,7 +426,6 @@ int __init sanitize_e820_map(struct e820
 	/* bail out if we find any unreasonable addresses in bios map */
 	for (i=0; i<old_nr; i++)
 		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
-			printk("sanitize bail 1\n");
 			return -1;
 		}
 
@@ -517,7 +521,6 @@ int __init sanitize_e820_map(struct e820
 	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
 	*pnr_map = new_nr;
 
-	printk("sanitize end\n");
 	return 0;
 }
 
@@ -552,7 +555,6 @@ int __init copy_e820_map(struct e820entr
 		unsigned long long size = biosmap->size;
 		unsigned long long end = start + size;
 		unsigned long type = biosmap->type;
-		printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
 
 		/* Overflow in 64 bits? Ignore the memory map. */
 		if (start > end)
@@ -564,17 +566,11 @@ int __init copy_e820_map(struct e820entr
 		 * Not right. Fix it up.
 		 */
 		if (type == E820_RAM) {
-			printk("copy_e820_map() type is E820_RAM\n");
 			if (start < 0x100000ULL && end > 0xA0000ULL) {
-				printk("copy_e820_map() lies in range...\n");
-				if (start < 0xA0000ULL) {
-					printk("copy_e820_map() start < 0xA0000ULL\n");
+				if (start < 0xA0000ULL)
 					add_memory_region(start, 0xA0000ULL-start, type);
-				}
-				if (end <= 0x100000ULL) {
-					printk("copy_e820_map() end <= 0x100000ULL\n");
+				if (end <= 0x100000ULL)
 					continue;
-				}
 				start = 0x100000ULL;
 				size = end - start;
 			}
--- 12.2.orig/arch/x86/kernel/entry_32-xen.S	2011-04-26 09:08:37.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_32-xen.S	2011-01-31 17:32:29.000000000 +0100
@@ -15,7 +15,7 @@
  * I changed all the .align's to 4 (16 byte alignment), as that's faster
  * on a 486.
  *
- * Stack layout in 'ret_from_system_call':
+ * Stack layout in 'syscall_exit':
  * 	ptrace needs to have all regs on the stack.
  *	if the order here is changed, it needs to be
  *	updated in fork.c:copy_process, signal.c:do_signal,
@@ -135,7 +135,7 @@ NMI_MASK	= 0x80000000
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
 	movl %edx, %es; \
-	movl $(__KERNEL_PDA), %edx; \
+	movl $(__KERNEL_PERCPU), %edx; \
 	movl %edx, %fs
 
 #define RESTORE_INT_REGS \
@@ -308,16 +308,12 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET cs, 0*/
-#ifndef CONFIG_COMPAT_VDSO
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
 	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
-#else
-	pushl $SYSENTER_RETURN
-#endif
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
@@ -345,7 +341,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
-	DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
@@ -400,10 +396,6 @@ ENTRY(system_call)
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
-	testl $TF_MASK,PT_EFLAGS(%esp)
-	jz no_singlestep
-	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
-no_singlestep:
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -418,6 +410,10 @@ syscall_exit:
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
+	testl $TF_MASK,PT_EFLAGS(%esp)	# If tracing set singlestep flag on exit
+	jz no_singlestep
+	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
@@ -635,9 +631,7 @@ END(syscall_badsys)
 #ifndef CONFIG_XEN
 #define FIXUP_ESPFIX_STACK \
 	/* since we are on a wrong stack, we cant make it a C code :( */ \
-	movl %fs:PDA_cpu, %ebx; \
-	PER_CPU(cpu_gdt_descr, %ebx); \
-	movl GDS_address(%ebx), %ebx; \
+	PER_CPU(gdt_page, %ebx); \
 	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
 	addl %esp, %eax; \
 	pushl $__KERNEL_DS; \
@@ -710,7 +704,7 @@ ENTRY(name)				\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_/**/name;		\
+	call smp_##name;		\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
 ENDPROC(name)
@@ -718,10 +712,6 @@ ENDPROC(name)
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
-/* This alternate entry is needed because we hijack the apic LVTT */
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
-BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
-#endif
 #else
 #define UNWIND_ESPFIX_STACK
 #endif
@@ -764,7 +754,7 @@ error_code:
 	pushl %fs
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET fs, 0*/
-	movl $(__KERNEL_PDA), %ecx
+	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
 	UNWIND_ESPFIX_STACK
 	popl %ecx
--- 12.2.orig/arch/x86/kernel/head_32-xen.S	2011-08-09 10:36:25.000000000 +0200
+++ 12.2/arch/x86/kernel/head_32-xen.S	2011-08-09 10:37:32.000000000 +0200
@@ -38,7 +38,8 @@ ENTRY(startup_32)
 	/* Set up the stack pointer */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
-	call setup_pda
+	movl %ss,%eax
+	movl %eax,%fs			# gets reset once there's real percpu
 
 	/* get vendor info */
 	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
@@ -65,55 +66,11 @@ ENTRY(startup_32)
 	xorl %eax,%eax		# Clear GS
 	movl %eax,%gs
 
-	movl $(__KERNEL_PDA),%eax
-	mov  %eax,%fs
-
 	cld			# gcc2 wants the direction flag cleared at all times
 
 	pushl $0		# fake return address for unwinder
 	jmp start_kernel
 
-/*
- * Point the GDT at this CPU's PDA.  This will be
- * cpu_gdt_table and boot_pda.
- */
-ENTRY(setup_pda)
-	/* get the PDA pointer */
-	movl $boot_pda, %eax
-
-	/* slot the PDA address into the GDT */
-	mov $cpu_gdt_table, %ecx
-	mov %ax, (__KERNEL_PDA+0+2)(%ecx)		/* base & 0x0000ffff */
-	shr $16, %eax
-	mov %al, (__KERNEL_PDA+4+0)(%ecx)		/* base & 0x00ff0000 */
-	mov %ah, (__KERNEL_PDA+4+3)(%ecx)		/* base & 0xff000000 */
-
-	# %esi still points to start_info, and no registers
-	# need to be preserved.
-
-	movl XEN_START_mfn_list(%esi), %ebx
-	movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
-	shrl $PAGE_SHIFT, %eax
-	movl (%ebx,%eax,4), %ecx
-	pushl %ecx			# frame number for set_gdt below
-
-	xorl %esi, %esi
-	xorl %edx, %edx
-	shldl $PAGE_SHIFT, %ecx, %edx
-	shll $PAGE_SHIFT, %ecx
-	orl $0x61, %ecx
-	movl $cpu_gdt_table, %ebx
-	movl $__HYPERVISOR_update_va_mapping, %eax
-	int $0x82
-
-	movl $(PAGE_SIZE_asm / 8), %ecx
-	movl %esp, %ebx
-	movl $__HYPERVISOR_set_gdt, %eax
-	int $0x82
-
-	popl %ecx
-	ret
-
 #define HYPERCALL_PAGE_OFFSET 0x1000
 .org HYPERCALL_PAGE_OFFSET
 ENTRY(hypercall_page)
@@ -139,60 +96,6 @@ ENTRY(empty_zero_page)
  */
 .data
 
-/*
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
- */
-	.section .data.page_aligned, "aw"
-	.align PAGE_SIZE_asm
-ENTRY(cpu_gdt_table)
-	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* 0x0b reserved */
-	.quad 0x0000000000000000	/* 0x13 reserved */
-	.quad 0x0000000000000000	/* 0x1b reserved */
-	.quad 0x0000000000000000	/* 0x20 unused */
-	.quad 0x0000000000000000	/* 0x28 unused */
-	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
-	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
-	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
-	.quad 0x0000000000000000	/* 0x4b reserved */
-	.quad 0x0000000000000000	/* 0x53 reserved */
-	.quad 0x0000000000000000	/* 0x5b reserved */
-
-	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x73 user 4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x7b user 4GB data at 0x00000000 */
-
-	.quad 0x0000000000000000	/* 0x80 TSS descriptor */
-	.quad 0x0000000000000000	/* 0x88 LDT descriptor */
-
-	/*
-	 * Segments used for calling PnP BIOS have byte granularity.
-	 * They code segments and data segments have fixed 64k limits,
-	 * the transfer segment sizes are set at run time.
-	 */
-	.quad 0x0000000000000000	/* 0x90 32-bit code */
-	.quad 0x0000000000000000	/* 0x98 16-bit code */
-	.quad 0x0000000000000000	/* 0xa0 16-bit data */
-	.quad 0x0000000000000000	/* 0xa8 16-bit data */
-	.quad 0x0000000000000000	/* 0xb0 16-bit data */
-
-	/*
-	 * The APM segments have byte granularity and their bases
-	 * are set at run time.  All have 64k limits.
-	 */
-	.quad 0x0000000000000000	/* 0xb8 APM CS    code */
-	.quad 0x0000000000000000	/* 0xc0 APM CS 16 code (16 bit) */
-	.quad 0x0000000000000000	/* 0xc8 APM DS    data */
-
-	.quad 0x0000000000000000	/* 0xd0 - ESPFIX SS */
-	.quad 0x00cf92000000ffff	/* 0xd8 - PDA */
-	.quad 0x0000000000000000	/* 0xe0 - unused */
-	.quad 0x0000000000000000	/* 0xe8 - unused */
-	.quad 0x0000000000000000	/* 0xf0 - unused */
-	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
-	.align PAGE_SIZE_asm
-
 #ifdef CONFIG_XEN_UNPRIVILEGED_GUEST
 # define XEN_DOM0_CAP		0
 # define XEN_DOM0_CAP_STR	""
--- 12.2.orig/arch/x86/kernel/io_apic_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/io_apic_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
 #include <linux/acpi.h>
@@ -35,6 +34,7 @@
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -710,8 +710,6 @@ static int balanced_irq(void *unused)
 	unsigned long prev_balance_time = jiffies;
 	long time_remaining = balanced_irq_interval;
 
-	daemonize("kirqd");
-	
 	/* push everything to CPU 0 to give us a starting point.  */
 	for (i = 0 ; i < NR_IRQS ; i++) {
 		irq_desc[i].pending_mask = cpumask_of_cpu(0);
@@ -771,10 +769,9 @@ static int __init balanced_irq_init(void
 	}
 	
 	printk(KERN_INFO "Starting balanced_irq\n");
-	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
 		return 0;
-	else 
-		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
 	for_each_possible_cpu(i) {
 		kfree(irq_cpu_data[i].irq_delta);
@@ -1455,10 +1452,6 @@ static void __init setup_ExtINT_IRQ0_pin
 	enable_8259A_irq(0);
 }
 
-static inline void UNEXPECTED_IO_APIC(void)
-{
-}
-
 void __init print_IO_APIC(void)
 {
 	int apic, i;
@@ -1498,34 +1491,12 @@ void __init print_IO_APIC(void)
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-	if (reg_00.bits.ID >= get_physical_broadcast())
-		UNEXPECTED_IO_APIC();
-	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
-		UNEXPECTED_IO_APIC();
 
 	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
-		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
-		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
-		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
-		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
-		(reg_01.bits.entries != 0x2E) &&
-		(reg_01.bits.entries != 0x3F)
-	)
-		UNEXPECTED_IO_APIC();
 
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
-		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
-		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
-		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
-		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
-	)
-		UNEXPECTED_IO_APIC();
-	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
-		UNEXPECTED_IO_APIC();
 
 	/*
 	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1535,8 +1506,6 @@ void __init print_IO_APIC(void)
 	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
-			UNEXPECTED_IO_APIC();
 	}
 
 	/*
@@ -1548,8 +1517,6 @@ void __init print_IO_APIC(void)
 	    reg_03.raw != reg_01.raw) {
 		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
 		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-		if (reg_03.bits.__reserved_1)
-			UNEXPECTED_IO_APIC();
 	}
 
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -2686,19 +2653,19 @@ int arch_setup_msi_irq(struct pci_dev *d
 	if (irq < 0)
 		return irq;
 
-	set_irq_msi(irq, desc);
 	ret = msi_compose_msg(dev, irq, &msg);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
 	}
 
+	set_irq_msi(irq, desc);
 	write_msi_msg(irq, &msg);
 
 	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
 				      "edge");
 
-	return irq;
+	return 0;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
--- 12.2.orig/arch/x86/kernel/ioport_32-xen.c	2008-01-28 12:24:19.000000000 +0100
+++ 12.2/arch/x86/kernel/ioport_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -12,10 +12,10 @@
 #include <linux/types.h>
 #include <linux/ioport.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/thread_info.h>
+#include <linux/syscalls.h>
 #include <xen/interface/physdev.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
--- 12.2.orig/arch/x86/kernel/ldt_32-xen.c	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/kernel/ldt_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -10,7 +10,6 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 
--- 12.2.orig/arch/x86/kernel/microcode_core-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/microcode_core-xen.c	2011-12-01 15:01:09.000000000 +0100
@@ -138,7 +138,7 @@ static int __init microcode_dev_init (vo
 	return 0;
 }
 
-static void __exit microcode_dev_exit (void)
+static void microcode_dev_exit (void)
 {
 	misc_deregister(&microcode_dev);
 }
--- 12.2.orig/arch/x86/kernel/mpparse_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/mpparse_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -18,7 +18,6 @@
 #include <linux/acpi.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/bitops.h>
@@ -484,7 +483,7 @@ static int __init smp_read_mpc(struct mp
 		}
 		++mpc_record;
 	}
-	clustered_apic_check();
+	setup_apic_routing();
 	if (!num_processors)
 		printk(KERN_ERR "SMP mptable: no processors registered!\n");
 	return num_processors;
--- 12.2.orig/arch/x86/kernel/pci-dma-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/pci-dma-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/version.h>
+#include <linux/pci.h>
 #include <asm/io.h>
 #include <xen/balloon.h>
 #include <xen/gnttab.h>
@@ -275,7 +276,7 @@ int dma_declare_coherent_memory(struct d
 {
 	void __iomem *mem_base = NULL;
 	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = (pages + 31)/32;
+	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
 
 	if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
 		goto out;
@@ -348,6 +349,32 @@ void *dma_mark_declared_memory_occupied(
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
 
+#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+
+int forbid_dac;
+EXPORT_SYMBOL(forbid_dac);
+
+static __devinit void via_no_dac(struct pci_dev *dev)
+{
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+		printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+		forbid_dac = 1;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+
+static int check_iommu(char *s)
+{
+	if (!strcmp(s, "usedac")) {
+		forbid_dac = -1;
+		return 1;
+	}
+	return 0;
+}
+__setup("iommu=", check_iommu);
+#endif
+
 dma_addr_t
 dma_map_single(struct device *dev, void *ptr, size_t size,
 	       enum dma_data_direction direction)
--- 12.2.orig/arch/x86/kernel/process_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/process_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -21,7 +21,6 @@
 #include <linux/mm.h>
 #include <linux/elfcore.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -39,6 +38,7 @@
 #include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
+#include <linux/percpu.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -61,7 +61,6 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
-#include <asm/pda.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -70,6 +69,12 @@ static int hlt_counter;
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
 /*
  * Return saved PC of a blocked thread.
  */
@@ -168,6 +173,7 @@ void cpu_idle(void)
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
+			check_pgt_cache();
 			rmb();
 			idle = xen_idle; /* no alternatives */
 
@@ -218,18 +224,19 @@ void __devinit select_idle_routine(const
 {
 }
 
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
 {
-	if (!strncmp(str, "poll", 4)) {
+	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
 	}
+	else
+		return -1;
 
 	boot_option_idle_override = 1;
-	return 1;
+	return 0;
 }
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);
 
 void show_regs(struct pt_regs * regs)
 {
@@ -282,7 +289,7 @@ int kernel_thread(int (*fn)(void *), voi
 
 	regs.xds = __USER_DS;
 	regs.xes = __USER_DS;
-	regs.xfs = __KERNEL_PDA;
+	regs.xfs = __KERNEL_PERCPU;
 	regs.orig_eax = -1;
 	regs.eip = (unsigned long) kernel_thread_helper;
 	regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -562,7 +569,7 @@ struct task_struct fastcall * __switch_t
 	 * multicall to indicate FPU task switch, rather than
 	 * synchronously trapping to Xen.
 	 */
-	if (prev_p->thread_info->status & TS_USEDFPU) {
+	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
 		mcl->op      = __HYPERVISOR_fpu_taskswitch;
 		mcl->args[0] = 1;
@@ -669,7 +676,7 @@ struct task_struct fastcall * __switch_t
 	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
 
-	write_pda(pcurrent, next_p);
+	x86_write_percpu(current_task, next_p);
 
 	return prev_p;
 }
--- 12.2.orig/arch/x86/kernel/smp_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/smp_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/spinlock.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask;
 static struct mm_struct * flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL	0xffffffff
 
 /*
  * We cannot call mmdrop() because we are in interrupt context, 
@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int
 		 
 	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-			if (flush_va == FLUSH_ALL)
+			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
 				__flush_tlb_one(flush_va);
@@ -314,9 +312,11 @@ out:
 	return IRQ_HANDLED;
 }
 
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-						unsigned long va)
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+			     unsigned long va)
 {
+	cpumask_t cpumask = *cpumaskp;
+
 	/*
 	 * A couple of (to be removed) sanity checks:
 	 *
@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c
 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 	BUG_ON(!mm);
 
+#ifdef CONFIG_HOTPLUG_CPU
 	/* If a CPU which we ran on has gone down, OK. */
 	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (cpus_empty(cpumask))
+	if (unlikely(cpus_empty(cpumask)))
 		return;
+#endif
 
 	/*
 	 * i'm not happy about this global shared spinlock in the
@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c
 	
 	flush_mm = mm;
 	flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
-	atomic_set_mask(cpumask, &flush_cpumask);
-#else
-	{
-		int k;
-		unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-		unsigned long *cpu_mask = (unsigned long *)&cpumask;
-		for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
-			atomic_set_mask(cpu_mask[k], &flush_mask[k]);
-	}
-#endif
+	cpus_or(flush_cpumask, cpumask, flush_cpumask);
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
@@ -378,7 +370,7 @@ void flush_tlb_current_task(void)
 
 	local_flush_tlb();
 	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm
 			leave_mm(smp_processor_id());
 	}
 	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -446,7 +438,7 @@ void flush_tlb_all(void)
  * it goes straight through and wastes no time serializing
  * anything. Worst case is that we lose a reschedule ...
  */
-void smp_send_reschedule(int cpu)
+void xen_smp_send_reschedule(int cpu)
 {
 	WARN_ON(cpu_is_offline(cpu));
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
@@ -478,36 +470,79 @@ void unlock_ipi_call_lock(void)
 
 static struct call_data_struct *call_data;
 
+static void __smp_call_function(void (*func) (void *info), void *info,
+				int nonatomic, int wait)
+{
+	struct call_data_struct data;
+	int cpus = num_online_cpus() - 1;
+
+	if (!cpus)
+		return;
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	call_data = &data;
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+}
+
+
 /**
- * smp_call_function(): Run a function on all other CPUs.
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.  Must not include the current cpu.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
+  * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
  *
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-			int wait)
+int
+xen_smp_call_function_mask(cpumask_t mask,
+			      void (*func)(void *), void *info,
+			      int wait)
 {
 	struct call_data_struct data;
+	cpumask_t allbutself;
 	int cpus;
 
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
 	/* Holding any lock stops cpus from going down. */
 	spin_lock(&call_lock);
-	cpus = num_online_cpus() - 1;
+
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+
 	if (!cpus) {
 		spin_unlock(&call_lock);
 		return 0;
 	}
 
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
@@ -517,9 +552,12 @@ int smp_call_function (void (*func) (voi
 
 	call_data = &data;
 	mb();
-	
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+	else
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
 
 	/* Wait for response */
 	while (atomic_read(&data.started) != cpus)
@@ -532,15 +570,14 @@ int smp_call_function (void (*func) (voi
 
 	return 0;
 }
-EXPORT_SYMBOL(smp_call_function);
 
 static void stop_this_cpu (void * dummy)
 {
+	local_irq_disable();
 	/*
 	 * Remove this CPU:
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
-	local_irq_disable();
 	disable_all_local_evtchn();
 	if (cpu_data[smp_processor_id()].hlt_works_ok)
 		for(;;) halt();
@@ -551,13 +588,18 @@ static void stop_this_cpu (void * dummy)
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
-void smp_send_stop(void)
+void xen_smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 1, 0);
+	/* Don't deadlock on the call lock in panic */
+	int nolock = !spin_trylock(&call_lock);
+	unsigned long flags;
 
-	local_irq_disable();
+	local_irq_save(flags);
+	__smp_call_function(stop_this_cpu, NULL, 0, 0);
+	if (!nolock)
+		spin_unlock(&call_lock);
 	disable_all_local_evtchn();
-	local_irq_enable();
+	local_irq_restore(flags);
 }
 
 /*
@@ -598,74 +640,3 @@ irqreturn_t smp_call_function_interrupt(
 
 	return IRQ_HANDLED;
 }
-
-/*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
- */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				int nonatomic, int wait)
-{
-	struct call_data_struct data;
-	int cpus = 1;
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (!wait)
-		return;
-
-	while (atomic_read(&data.finished) != cpus)
-		cpu_relax();
-}
-
-/*
- * smp_call_function_single - Run a function on another CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			int nonatomic, int wait)
-{
-	/* prevent preemption and reschedule on another processor */
-	int me = get_cpu();
-	if (cpu == me) {
-		WARN_ON(1);
-		put_cpu();
-		return -EBUSY;
-	}
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	spin_lock_bh(&call_lock);
-	__smp_call_function_single(cpu, func, info, nonatomic, wait);
-	spin_unlock_bh(&call_lock);
-	put_cpu();
-	return 0;
-}
-EXPORT_SYMBOL(smp_call_function_single);
--- 12.2.orig/arch/x86/kernel/time-xen.c	2011-07-12 11:13:30.000000000 +0200
+++ 12.2/arch/x86/kernel/time-xen.c	2012-02-10 13:26:34.000000000 +0100
@@ -42,7 +42,6 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
 #include <linux/bcd.h>
 #include <linux/efi.h>
 #include <linux/sysctl.h>
@@ -79,7 +78,6 @@
 #include <asm/i8253.h>
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
-int pit_latch_buggy;              /* extern */
 #else
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -420,7 +418,7 @@ static int set_rtc_mmss(unsigned long no
 	return retval;
 }
 
-unsigned long long sched_clock(void)
+static unsigned long long local_clock(void)
 {
 	unsigned int cpu = get_cpu();
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
@@ -441,6 +439,61 @@ unsigned long long sched_clock(void)
 	return time;
 }
 
+/*
+ * Runstate accounting
+ */
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+	u64 state_time;
+	struct vcpu_runstate_info *state;
+
+	BUG_ON(preemptible());
+
+	state = &__get_cpu_var(runstate);
+
+	do {
+		state_time = get_64bit_local(&state->state_entry_time);
+		*res = *state;
+	} while (get_64bit_local(&state->state_entry_time) != state_time);
+
+	WARN_ON_ONCE(res->state != RUNSTATE_running);
+}
+
+/*
+ * Xen sched_clock implementation.  Returns the number of unstolen
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+ * states.
+ */
+unsigned long long sched_clock(void)
+{
+	struct vcpu_runstate_info runstate;
+	cycle_t now;
+	u64 ret;
+	s64 offset;
+
+	/*
+	 * Ideally sched_clock should be called on a per-cpu basis
+	 * anyway, so preempt should already be disabled, but that's
+	 * not current practice at the moment.
+	 */
+	preempt_disable();
+
+	now = local_clock();
+
+	get_runstate_snapshot(&runstate);
+
+	offset = now - runstate.state_entry_time;
+	if (offset < 0)
+		offset = 0;
+
+	ret = offset + runstate.time[RUNSTATE_running]
+	      + runstate.time[RUNSTATE_blocked];
+
+	preempt_enable();
+
+	return ret;
+}
+
 unsigned long profile_pc(struct pt_regs *regs)
 {
 	unsigned long pc = instruction_pointer(regs);
@@ -488,11 +541,10 @@ EXPORT_SYMBOL(profile_pc);
 irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	s64 delta, delta_cpu, stolen, blocked;
-	u64 sched_time;
 	unsigned int i, cpu = smp_processor_id();
 	int schedule_clock_was_set_work = 0;
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
-	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+	struct vcpu_runstate_info runstate;
 
 	/*
 	 * Here we are in the timer irq handler. We just have irqs locally
@@ -512,20 +564,7 @@ irqreturn_t timer_interrupt(int irq, voi
 		delta     -= processed_system_time;
 		delta_cpu -= per_cpu(processed_system_time, cpu);
 
-		/*
-		 * Obtain a consistent snapshot of stolen/blocked cycles. We
-		 * can use state_entry_time to detect if we get preempted here.
-		 */
-		do {
-			sched_time = runstate->state_entry_time;
-			barrier();
-			stolen = runstate->time[RUNSTATE_runnable] +
-				runstate->time[RUNSTATE_offline] -
-				per_cpu(processed_stolen_time, cpu);
-			blocked = runstate->time[RUNSTATE_blocked] -
-				per_cpu(processed_blocked_time, cpu);
-			barrier();
-		} while (sched_time != runstate->state_entry_time);
+		get_runstate_snapshot(&runstate);
 	} while (!time_values_up_to_date(cpu));
 
 	if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
@@ -571,6 +610,9 @@ irqreturn_t timer_interrupt(int irq, voi
 	 * HACK: Passing NULL to account_steal_time()
 	 * ensures that the ticks are accounted as stolen.
 	 */
+	stolen = runstate.time[RUNSTATE_runnable]
+		 + runstate.time[RUNSTATE_offline]
+		 - per_cpu(processed_stolen_time, cpu);
 	if ((stolen > 0) && (delta_cpu > 0)) {
 		delta_cpu -= stolen;
 		if (unlikely(delta_cpu < 0))
@@ -586,6 +628,8 @@ irqreturn_t timer_interrupt(int irq, voi
 	 * HACK: Passing idle_task to account_steal_time()
 	 * ensures that the ticks are accounted as idle/wait.
 	 */
+	blocked = runstate.time[RUNSTATE_blocked]
+		  - per_cpu(processed_blocked_time, cpu);
 	if ((blocked > 0) && (delta_cpu > 0)) {
 		delta_cpu -= blocked;
 		if (unlikely(delta_cpu < 0))
@@ -622,7 +666,7 @@ irqreturn_t timer_interrupt(int irq, voi
 	return IRQ_HANDLED;
 }
 
-void mark_tsc_unstable(void)
+void mark_tsc_unstable(char *reason)
 {
 #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
 	tsc_unstable = 1;
@@ -630,17 +674,13 @@ void mark_tsc_unstable(void)
 }
 EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 
+static cycle_t cs_last;
+
 static cycle_t xen_clocksource_read(void)
 {
 #ifdef CONFIG_SMP
-	static cycle_t last_ret;
-#ifndef CONFIG_64BIT
-	cycle_t last = cmpxchg64(&last_ret, 0, 0);
-#else
-	cycle_t last = last_ret;
-#define cmpxchg64 cmpxchg
-#endif
-	cycle_t ret = sched_clock();
+	cycle_t last = get_64bit(&cs_last);
+	cycle_t ret = local_clock();
 
 	if (unlikely((s64)(ret - last) < 0)) {
 		if (last - ret > permitted_clock_jitter
@@ -659,17 +699,25 @@ static cycle_t xen_clocksource_read(void
 	}
 
 	for (;;) {
-		cycle_t cur = cmpxchg64(&last_ret, last, ret);
+		cycle_t cur = cmpxchg64(&cs_last, last, ret);
 
 		if (cur == last || (s64)(ret - cur) < 0)
 			return ret;
 		last = cur;
 	}
 #else
-	return sched_clock();
+	return local_clock();
 #endif
 }
 
+static void xen_clocksource_resume(void)
+{
+	extern void time_resume(void);
+
+	time_resume();
+	cs_last = local_clock();
+}
+
 static struct clocksource clocksource_xen = {
 	.name			= "xen",
 	.rating			= 400,
@@ -678,19 +726,29 @@ static struct clocksource clocksource_xe
 	.mult			= 1 << XEN_SHIFT,		/* time directly in nanoseconds */
 	.shift			= XEN_SHIFT,
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
+	.resume			= xen_clocksource_resume,
 };
 
-static void init_missing_ticks_accounting(unsigned int cpu)
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;
 	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
 	int rc;
 
-	memset(runstate, 0, sizeof(*runstate));
-
-	area.addr.v = runstate;
+	set_xen_guest_handle(area.addr.h, runstate);
 	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
-	WARN_ON(rc && rc != -ENOSYS);
+	if (rc) {
+		BUILD_BUG_ON(RUNSTATE_running);
+		memset(runstate, 0, sizeof(*runstate));
+		WARN_ON(rc != -ENOSYS);
+	}
+
+	return runstate;
+}
+
+static void init_missing_ticks_accounting(unsigned int cpu)
+{
+	struct vcpu_runstate_info *runstate = setup_runstate_area(cpu);
 
 	per_cpu(processed_blocked_time, cpu) =
 		runstate->time[RUNSTATE_blocked];
@@ -766,35 +824,6 @@ void notify_arch_cmos_timer(void)
 	mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
 }
 
-static int timer_resume(struct sys_device *dev)
-{
-	extern void time_resume(void);
-	time_resume();
-	return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
-	.resume = timer_resume,
-	set_kset_name("timer"),
-};
-
-
-/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
-	.id	= 0,
-	.cls	= &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
-	int error = sysdev_class_register(&timer_sysclass);
-	if (!error)
-		error = sysdev_register(&device_timer);
-	return error;
-}
-
-device_initcall(time_init_device);
-
 extern void (*late_time_init)(void);
 
 /* Dynamically-mapped IRQ. */
@@ -938,21 +967,21 @@ static void start_hz_timer(void)
 	cpu_clear(cpu, nohz_cpu_mask);
 }
 
-void raw_safe_halt(void)
+void xen_safe_halt(void)
 {
 	stop_hz_timer();
 	/* Blocking includes an implicit local_irq_enable(). */
 	HYPERVISOR_block();
 	start_hz_timer();
 }
-EXPORT_SYMBOL(raw_safe_halt);
+EXPORT_SYMBOL(xen_safe_halt);
 
-void halt(void)
+void xen_halt(void)
 {
 	if (irqs_disabled())
 		VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
 }
-EXPORT_SYMBOL(halt);
+EXPORT_SYMBOL(xen_halt);
 
 /* No locking required. Interrupts are disabled on all CPUs. */
 void time_resume(void)
--- 12.2.orig/arch/x86/kernel/traps_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/traps_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -52,7 +52,7 @@
 #include <asm/unwind.h>
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
-#include <asm/kdebug.h>
+#include <linux/kdebug.h>
 #include <asm/stacktrace.h>
 
 #include <linux/module.h>
@@ -101,20 +101,6 @@ asmlinkage void machine_check(void);
 
 int kstack_depth_to_print = 24;
 static unsigned int code_bytes = 64;
-ATOMIC_NOTIFIER_HEAD(i386die_chain);
-
-int register_die_notifier(struct notifier_block *nb)
-{
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&i386die_chain, nb);
-}
-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&i386die_chain, nb);
-}
-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
 
 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 {
@@ -325,7 +311,7 @@ void show_registers(struct pt_regs *regs
 	       regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
 		TASK_COMM_LEN, current->comm, current->pid,
-		current_thread_info(), current, current->thread_info);
+		current_thread_info(), current, task_thread_info(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -482,8 +468,6 @@ static void __kprobes do_trap(int trapnr
 			      siginfo_t *info)
 {
 	struct task_struct *tsk = current;
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = trapnr;
 
 	if (regs->eflags & VM_MASK) {
 		if (vm86)
@@ -495,6 +479,18 @@ static void __kprobes do_trap(int trapnr
 		goto kernel_trap;
 
 	trap_signal: {
+		/*
+		 * We want error_code and trap_no set for userspace faults and
+		 * kernelspace faults which result in die(), but not
+		 * kernelspace faults which are fixed up.  die() gives the
+		 * process no chance to handle the signal and notice the
+		 * kernel fault information, so that won't result in polluting
+		 * the information about previously queued, but not yet
+		 * delivered, faults.  See also do_general_protection below.
+		 */
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+
 		if (info)
 			force_sig_info(signr, info, tsk);
 		else
@@ -503,8 +499,11 @@ static void __kprobes do_trap(int trapnr
 	}
 
 	kernel_trap: {
-		if (!fixup_exception(regs))
+		if (!fixup_exception(regs)) {
+			tsk->thread.error_code = error_code;
+			tsk->thread.trap_no = trapnr;
 			die(str, regs, error_code);
+		}
 		return;
 	}
 
@@ -578,9 +577,6 @@ DO_ERROR_INFO(32, SIGSEGV, "iret excepti
 fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 					      long error_code)
 {
-	current->thread.error_code = error_code;
-	current->thread.trap_no = 13;
-
 	if (regs->eflags & VM_MASK)
 		goto gp_in_vm86;
 
@@ -599,6 +595,8 @@ gp_in_vm86:
 
 gp_in_kernel:
 	if (!fixup_exception(regs)) {
+		current->thread.error_code = error_code;
+		current->thread.trap_no = 13;
 		if (notify_die(DIE_GPF, "general protection fault", regs,
 				error_code, 13, SIGSEGV) == NOTIFY_STOP)
 			return;
@@ -987,9 +985,7 @@ fastcall void do_spurious_interrupt_bug(
 fastcall unsigned long patch_espfix_desc(unsigned long uesp,
 					  unsigned long kesp)
 {
-	int cpu = smp_processor_id();
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-	struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
+	struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
 	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
 	unsigned long new_kesp = kesp - base;
 	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
--- 12.2.orig/arch/x86/mm/fault_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/fault_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -14,19 +14,20 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>		/* For unblank_screen() */
 #include <linux/highmem.h>
+#include <linux/bootmem.h>		/* for max_low_pfn */
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/kdebug.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
-#include <asm/kdebug.h>
 #include <asm/segment.h>
 
 extern void die(const char *,struct pt_regs *,long);
@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon
 	unsigned long page;
 
 	page = read_cr3();
-	page = ((unsigned long *) __va(page))[address >> 22];
-	if (oops_may_print())
-		printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
-		       machine_to_phys(page));
+	page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
+	printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+	       machine_to_phys(page));
 	/*
 	 * We must not directly access the pte in the highpte
 	 * case if the page table is located in highmem.
 	 * And lets rather not kmap-atomic the pte, just in case
 	 * it's allocated already.
 	 */
-#ifdef CONFIG_HIGHPTE
-	if ((page >> PAGE_SHIFT) >= highstart_pfn)
-		return;
-#endif
-	if ((page & 1) && oops_may_print()) {
-		page &= PAGE_MASK;
-		address &= 0x003ff000;
-		page = machine_to_phys(page);
-		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+	if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
+	    && (page & _PAGE_PRESENT)) {
+		page = machine_to_phys(page & PAGE_MASK);
+		page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
+		                                      & (PTRS_PER_PTE - 1)];
 		printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
 		       machine_to_phys(page));
 	}
@@ -581,6 +577,11 @@ bad_area:
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & 4) {
+		/*
+		 * It's possible to have interrupts off here.
+		 */
+		local_irq_enable();
+
 		/* 
 		 * Valid to do another page fault here because this one came 
 		 * from user space.
@@ -633,7 +634,7 @@ no_context:
 	bust_spinlocks(1);
 
 	if (oops_may_print()) {
-	#ifdef CONFIG_X86_PAE
+#ifdef CONFIG_X86_PAE
 		if (error_code & 16) {
 			pte_t *pte = lookup_address(address);
 
@@ -642,7 +643,7 @@ no_context:
 					"NX-protected page - exploit attempt? "
 					"(uid: %d)\n", current->uid);
 		}
-	#endif
+#endif
 		if (address < PAGE_SIZE)
 			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
 					"pointer dereference");
@@ -652,8 +653,8 @@ no_context:
 		printk(" at virtual address %08lx\n",address);
 		printk(KERN_ALERT " printing eip:\n");
 		printk("%08lx\n", regs->eip);
+		dump_fault_path(address);
 	}
-	dump_fault_path(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
 	tsk->thread.error_code = error_code;
@@ -694,7 +695,6 @@ do_sigbus:
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#if !HAVE_SHARED_KERNEL_PMD
 void vmalloc_sync_all(void)
 {
 	/*
@@ -710,6 +710,9 @@ void vmalloc_sync_all(void)
 	static unsigned long start = TASK_SIZE;
 	unsigned long address;
 
+	if (SHARED_KERNEL_PMD)
+		return;
+
 	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
 	for (address = start;
 	     address >= TASK_SIZE && address < hypervisor_virt_start;
@@ -752,4 +755,3 @@ void vmalloc_sync_all(void)
 			start = address + (1UL << PMD_SHIFT);
 	}
 }
-#endif
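The reworked dump_fault_path() above indexes the page directory with address >> PGDIR_SHIFT and the page table with (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1). A minimal user-space sketch of that index arithmetic, assuming the usual non-PAE i386 constants; the faulting address is made up:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PGDIR_SHIFT	22		/* non-PAE i386 */
#define PTRS_PER_PTE	1024

int main(void)
{
	unsigned long address = 0xc05a3123UL;	/* made-up faulting address */

	printf("pde index: %lu\n", address >> PGDIR_SHIFT);
	printf("pte index: %lu\n",
	       (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
	return 0;
}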
--- 12.2.orig/arch/x86/mm/highmem_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/highmem_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
  * However when holding an atomic kmap it is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
@@ -49,15 +49,7 @@ static void *__kmap_atomic(struct page *
 
 void *kmap_atomic(struct page *page, enum km_type type)
 {
-	return __kmap_atomic(page, type, kmap_prot);
-}
-
-/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
-void *kmap_atomic_pte(struct page *page, enum km_type type)
-{
-	return __kmap_atomic(page, type,
-	                     test_bit(PG_pinned, &page->flags)
-	                     ? PAGE_KERNEL_RO : kmap_prot);
+	return kmap_atomic_prot(page, type, kmap_prot);
 }
 
 void kunmap_atomic(void *kvaddr, enum km_type type)
@@ -80,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km
 #endif
 	}
 
+	/*arch_flush_lazy_mmu_mode();*/
 	pagefault_enable();
 }
 
@@ -162,7 +155,6 @@ void copy_highpage(struct page *to, stru
 EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_pte);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kmap_atomic_to_page);
 EXPORT_SYMBOL(clear_highpage);
--- 12.2.orig/arch/x86/mm/hypervisor.c	2012-05-31 14:42:14.000000000 +0200
+++ 12.2/arch/x86/mm/hypervisor.c	2012-05-31 14:43:34.000000000 +0200
@@ -414,13 +414,13 @@ void xen_tlb_flush_all(void)
 }
 EXPORT_SYMBOL_GPL(xen_tlb_flush_all);
 
-void xen_tlb_flush_mask(cpumask_t *mask)
+void xen_tlb_flush_mask(const cpumask_t *mask)
 {
 	struct mmuext_op op;
 	if ( cpus_empty(*mask) )
 		return;
 	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
+	set_xen_guest_handle(op.arg2.vcpumask, cpus_addr(*mask));
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 EXPORT_SYMBOL_GPL(xen_tlb_flush_mask);
@@ -434,14 +434,14 @@ void xen_invlpg_all(unsigned long ptr)
 }
 EXPORT_SYMBOL_GPL(xen_invlpg_all);
 
-void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
+void xen_invlpg_mask(const cpumask_t *mask, unsigned long ptr)
 {
 	struct mmuext_op op;
 	if ( cpus_empty(*mask) )
 		return;
 	op.cmd = MMUEXT_INVLPG_MULTI;
 	op.arg1.linear_addr = ptr & PAGE_MASK;
-	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
+	set_xen_guest_handle(op.arg2.vcpumask, cpus_addr(*mask));
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 EXPORT_SYMBOL_GPL(xen_invlpg_mask);
--- 12.2.orig/arch/x86/mm/init_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/init_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
@@ -65,17 +66,19 @@ static pmd_t * __init one_md_table_init(
 	pmd_t *pmd_table;
 		
 #ifdef CONFIG_X86_PAE
-	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-	paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
-	make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
-	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-	pud = pud_offset(pgd, 0);
-	if (pmd_table != pmd_offset(pud, 0)) 
-		BUG();
-#else
+	if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
+		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+		paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+		make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
+		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+		pud = pud_offset(pgd, 0);
+		if (pmd_table != pmd_offset(pud, 0))
+			BUG();
+	}
+#endif
 	pud = pud_offset(pgd, 0);
 	pmd_table = pmd_offset(pud, 0);
-#endif
 
 	return pmd_table;
 }
@@ -86,16 +89,18 @@ static pmd_t * __init one_md_table_init(
  */
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
+#if CONFIG_XEN_COMPAT <= 0x030002
 	if (pmd_none(*pmd)) {
+#else
+	if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) {
+#endif
 		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
 		paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
 		make_lowmem_page_readonly(page_table,
 					  XENFEAT_writable_page_tables);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-		if (page_table != pte_offset_kernel(pmd, 0))
-			BUG();	
-
-		return page_table;
+		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
 	
 	return pte_offset_kernel(pmd, 0);
@@ -115,7 +120,6 @@ static pte_t * __init one_page_table_ini
 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
 	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	int pgd_idx, pmd_idx;
 	unsigned long vaddr;
@@ -126,12 +130,10 @@ static void __init page_table_range_init
 	pgd = pgd_base + pgd_idx;
 
 	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-		if (pgd_none(*pgd)) 
-			one_md_table_init(pgd);
-		pud = pud_offset(pgd, vaddr);
-		pmd = pmd_offset(pud, vaddr);
+		pmd = one_md_table_init(pgd);
+		pmd = pmd + pmd_index(vaddr);
 		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-			if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
+			if (vaddr < hypervisor_virt_start)
 				one_page_table_init(pmd);
 
 			vaddr += PMD_SIZE;
@@ -194,24 +196,25 @@ static void __init kernel_physical_mappi
 			/* Map with big pages if possible, otherwise create normal page tables. */
 			if (cpu_has_pse) {
 				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
 				if (is_kernel_text(address) || is_kernel_text(address2))
 					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
 				else
 					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+
 				pfn += PTRS_PER_PTE;
 			} else {
 				pte = one_page_table_init(pmd);
 
-				pte += pte_ofs;
-				for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
-						/* XEN: Only map initial RAM allocation. */
-						if ((pfn >= max_ram_pfn) || pte_present(*pte))
-							continue;
-						if (is_kernel_text(address))
-							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-						else
-							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+				for (pte += pte_ofs;
+				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+					/* XEN: Only map initial RAM allocation. */
+					if ((pfn >= max_ram_pfn) || pte_present(*pte))
+						continue;
+					if (is_kernel_text(address))
+						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+					else
+						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
 				}
 				pte_ofs = 0;
 			}
@@ -388,15 +391,44 @@ extern void __init remap_numa_kva(void);
 
 pgd_t *swapper_pg_dir;
 
+static void __init xen_pagetable_setup_start(pgd_t *base)
+{
+}
+
+static void __init xen_pagetable_setup_done(pgd_t *base)
+{
+}
+
+/*
+ * Build a proper pagetable for the kernel mappings.  Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE).  The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir.  In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
 static void __init pagetable_init (void)
 {
-	unsigned long vaddr;
+	unsigned long vaddr, end;
 	pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
 
+	xen_pagetable_setup_start(pgd_base);
+
 	/* Enable PSE if available */
-	if (cpu_has_pse) {
+	if (cpu_has_pse)
 		set_in_cr4(X86_CR4_PSE);
-	}
 
 	/* Enable PGE if available */
 	if (cpu_has_pge) {
@@ -413,9 +445,12 @@ static void __init pagetable_init (void)
 	 * created - mappings will be set by set_fixmap():
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
+	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+	page_table_range_init(vaddr, end, pgd_base);
 
 	permanent_kmaps_init(pgd_base);
+
+	xen_pagetable_setup_done(pgd_base);
 }
 
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
@@ -756,34 +791,29 @@ int remove_memory(u64 start, u64 size)
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif
 
-struct kmem_cache *pgd_cache;
 struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+	size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
 	if (PTRS_PER_PMD > 1) {
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_PANIC,
 					pmd_ctor,
 					NULL);
-		if (!pmd_cache)
-			panic("pgtable_cache_init(): cannot create pmd cache");
+		if (!SHARED_KERNEL_PMD) {
+			/* If we're in PAE mode and have a non-shared
+			   kernel pmd, then the pgd size must be a
+			   page size.  This is because the pgd_list
+			   links through the page structure, so there
+			   can only be one pgd per page for this to
+			   work. */
+			pgd_size = PAGE_SIZE;
+		}
 	}
-	pgd_cache = kmem_cache_create("pgd",
-#ifndef CONFIG_XEN
-				PTRS_PER_PGD*sizeof(pgd_t),
-				PTRS_PER_PGD*sizeof(pgd_t),
-#else
-				PAGE_SIZE,
-				PAGE_SIZE,
-#endif
-				0,
-				pgd_ctor,
-				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
-	if (!pgd_cache)
-		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
 
 /*
@@ -817,13 +847,26 @@ static int noinline do_test_wp_bit(void)
 
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)__start_rodata;
-
-	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-		change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long size = PFN_ALIGN(_etext) - start;
 
-	printk("Write protecting the kernel read-only data: %uk\n",
-			(__end_rodata - __start_rodata) >> 10);
+#ifndef CONFIG_KPROBES
+#ifdef CONFIG_HOTPLUG_CPU
+	/* It must still be possible to apply SMP alternatives. */
+	if (num_possible_cpus() <= 1)
+#endif
+	{
+		change_page_attr(virt_to_page(start),
+		                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+		printk("Write protecting the kernel text: %luk\n", size >> 10);
+	}
+#endif
+	start += size;
+	size = (unsigned long)__end_rodata - start;
+	change_page_attr(virt_to_page(start),
+	                 size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+	printk("Write protecting the kernel read-only data: %luk\n",
+	       size >> 10);
 
 	/*
 	 * change_page_attr() requires a global_flush_tlb() call after it.
@@ -846,7 +889,7 @@ void free_init_pages(char *what, unsigne
 		free_page(addr);
 		totalram_pages++;
 	}
-	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 }
 
 void free_initmem(void)
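The new mark_rodata_ro() above covers two ranges: the kernel text from PFN_ALIGN(_text) to PFN_ALIGN(_etext) (when kprobes are not configured), and the read-only data from the end of that range up to __end_rodata. A small user-space sketch of the arithmetic, assuming the usual PFN_ALIGN() round-up-to-page definition; the symbol addresses are made up:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PFN_ALIGN(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long text_start = 0xc0100000UL;	/* hypothetical _text */
	unsigned long text_end   = 0xc02f8123UL;	/* hypothetical _etext */
	unsigned long rodata_end = 0xc0380000UL;	/* hypothetical __end_rodata */

	unsigned long start = PFN_ALIGN(text_start);
	unsigned long size  = PFN_ALIGN(text_end) - start;

	printf("text:   %#lx, %luk\n", start, size >> 10);

	start += size;				/* rodata begins where text ends */
	size = rodata_end - start;
	printf("rodata: %#lx, %luk\n", start, size >> 10);
	return 0;
}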
--- 12.2.orig/arch/x86/mm/ioremap-xen.c	2011-02-07 15:37:37.000000000 +0100
+++ 12.2/arch/x86/mm/ioremap-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
--- 12.2.orig/arch/x86/mm/pgtable_32-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/pgtable_32-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
+#include <linux/quicklist.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -218,8 +219,6 @@ void pmd_ctor(void *pmd, struct kmem_cac
  * against pageattr.c; it is the unique case in which a valid change
  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
  * vmalloc faults work because attached pagetables are never freed.
- * The locking scheme was chosen on the basis of manfred's
- * recommendations and having no core impact whatsoever.
  * -- wli
  */
 DEFINE_SPINLOCK(pgd_lock);
@@ -246,37 +245,54 @@ static inline void pgd_list_del(pgd_t *p
 	page->mapping = NULL;
 }
 
-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+
+
+#if (PTRS_PER_PMD == 1)
+/* Non-PAE pgd constructor */
+void pgd_ctor(void *pgd)
 {
 	unsigned long flags;
 
-	if (PTRS_PER_PMD > 1) {
-		if (HAVE_SHARED_KERNEL_PMD)
-			clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-					swapper_pg_dir + USER_PTRS_PER_PGD,
-					KERNEL_PGD_PTRS);
-	} else {
-		spin_lock_irqsave(&pgd_lock, flags);
+	/* !PAE, no pagetable sharing */
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	/* must happen under lock */
+	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			KERNEL_PGD_PTRS);
+
+	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+				__pa(swapper_pg_dir) >> PAGE_SHIFT,
+				USER_PTRS_PER_PGD,
+				KERNEL_PGD_PTRS);
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+#else  /* PTRS_PER_PMD > 1 */
+/* PAE pgd constructor */
+void pgd_ctor(void *pgd)
+{
+	/* PAE, kernel PMD may be shared */
+
+	if (SHARED_KERNEL_PMD) {
 		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
 				swapper_pg_dir + USER_PTRS_PER_PGD,
 				KERNEL_PGD_PTRS);
+	} else {
 		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
-		/* must happen under lock */
-		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-			__pa(swapper_pg_dir) >> PAGE_SHIFT,
-			USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
-		pgd_list_add(pgd);
-		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
 }
+#endif	/* PTRS_PER_PMD */
 
-/* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+void pgd_dtor(void *pgd)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
+	if (SHARED_KERNEL_PMD)
+		return;
+
 	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
@@ -285,11 +301,46 @@ void pgd_dtor(void *pgd, struct kmem_cac
 	pgd_test_and_unpin(pgd);
 }
 
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+/* If we allocate a pmd for part of the kernel address space, then
+   make sure it's initialized with the appropriate kernel mappings.
+   Otherwise use a cached zeroed pmd.  */
+static pmd_t *pmd_cache_alloc(int idx)
+{
+	pmd_t *pmd;
+
+	if (idx >= USER_PTRS_PER_PGD) {
+		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+#ifndef CONFIG_XEN
+		if (pmd)
+			memcpy(pmd,
+			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+#endif
+	} else
+		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
+	return pmd;
+}
+
+static void pmd_cache_free(pmd_t *pmd, int idx)
+{
+	if (idx >= USER_PTRS_PER_PGD) {
+		make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables);
+		memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+		free_page((unsigned long)pmd);
+	} else
+		kmem_cache_free(pmd_cache, pmd);
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	int i;
-	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
-	pmd_t **pmd;
+	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
+	pmd_t **pmds = NULL;
 	unsigned long flags;
 
 	if (!pgd)
@@ -303,37 +354,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (PTRS_PER_PMD == 1)
 		return pgd;
 
-	if (HAVE_SHARED_KERNEL_PMD) {
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-			if (!pmd)
-				goto out_oom;
-			paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
-			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+#ifdef CONFIG_XEN
+	if (!SHARED_KERNEL_PMD) {
+		/*
+		 * We can race save/restore (if we sleep during a GFP_KERNEL memory
+		 * allocation). We therefore store virtual addresses of pmds as they
+		 * do not change across save/restore, and poke the machine addresses
+		 * into the pgdir under the pgd_lock.
+		 */
+		pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+		if (!pmds) {
+			quicklist_free(0, pgd_dtor, pgd);
+			return NULL;
 		}
-		return pgd;
-	}
-
-	/*
-	 * We can race save/restore (if we sleep during a GFP_KERNEL memory
-	 * allocation). We therefore store virtual addresses of pmds as they
-	 * do not change across save/restore, and poke the machine addresses
-	 * into the pgdir under the pgd_lock.
-	 */
-	pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
-	if (!pmd) {
-		kmem_cache_free(pgd_cache, pgd);
-		return NULL;
 	}
+#endif
 
 	/* Allocate pmds, remember virtual addresses. */
-	for (i = 0; i < PTRS_PER_PGD; ++i) {
-		pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-		if (!pmd[i])
+	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = pmd_cache_alloc(i);
+
+		if (!pmd)
 			goto out_oom;
+
 		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
+		if (pmds)
+			pmds[i] = pmd;
+		else
+			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
 
+#ifdef CONFIG_XEN
+	if (SHARED_KERNEL_PMD)
+		return pgd;
+
 	spin_lock_irqsave(&pgd_lock, flags);
 
 	/* Protect against save/restore: move below 4GB under pgd_lock. */
@@ -348,44 +402,43 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	/* Copy kernel pmd contents and write-protect the new pmds. */
 	for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-		unsigned long v = (unsigned long)i << PGDIR_SHIFT;
-		pgd_t *kpgd = pgd_offset_k(v);
-		pud_t *kpud = pud_offset(kpgd, v);
-		pmd_t *kpmd = pmd_offset(kpud, v);
-		memcpy(pmd[i], kpmd, PAGE_SIZE);
+		memcpy(pmds[i],
+		       (void *)pgd_page_vaddr(swapper_pg_dir[i]),
+		       sizeof(pmd_t) * PTRS_PER_PMD);
 		make_lowmem_page_readonly(
-			pmd[i], XENFEAT_writable_page_tables);
+			pmds[i], XENFEAT_writable_page_tables);
 	}
 
 	/* It is safe to poke machine addresses of pmds under the pmd_lock. */
 	for (i = 0; i < PTRS_PER_PGD; i++)
-		set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
+		set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i])));
 
 	/* Ensure this pgd gets picked up and pinned on save/restore. */
 	pgd_list_add(pgd);
 
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
-	kfree(pmd);
+	kfree(pmds);
+#endif
 
 	return pgd;
 
 out_oom:
-	if (HAVE_SHARED_KERNEL_PMD) {
+	if (!pmds) {
 		for (i--; i >= 0; i--) {
 			pgd_t pgdent = pgd[i];
 			void* pmd = (void *)__va(pgd_val(pgdent)-1);
 			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			kmem_cache_free(pmd_cache, pmd);
+			pmd_cache_free(pmd, i);
 		}
 	} else {
 		for (i--; i >= 0; i--) {
-			paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
-			kmem_cache_free(pmd_cache, pmd[i]);
+			paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT);
+			pmd_cache_free(pmds[i], i);
 		}
-		kfree(pmd);
+		kfree(pmds);
 	}
-	kmem_cache_free(pgd_cache, pgd);
+	quicklist_free(0, pgd_dtor, pgd);
 	return NULL;
 }
 
@@ -405,35 +458,24 @@ void pgd_free(pgd_t *pgd)
 
 	/* in the PAE case user pgd entries are overwritten before usage */
 	if (PTRS_PER_PMD > 1) {
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
 			pgd_t pgdent = pgd[i];
 			void* pmd = (void *)__va(pgd_val(pgdent)-1);
 			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			kmem_cache_free(pmd_cache, pmd);
+			pmd_cache_free(pmd, i);
 		}
 
-		if (!HAVE_SHARED_KERNEL_PMD) {
-			unsigned long flags;
-			spin_lock_irqsave(&pgd_lock, flags);
-			pgd_list_del(pgd);
-			spin_unlock_irqrestore(&pgd_lock, flags);
-
-			for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-				pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-				make_lowmem_page_writable(
-					pmd, XENFEAT_writable_page_tables);
-				memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-				kmem_cache_free(pmd_cache, pmd);
-			}
-
-			if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
-				xen_destroy_contiguous_region(
-					(unsigned long)pgd, 0);
-		}
+		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+			xen_destroy_contiguous_region((unsigned long)pgd, 0);
 	}
 
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
-	kmem_cache_free(pgd_cache, pgd);
+	quicklist_free(0, pgd_dtor, pgd);
+}
+
+void check_pgt_cache(void)
+{
+	quicklist_trim(0, pgd_dtor, 25, 16);
 }
 
 void make_lowmem_page_readonly(void *va, unsigned int feature)
@@ -730,13 +772,13 @@ void mm_pin_all(void)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-void _arch_dup_mmap(struct mm_struct *mm)
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
 		mm_pin(mm);
 }
 
-void _arch_exit_mmap(struct mm_struct *mm)
+void arch_exit_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk = current;
 
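pgd_alloc()/pgd_free() now take the pgd from quicklist 0 via quicklist_alloc(0, GFP_KERNEL, pgd_ctor) and quicklist_free(0, pgd_dtor, pgd), and check_pgt_cache() trims it with quicklist_trim(0, pgd_dtor, 25, 16). The point of the pattern is that freed objects stay constructed, so the constructor runs only on a cold allocation and the destructor only when the list is trimmed. A stand-alone sketch of that idea (an illustration only, not the <linux/quicklist.h> implementation):

#include <stdio.h>
#include <stdlib.h>

struct qnode { struct qnode *next; };

static struct qnode *quick_head;
static int quick_len;

static void *quick_alloc(size_t size, void (*ctor)(void *))
{
	void *p;

	if (quick_head) {			/* reuse a constructed object */
		p = quick_head;
		quick_head = quick_head->next;
		quick_len--;
		return p;
	}
	p = malloc(size);
	if (p && ctor)
		ctor(p);			/* construct only on the cold path */
	return p;
}

static void quick_free(void *obj)
{
	struct qnode *n = obj;

	n->next = quick_head;			/* parked, still constructed */
	quick_head = n;
	quick_len++;
}

static void quick_trim(int max_free, void (*dtor)(void *))
{
	while (quick_len > max_free) {
		struct qnode *n = quick_head;

		quick_head = n->next;
		quick_len--;
		dtor(n);
		free(n);
	}
}

static void ctor(void *p) { printf("ctor %p\n", p); }
static void dtor(void *p) { printf("dtor %p\n", p); }

int main(void)
{
	void *a = quick_alloc(4096, ctor);

	quick_free(a);
	a = quick_alloc(4096, ctor);		/* no ctor call this time */
	quick_free(a);
	quick_trim(0, dtor);
	return 0;
}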
--- 12.2.orig/arch/x86/ia32/ia32entry-xen.S	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/ia32/ia32entry-xen.S	2011-01-31 17:32:29.000000000 +0100
@@ -431,11 +431,7 @@ ia32_sys_call_table:
 	.quad sys_symlink
 	.quad sys_lstat
 	.quad sys_readlink		/* 85 */
-#ifdef CONFIG_IA32_AOUT
 	.quad sys_uselib
-#else
-	.quad quiet_ni_syscall
-#endif
 	.quad sys_swapon
 	.quad sys_reboot
 	.quad compat_sys_old_readdir
@@ -574,7 +570,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall		/* tux */
 	.quad quiet_ni_syscall    	/* security */
 	.quad sys_gettid	
-	.quad sys_readahead	/* 225 */
+	.quad sys32_readahead	/* 225 */
 	.quad sys_setxattr
 	.quad sys_lsetxattr
 	.quad sys_fsetxattr
@@ -599,7 +595,7 @@ ia32_sys_call_table:
 	.quad compat_sys_io_getevents
 	.quad compat_sys_io_submit
 	.quad sys_io_cancel
-	.quad sys_fadvise64		/* 250 */
+	.quad sys32_fadvise64		/* 250 */
 	.quad quiet_ni_syscall 	/* free_huge_pages */
 	.quad sys_exit_group
 	.quad sys32_lookup_dcookie
@@ -663,10 +659,14 @@ ia32_sys_call_table:
 	.quad compat_sys_set_robust_list
 	.quad compat_sys_get_robust_list
 	.quad sys_splice
-	.quad sys_sync_file_range
-	.quad sys_tee
+	.quad sys32_sync_file_range
+	.quad sys_tee			/* 315 */
 	.quad compat_sys_vmsplice
 	.quad compat_sys_move_pages
 	.quad sys_getcpu
 	.quad sys_epoll_pwait
-ia32_syscall_end:		
+	.quad compat_sys_utimensat	/* 320 */
+	.quad compat_sys_signalfd
+	.quad compat_sys_timerfd
+	.quad sys_eventfd
+ia32_syscall_end:
--- 12.2.orig/arch/x86/kernel/e820_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/e820_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -17,6 +17,8 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/pfn.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -28,7 +30,7 @@
 
 struct e820map e820 __initdata;
 #ifdef CONFIG_XEN
-struct e820map machine_e820 __initdata;
+struct e820map machine_e820;
 #endif
 
 /* 
@@ -293,22 +295,6 @@ void __init e820_reserve_resources(struc
 }
 
 #ifndef CONFIG_XEN
-/* Mark pages corresponding to given address range as nosave */
-static void __init
-e820_mark_nosave_range(unsigned long start, unsigned long end)
-{
-	unsigned long pfn, max_pfn;
-
-	if (start >= end)
-		return;
-
-	printk("Nosave address range: %016lx - %016lx\n", start, end);
-	max_pfn = end >> PAGE_SHIFT;
-	for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
-		if (pfn_valid(pfn))
-			SetPageNosave(pfn_to_page(pfn));
-}
-
 /*
  * Find the ranges of physical addresses that do not correspond to
  * e820 RAM areas and mark the corresponding pages as nosave for software
@@ -327,13 +313,13 @@ void __init e820_mark_nosave_regions(voi
 		struct e820entry *ei = &e820.map[i];
 
 		if (paddr < ei->addr)
-			e820_mark_nosave_range(paddr,
-					round_up(ei->addr, PAGE_SIZE));
+			register_nosave_region(PFN_DOWN(paddr),
+						PFN_UP(ei->addr));
 
 		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
 		if (ei->type != E820_RAM)
-			e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
-					paddr);
+			register_nosave_region(PFN_UP(ei->addr),
+						PFN_DOWN(paddr));
 
 		if (paddr >= (end_pfn << PAGE_SHIFT))
 			break;
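The e820 nosave change above drops the open-coded pfn loop and hands page-frame numbers to register_nosave_region(), computed with PFN_DOWN()/PFN_UP(). A tiny sketch of those roundings, using the usual <linux/pfn.h> definitions and example addresses:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)

int main(void)
{
	unsigned long gap_start = 0x9fc00UL;	/* example: end of one RAM range */
	unsigned long gap_end   = 0x100000UL;	/* example: start of the next one */

	/* the hole in between becomes the nosave pfn range */
	printf("nosave pfns: %lu .. %lu\n", PFN_DOWN(gap_start), PFN_UP(gap_end));
	return 0;
}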
--- 12.2.orig/arch/x86/kernel/early_printk-xen.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/kernel/early_printk-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -11,13 +11,12 @@
 
 #ifdef __i386__
 #include <asm/setup.h>
-#define VGABASE		(__ISA_IO_base + 0xb8000)
 #else
 #include <asm/bootsetup.h>
-#define VGABASE		((void __iomem *)0xffffffff800b8000UL)
 #endif
-
 #ifndef CONFIG_XEN
+#define VGABASE		(__ISA_IO_base + 0xb8000)
+
 static int max_ypos = 25, max_xpos = 80;
 static int current_ypos = 25, current_xpos = 0; 
 
@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch
 static void early_serial_write(struct console *con, const char *s, unsigned n)
 {
 	while (*s && n-- > 0) {
-		early_serial_putc(*s);
 		if (*s == '\n')
 			early_serial_putc('\r');
+		early_serial_putc(*s);
 		s++;
 	}
 }
@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo
 	return ret;
 }
 
-void __init simnow_init(char *str)
+static void __init simnow_init(char *str)
 {
 	char *fn = "klog";
 	if (*str == '=')
@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha
  		early_console = &simnow_console;
  		keep_early = 1;
 	}
+
+	if (keep_early)
+		early_console->flags &= ~CON_BOOT;
+	else
+		early_console->flags |= CON_BOOT;
 	register_console(early_console);
 	return 0;
 }
-
 early_param("earlyprintk", setup_early_printk);
-
-void __init disable_early_printk(void)
-{
-	if (!early_console_initialized || !early_console)
-		return;
-	if (!keep_early) {
-		printk("disabling early console\n");
-		unregister_console(early_console);
-		early_console_initialized = 0;
-	} else {
-		printk("keeping early console\n");
-	}
-}
-
--- 12.2.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:20:12.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_64-xen.S	2011-10-06 11:55:00.000000000 +0200
@@ -178,7 +178,7 @@ NMI_MASK = 0x80000000
 
 2:	/* Slow iret via hypervisor. */
 	andl  $~NMI_MASK, 2*8(%rsp)
-	pushq $\flag
+	pushq $\flag & VGCF_in_syscall
 	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
 	.endm
 
@@ -1252,3 +1252,24 @@ ENTRY(call_softirq)
 	ret
 	CFI_ENDPROC
 ENDPROC(call_softirq)
+
+#ifndef CONFIG_IA32_EMULATION
+KPROBE_ENTRY(ignore_sysret)
+	CFI_STARTPROC	simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA	rsp,SS+8-RIP+16
+/*	CFI_REL_OFFSET	ss,SS-RIP+16 */
+	CFI_REL_OFFSET	rsp,RSP-RIP+16
+/*	CFI_REL_OFFSET	rflags,EFLAGS-RIP+16 */
+/*	CFI_REL_OFFSET	cs,CS-RIP+16 */
+	CFI_REL_OFFSET	rip,RIP-RIP+16
+	popq %rcx
+	CFI_ADJUST_CFA_OFFSET -8
+	popq %r11
+	CFI_ADJUST_CFA_OFFSET -8
+	mov $-ENOSYS,%eax
+	# any non-zero value not having VGCF_in_syscall set will do:
+	HYPERVISOR_IRET VGCF_i387_valid
+	CFI_ENDPROC
+ENDPROC(ignore_sysret)
+#endif
--- 12.2.orig/arch/x86/kernel/head_64-xen.S	2011-08-09 10:35:37.000000000 +0200
+++ 12.2/arch/x86/kernel/head_64-xen.S	2011-08-09 10:37:34.000000000 +0200
@@ -5,6 +5,7 @@
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
  *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
  *  Jun Nakajima <jun.nakajima@intel.com>
  *    Modified for Xen                                
  */
@@ -35,27 +36,15 @@ startup_64:
 	pushq $0		# fake return address
 	jmp x86_64_start_kernel
 
-#ifdef CONFIG_ACPI_SLEEP
-.org 0xf00
-	.globl pGDT32
-pGDT32:
-	.word	gdt_end-cpu_gdt_table-1
-	.long	cpu_gdt_table-__START_KERNEL_map
-#endif
-ENTRY(stext)
-ENTRY(_stext)
+.balign PAGE_SIZE
 
-	$page = 0
 #define NEXT_PAGE(name) \
-	$page = $page + 1; \
-	.org $page * 0x1000; \
-	phys_##name = $page * 0x1000 + __PHYSICAL_START; \
+	.balign	PAGE_SIZE; \
+	phys_##name = . - .bootstrap.text; \
 ENTRY(name)
 
 NEXT_PAGE(init_level4_pgt)
-	/* This gets initialized in x86_64_start_kernel */
 	.fill	512,8,0
-NEXT_PAGE(init_level4_user_pgt)
         /*
          * We update two pgd entries to make kernel and user pgd consistent
          * at pgd_populate(). It can be used for kernel modules. So we place 
@@ -99,14 +88,6 @@ NEXT_PAGE(hypercall_page)
 #undef NEXT_PAGE
 
 	.data
-/* Just dummy symbol to allow compilation. Not used in sleep path */
-#ifdef CONFIG_ACPI_SLEEP
-	.align PAGE_SIZE
-ENTRY(wakeup_level4_pgt)
-	.fill	512,8,0
-#endif
-
-	.data
 
 	.align 16
 	.globl cpu_gdt_descr
@@ -134,13 +115,13 @@ gdt:
 
 ENTRY(cpu_gdt_table)
 	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
+	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
+	.quad	0x00cffb000000ffff	/* __USER32_CS */
+	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
+	.quad	0x00affb000000ffff	/* __USER_CS */
 	.quad	0x0			/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffa000000ffff	/* __USER32_CS */
-	.quad	0x00cff2000000ffff	/* __USER_DS, __USER32_DS  */
-	.quad	0x00affa000000ffff	/* __USER_CS */
-	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad	0,0			/* TSS */
 	.quad	0,0			/* LDT */
 	.quad   0,0,0			/* three TLS descriptors */
@@ -175,14 +156,11 @@ ENTRY(empty_zero_page)
  * __xen_guest information
  */
 .macro utoh value
- .if (\value) < 0 || (\value) >= 0x10
-	utoh (((\value)>>4)&0x0fffffffffffffff)
- .endif
- .if ((\value) & 0xf) < 10
-  .byte '0' + ((\value) & 0xf)
- .else
-  .byte 'A' + ((\value) & 0xf) - 10
- .endif
+ i = 64
+ .rept 16
+  i = i - 4
+  .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf)
+ .endr
 .endm
 
 .section __xen_guest
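The rewritten utoh macro above emits a 64-bit value as sixteen upper-case hex digits, most significant nibble first; the odd-looking multiply works because GNU as relational expressions evaluate to -1 when true. For reference, the same conversion in plain C:

#include <stdio.h>

static void utoh(unsigned long long value, char out[17])
{
	int i, pos = 0;

	for (i = 60; i >= 0; i -= 4) {		/* sixteen nibbles, MSB first */
		unsigned digit = (value >> i) & 0xf;

		out[pos++] = digit < 10 ? '0' + digit : 'A' + digit - 10;
	}
	out[pos] = '\0';
}

int main(void)
{
	char buf[17];

	utoh(0xffffffff80000000ULL, buf);
	printf("%s\n", buf);			/* FFFFFFFF80000000 */
	return 0;
}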
--- 12.2.orig/arch/x86/kernel/head64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/head64-xen.c	2011-08-09 10:37:35.000000000 +0200
@@ -25,13 +25,21 @@
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include <asm/sections.h>
 
 unsigned long start_pfn;
 
+#ifndef CONFIG_XEN
+static void __init zap_identity_mappings(void)
+{
+	pgd_t *pgd = pgd_offset_k(0UL);
+	pgd_clear(pgd);
+	__flush_tlb();
+}
+
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
-#if 0
 static void __init clear_bss(void)
 {
 	memset(__bss_start, 0,
@@ -40,26 +48,25 @@ static void __init clear_bss(void)
 #endif
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR	0x90020
+#define OLD_CL_MAGIC_ADDR	0x20
 #define OLD_CL_MAGIC            0xA33F
-#define OLD_CL_BASE_ADDR        0x90000
-#define OLD_CL_OFFSET           0x90022
+#define OLD_CL_OFFSET           0x22
 
 static void __init copy_bootdata(char *real_mode_data)
 {
 #ifndef CONFIG_XEN
-	int new_data;
+	unsigned long new_data;
 	char * command_line;
 
 	memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
-	new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+	new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
 	if (!new_data) {
-		if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+		if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
 			return;
 		}
-		new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+		new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
 	}
-	command_line = (char *) ((u64)(new_data));
+	command_line = __va(new_data);
 	memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 #else
 	int max_cmdline;
@@ -98,10 +105,13 @@ void __init x86_64_start_kernel(char * r
 		machine_to_phys_nr = mapping.max_mfn + 1;
 	}
 
-#if 0
+#ifndef CONFIG_XEN
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
+	/* Make NULL pointers segfault */
+	zap_identity_mappings();
+
 	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
@@ -113,7 +123,7 @@ void __init x86_64_start_kernel(char * r
  		cpu_pda(i) = &boot_cpu_pda[i];
 
 	pda_init(0);
-	copy_bootdata(real_mode_data);
+	copy_bootdata(__va(real_mode_data));
 #ifdef CONFIG_SMP
 	cpu_set(0, cpu_online_map);
 #endif
--- 12.2.orig/arch/x86/kernel/io_apic_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/io_apic_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi.h>
@@ -904,10 +903,6 @@ static void __init setup_ExtINT_IRQ0_pin
 	enable_8259A_irq(0);
 }
 
-void __init UNEXPECTED_IO_APIC(void)
-{
-}
-
 void __apicdebuginit print_IO_APIC(void)
 {
 	int apic, i;
@@ -943,40 +938,16 @@ void __apicdebuginit print_IO_APIC(void)
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
-		UNEXPECTED_IO_APIC();
 
 	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
-		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
-		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
-		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
-		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
-		(reg_01.bits.entries != 0x2E) &&
-		(reg_01.bits.entries != 0x3F) &&
-		(reg_01.bits.entries != 0x03) 
-	)
-		UNEXPECTED_IO_APIC();
 
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
-		(reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
-		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
-		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
-		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
-		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
-	)
-		UNEXPECTED_IO_APIC();
-	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
-		UNEXPECTED_IO_APIC();
 
 	if (reg_01.bits.version >= 0x10) {
 		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
-			UNEXPECTED_IO_APIC();
 	}
 
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i
 
 	vector = ~get_irq_regs()->orig_rax;
 	me = smp_processor_id();
-	if ((vector == cfg->vector) &&
-	    cpu_isset(smp_processor_id(), cfg->domain)) {
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
 
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int 
 
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
-	 * not propogate properly.
+	 * not propagate properly.
 	 */
 	ack_APIC_irq();
 
@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int 
 static void end_lapic_irq (unsigned int i) { /* nothing */ }
 
 static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+	.name = "local-APIC",
 	.typename = "local-APIC-edge",
 	.startup = NULL, /* startup_irq() not used for IRQ0 */
 	.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
@@ -1998,18 +1969,18 @@ int arch_setup_msi_irq(struct pci_dev *d
 	if (irq < 0)
 		return irq;
 
-	set_irq_msi(irq, desc);
 	ret = msi_compose_msg(dev, irq, &msg);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
 	}
 
+	set_irq_msi(irq, desc);
 	write_msi_msg(irq, &msg);
 
 	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
-	return irq;
+	return 0;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
--- 12.2.orig/arch/x86/kernel/ioport_64-xen.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/kernel/ioport_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,10 +13,10 @@
 #include <linux/ioport.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/thread_info.h>
+#include <linux/syscalls.h>
 #include <xen/interface/physdev.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
--- 12.2.orig/arch/x86/kernel/ldt_64-xen.c	2007-06-12 13:13:01.000000000 +0200
+++ 12.2/arch/x86/kernel/ldt_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -13,7 +13,6 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 
--- 12.2.orig/arch/x86/kernel/mpparse_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/mpparse_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -17,7 +17,6 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi.h>
@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp
 			}
 		}
 	}
-	clustered_apic_check();
+	setup_apic_routing();
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
 	return num_processors;
--- 12.2.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:31:05.000000000 +0100
+++ 12.2/arch/x86/kernel/process_64-xen.c	2011-02-02 08:31:39.000000000 +0100
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
+#include <linux/kdebug.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -49,7 +50,6 @@
 #include <asm/mmu_context.h>
 #include <asm/pda.h>
 #include <asm/prctl.h>
-#include <asm/kdebug.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <asm/desc.h>
@@ -231,16 +231,18 @@ void __cpuinit select_idle_routine(const
 
 static int __init idle_setup (char *str)
 {
-	if (!strncmp(str, "poll", 4)) {
+	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
-	}
+	} else if (!strcmp(str, "mwait"))
+		force_mwait = 1;
+	else
+		return -1;
 
 	boot_option_idle_override = 1;
-	return 1;
+	return 0;
 }
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);
 
 /* Prints also some state that isn't saved in the pt_regs */ 
 void __show_regs(struct pt_regs * regs)
@@ -545,7 +547,7 @@ __switch_to(struct task_struct *prev_p, 
 	 * The AMD workaround requires it to be after DS reload, or
 	 * after DS has been cleared, which we do in __prepare_arch_switch.
 	 */
-	if (prev_p->thread_info->status & TS_USEDFPU) {
+	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
 		mcl->op      = __HYPERVISOR_fpu_taskswitch;
 		mcl->args[0] = 1;
--- 12.2.orig/arch/x86/kernel/setup_64-xen.c	2012-06-06 13:49:08.000000000 +0200
+++ 12.2/arch/x86/kernel/setup_64-xen.c	2012-06-06 13:49:31.000000000 +0200
@@ -120,6 +120,8 @@ int bootloader_type;
 
 unsigned long saved_video_mode;
 
+int force_mwait __cpuinitdata;
+
 /* 
  * Early DMI memory
  */
@@ -254,10 +256,10 @@ static void discover_ebda(void)
 	 * there is a real-mode segmented pointer pointing to the 
 	 * 4K EBDA area at 0x40E
 	 */
-	ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
 	ebda_addr <<= 4;
 
-	ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+	ebda_size = *(unsigned short *)__va(ebda_addr);
 
 	/* Round EBDA up to pages */
 	if (ebda_size == 0)
@@ -411,15 +413,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_SMP
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
-
 	/* Reserve SMP trampoline */
-	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
 #endif
 #endif
 
@@ -569,8 +564,6 @@ void __init setup_arch(char **cmdline_p)
 	early_quirks();
 #endif
 
-	zap_low_mappings(0);
-
 	/*
 	 * set this early, so we dont allocate cpu0
 	 * if MADT list doesnt list BSP first
@@ -863,6 +856,10 @@ static void __cpuinit init_amd(struct cp
 
 	/* RDTSC can be speculated around */
 	clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+
+	/* Family 10 doesn't support C states in MWAIT so don't use it */
+	if (c->x86 == 0x10 && !force_mwait)
+		clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -1147,9 +1144,7 @@ void __cpuinit identify_cpu(struct cpuin
 #ifdef CONFIG_X86_MCE
 	mcheck_init(c);
 #endif
-	if (c == &boot_cpu_data)
-		mtrr_bp_init();
-	else
+	if (c != &boot_cpu_data)
 		mtrr_ap_init();
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
@@ -1240,9 +1235,8 @@ static int show_cpuinfo(struct seq_file 
 		"stc",
 		"100mhzsteps",
 		"hwpstate",
-		NULL,	/* tsc invariant mapped to constant_tsc */
-		NULL,
-		/* nothing */	/* constant_tsc - moved to flags */
+		"",	/* tsc invariant mapped to constant_tsc */
+		/* nothing */
 	};
 
 
--- 12.2.orig/arch/x86/kernel/setup64-xen.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/kernel/setup64-xen.c	2012-04-20 15:04:40.000000000 +0200
@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void)
 		if (!NODE_DATA(cpu_to_node(i))) {
 			printk("cpu with no node %d, num_online_nodes %d\n",
 			       i, num_online_nodes());
-			ptr = alloc_bootmem(size);
+			ptr = alloc_bootmem_pages(size);
 		} else { 
-			ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+			ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
 		}
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
@@ -208,6 +208,8 @@ char boot_exception_stacks[(N_EXCEPTION_
 __attribute__((section(".bss.page_aligned")));
 #endif
 
+extern asmlinkage void ignore_sysret(void);
+
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
@@ -219,12 +221,26 @@ void syscall_init(void)
 	 */ 
 	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
 	wrmsrl(MSR_LSTAR, system_call); 
+	wrmsrl(MSR_CSTAR, ignore_sysret);
 
 	/* Flags to clear on syscall */
 	wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
 #endif
 #ifdef CONFIG_IA32_EMULATION   		
 	syscall32_cpu_init ();
+#elif defined(CONFIG_XEN)
+	{
+		struct callback_register cb = {
+			.type = CALLBACKTYPE_syscall32,
+			.address = (unsigned long)ignore_sysret
+		};
+
+		if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb))
+			printk(KERN_WARNING "Unable to register CSTAR stub\n");
+		cb.type = CALLBACKTYPE_sysenter;
+		if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb))
+			printk(KERN_WARNING "Unable to register SEP stub\n");
+	}
 #endif
 }
 
@@ -262,7 +278,6 @@ void __cpuinit cpu_init (void)
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0) {
 		pda_init(cpu);
-		zap_low_mappings(cpu);
 	}
 #ifndef CONFIG_X86_NO_TSS
 	else
--- 12.2.orig/arch/x86/kernel/smp_64-xen.c	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/kernel/smp_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -14,7 +14,6 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/spinlock.h>
-#include <linux/smp_lock.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
@@ -457,44 +456,36 @@ int smp_call_function (void (*func) (voi
 }
 EXPORT_SYMBOL(smp_call_function);
 
-void smp_stop_cpu(void)
+static void stop_this_cpu(void *dummy)
 {
-	unsigned long flags;
+	local_irq_disable();
 	/*
 	 * Remove this CPU:
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
-	local_irq_save(flags);
 	disable_all_local_evtchn();
-	local_irq_restore(flags); 
-}
-
-static void smp_really_stop_cpu(void *dummy)
-{
-	smp_stop_cpu(); 
 	for (;;) 
 		halt();
 } 
 
 void smp_send_stop(void)
 {
-	int nolock = 0;
+	int nolock;
+	unsigned long flags;
+
 #ifndef CONFIG_XEN
 	if (reboot_force)
 		return;
 #endif
+
 	/* Don't deadlock on the call lock in panic */
-	if (!spin_trylock(&call_lock)) {
-		/* ignore locking because we have panicked anyways */
-		nolock = 1;
-	}
-	__smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
+	nolock = !spin_trylock(&call_lock);
+	local_irq_save(flags);
+	__smp_call_function(stop_this_cpu, NULL, 0, 0);
 	if (!nolock)
 		spin_unlock(&call_lock);
-
-	local_irq_disable();
 	disable_all_local_evtchn();
-	local_irq_enable();
+	local_irq_restore(flags);
 }
 
 /*
--- 12.2.orig/arch/x86/kernel/traps_64-xen.c	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/kernel/traps_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -32,6 +32,7 @@
 #include <linux/unwind.h>
 #include <linux/uaccess.h>
 #include <linux/bug.h>
+#include <linux/kdebug.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -39,7 +40,6 @@
 #include <asm/debugreg.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/kdebug.h>
 #include <asm/processor.h>
 #include <asm/unwind.h>
 #include <asm/smp.h>
@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void);
 asmlinkage void machine_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 
-ATOMIC_NOTIFIER_HEAD(die_chain);
-EXPORT_SYMBOL(die_chain);
-
-int register_die_notifier(struct notifier_block *nb)
-{
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&die_chain, nb);
-}
-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&die_chain, nb);
-}
-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
-
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->eflags & X86_EFLAGS_IF)
@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs
 	const int cpu = smp_processor_id();
 	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
 
-		rsp = regs->rsp;
-
+	rsp = regs->rsp;
 	printk("CPU %d ", cpu);
 	__show_regs(regs);
 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs
 	 * time of the fault..
 	 */
 	if (in_kernel) {
-
 		printk("Stack: ");
 		_show_stack(NULL, regs, (unsigned long*)rsp);
 
@@ -485,13 +467,14 @@ static unsigned int die_nest_count;
 
 unsigned __kprobes long oops_begin(void)
 {
-	int cpu = smp_processor_id();
+	int cpu;
 	unsigned long flags;
 
 	oops_enter();
 
 	/* racy, but better than risking deadlock. */
 	local_irq_save(flags);
+	cpu = smp_processor_id();
 	if (!spin_trylock(&die_lock)) { 
 		if (cpu == die_owner) 
 			/* nested oops. should stop eventually */;
@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr
 {
 	struct task_struct *tsk = current;
 
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = trapnr;
-
 	if (user_mode(regs)) {
+		/*
+		 * We want error_code and trap_no set for userspace
+		 * faults and kernelspace faults which result in
+		 * die(), but not kernelspace faults which are fixed
+		 * up.  die() gives the process no chance to handle
+		 * the signal and notice the kernel fault information,
+		 * so that won't result in polluting the information
+		 * about previously queued, but not yet delivered,
+		 * faults.  See also do_general_protection below.
+		 */
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+
 		if (exception_trace && unhandled_signal(tsk, signr))
 			printk(KERN_INFO
 			       "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr
 		fixup = search_exception_tables(regs->rip);
 		if (fixup)
 			regs->rip = fixup->fixup;
-		else	
+		else {
+			tsk->thread.error_code = error_code;
+			tsk->thread.trap_no = trapnr;
 			die(str, regs, error_code);
+		}
 		return;
 	}
 }
@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro
 
 	conditional_sti(regs);
 
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = 13;
-
 	if (user_mode(regs)) {
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = 13;
+
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV))
 			printk(KERN_INFO
 		       "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro
 			regs->rip = fixup->fixup;
 			return;
 		}
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = 13;
 		if (notify_die(DIE_GPF, "general protection fault", regs,
 					error_code, 13, SIGSEGV) == NOTIFY_STOP)
 			return;
--- 12.2.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/kernel/vsyscall_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -45,14 +45,34 @@
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 #define __syscall_clobber "r11","rcx","memory"
+#define __pa_vsymbol(x)			\
+	({unsigned long v;  		\
+	extern char __vsyscall_0; 	\
+	  asm("" : "=r" (v) : "0" (x)); \
+	  ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
 
+/*
+ * vsyscall_gtod_data contains data that is :
+ * - readonly from vsyscalls
+ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
+ * Try to keep this structure as small as possible to avoid cache line ping pongs
+ */
 struct vsyscall_gtod_data_t {
-	seqlock_t lock;
-	int sysctl_enabled;
-	struct timeval wall_time_tv;
+	seqlock_t	lock;
+
+	/* open coded 'struct timespec' */
+	time_t		wall_time_sec;
+	u32		wall_time_nsec;
+
+	int		sysctl_enabled;
 	struct timezone sys_tz;
-	cycle_t offset_base;
-	struct clocksource clock;
+	struct { /* extract of a clocksource struct */
+		cycle_t (*vread)(void);
+		cycle_t	cycle_last;
+		cycle_t	mask;
+		u32	mult;
+		u32	shift;
+	} clock;
 };
 int __vgetcpu_mode __section_vgetcpu_mode;
 
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa
 
 	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
 	/* copy vsyscall data */
-	vsyscall_gtod_data.clock = *clock;
-	vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
-	vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
+	vsyscall_gtod_data.clock.vread = clock->vread;
+	vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
+	vsyscall_gtod_data.clock.mask = clock->mask;
+	vsyscall_gtod_data.clock.mult = clock->mult;
+	vsyscall_gtod_data.clock.shift = clock->shift;
+	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
+	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
 	vsyscall_gtod_data.sys_tz = sys_tz;
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
@@ -105,7 +129,8 @@ static __always_inline long time_syscall
 static __always_inline void do_vgettimeofday(struct timeval * tv)
 {
 	cycle_t now, base, mask, cycle_delta;
-	unsigned long seq, mult, shift, nsec_delta;
+	unsigned seq;
+	unsigned long mult, shift, nsec;
 	cycle_t (*vread)(void);
 	do {
 		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo
 		mult = __vsyscall_gtod_data.clock.mult;
 		shift = __vsyscall_gtod_data.clock.shift;
 
-		*tv = __vsyscall_gtod_data.wall_time_tv;
-
+		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
+		nsec = __vsyscall_gtod_data.wall_time_nsec;
 	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
 
 	/* calculate interval: */
 	cycle_delta = (now - base) & mask;
 	/* convert to nsecs: */
-	nsec_delta = (cycle_delta * mult) >> shift;
+	nsec += (cycle_delta * mult) >> shift;
 
-	/* convert to usecs and add to timespec: */
-	tv->tv_usec += nsec_delta / NSEC_PER_USEC;
-	while (tv->tv_usec > USEC_PER_SEC) {
+	while (nsec >= NSEC_PER_SEC) {
 		tv->tv_sec += 1;
-		tv->tv_usec -= USEC_PER_SEC;
+		nsec -= NSEC_PER_SEC;
 	}
+	tv->tv_usec = nsec / NSEC_PER_USEC;
 }
 
 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t
  * unlikely */
 time_t __vsyscall(1) vtime(time_t *t)
 {
+	struct timeval tv;
+	time_t result;
 	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
 		return time_syscall(t);
-	else if (t)
-		*t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
-	return __vsyscall_gtod_data.wall_time_tv.tv_sec;
+
+	vgettimeofday(&tv, 0);
+	result = tv.tv_sec;
+	if (t)
+		*t = result;
+	return result;
 }
 
 /* Fast way to get current CPU and node.
@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta
 		return ret;
 	/* gcc has some trouble with __va(__pa()), so just do it this
 	   way. */
-	map1 = ioremap(__pa_symbol(&vsysc1), 2);
+	map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
 	if (!map1)
 		return -ENOMEM;
-	map2 = ioremap(__pa_symbol(&vsysc2), 2);
+	map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
 	if (!map2) {
 		ret = -ENOMEM;
 		goto out;
@@ -304,7 +333,7 @@ static int __cpuinit
 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
 	long cpu = (long)arg;
-	if (action == CPU_ONLINE)
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
 		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
 	return NOTIFY_DONE;
 }
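With the changes above, do_vgettimeofday() keeps the wall time as seconds plus nanoseconds, scales the cycle delta by the clocksource's mult/shift pair, and only divides down to microseconds at the end. A user-space sketch of that arithmetic; the clocksource values below are made up (mult/shift chosen so one cycle is roughly one nanosecond):

#include <stdio.h>

#define NSEC_PER_SEC	1000000000UL
#define NSEC_PER_USEC	1000UL

int main(void)
{
	/* cached by update_vsyscall() */
	unsigned long long cycle_last = 1234000000ULL, mask = ~0ULL;
	unsigned long mult = 1UL << 22, shift = 22;
	unsigned long wall_sec = 1179800000UL, wall_nsec = 999999500UL;

	unsigned long long now = 1234567890ULL;		/* clock->vread() result */
	unsigned long long cycle_delta = (now - cycle_last) & mask;

	unsigned long sec  = wall_sec;
	unsigned long nsec = wall_nsec + ((cycle_delta * mult) >> shift);

	while (nsec >= NSEC_PER_SEC) {			/* normalize, as in the patch */
		sec  += 1;
		nsec -= NSEC_PER_SEC;
	}
	printf("%lu.%06lu\n", sec, nsec / NSEC_PER_USEC);
	return 0;
}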
--- 12.2.orig/arch/x86/mm/fault_64-xen.c	2011-07-26 09:27:34.000000000 +0200
+++ 12.2/arch/x86/mm/fault_64-xen.c	2011-07-26 09:27:41.000000000 +0200
@@ -15,22 +15,22 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>		/* For unblank_screen() */
 #include <linux/compiler.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/kdebug.h>
 
 #include <asm/system.h>
 #include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
-#include <asm/kdebug.h>
 #include <asm-generic/sections.h>
 
 /* Page fault error code bits */
@@ -538,6 +538,12 @@ bad_area:
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & PF_USER) {
+
+		/*
+		 * It's possible to have interrupts off here.
+		 */
+		local_irq_enable();
+
 		if (is_prefetch(regs, address, error_code))
 			return;
 
@@ -647,10 +653,10 @@ do_sigbus:
 }
 
 DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
+LIST_HEAD(pgd_list);
 
 #define pgd_page_table(what, pg) \
-	spin_##what(&((struct mm_struct *)(pg)->mapping)->page_table_lock)
+	spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
 
 void vmalloc_sync_all(void)
 {
@@ -670,8 +676,7 @@ void vmalloc_sync_all(void)
 			if (pgd_none(*pgd_ref))
 				continue;
 			spin_lock(&pgd_lock);
-			for (page = pgd_list; page;
-			     page = (struct page *)page->index) {
+			list_for_each_entry(page, &pgd_list, lru) {
 				pgd_t *pgd;
 				pgd = (pgd_t *)page_address(page) + pgd_index(address);
 
--- 12.2.orig/arch/x86/mm/init_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/init_64-xen.c	2011-11-03 12:08:43.000000000 +0100
@@ -25,10 +25,12 @@
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
+#include <linux/nmi.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -51,7 +53,7 @@
 #define Dprintk(x...)
 #endif
 
-struct dma_mapping_ops* dma_ops;
+const struct dma_mapping_ops* dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 #if CONFIG_XEN_COMPAT <= 0x030002
@@ -191,6 +193,13 @@ void show_mem(void)
 
 	for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+			/* this loop can take a while with 256 GB and 4k pages
+			   so update the NMI watchdog */
+			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
+				touch_nmi_watchdog();
+			}
+			if (!pfn_valid(pgdat->node_start_pfn + i))
+				continue;
 			page = pfn_to_page(pgdat->node_start_pfn + i);
 			total++;
 			if (PageReserved(page))
@@ -359,7 +368,7 @@ __set_fixmap (enum fixed_addresses idx, 
 	}
 }
 
-unsigned long __initdata table_start, table_end; 
+unsigned long __meminitdata table_start, table_end;
 
 static __meminit void *alloc_static_page(unsigned long *phys)
 {
@@ -376,7 +385,7 @@ static __meminit void *alloc_static_page
 	start_pfn++;
 	clear_page((void *)va);
 	return (void *)va;
-} 
+}
 
 #define PTE_SIZE PAGE_SIZE
 
@@ -412,28 +421,46 @@ static inline int make_readonly(unsigned
 
 #ifndef CONFIG_XEN
 /* Must run before zap_low_mappings */
-__init void *early_ioremap(unsigned long addr, unsigned long size)
+__meminit void *early_ioremap(unsigned long addr, unsigned long size)
 {
-	unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
-
-	/* actually usually some more */
-	if (size >= LARGE_PAGE_SIZE) {
-		return NULL;
+	unsigned long vaddr;
+	pmd_t *pmd, *last_pmd;
+	int i, pmds;
+
+	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	vaddr = __START_KERNEL_map;
+	pmd = level2_kernel_pgt;
+	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
+		for (i = 0; i < pmds; i++) {
+			if (pmd_present(pmd[i]))
+				goto next;
+		}
+		vaddr += addr & ~PMD_MASK;
+		addr &= PMD_MASK;
+		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
+			set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
+		__flush_tlb();
+		return (void *)vaddr;
+	next:
+		;
 	}
-	set_pmd(temp_mappings[0].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	map += LARGE_PAGE_SIZE;
-	set_pmd(temp_mappings[1].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	__flush_tlb();
-	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+	return NULL;
 }
 
 /* To avoid virtual aliases later */
-__init void early_iounmap(void *addr, unsigned long size)
+__meminit void early_iounmap(void *addr, unsigned long size)
 {
-	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
-		printk("early_iounmap: bad address %p\n", addr);
-	set_pmd(temp_mappings[0].pmd, __pmd(0));
-	set_pmd(temp_mappings[1].pmd, __pmd(0));
+	unsigned long vaddr;
+	pmd_t *pmd;
+	int i, pmds;
+
+	vaddr = (unsigned long)addr;
+	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	pmd = level2_kernel_pgt + pmd_index(vaddr);
+	for (i = 0; i < pmds; i++)
+		pmd_clear(pmd + i);
 	__flush_tlb();
 }
 #endif
@@ -792,14 +819,6 @@ void __meminit init_memory_mapping(unsig
 	__flush_tlb_all();
 }
 
-void __cpuinit zap_low_mappings(int cpu)
-{
-	/* this is not required for Xen */
-#if 0
-	swap_low_mappings();
-#endif
-}
-
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
@@ -984,17 +1003,6 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
-
-#ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
-	/*
-	 * Sync boot_level4_pgt mappings with the init_level4_pgt
-	 * except for the low identity mappings which are already zapped
-	 * in init_level4_pgt. This sync-up is essential for AP's bringup
-	 */
-	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
-#endif
-#endif
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -1004,7 +1012,7 @@ void free_init_pages(char *what, unsigne
 	if (begin >= end)
 		return;
 
-	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
@@ -1013,24 +1021,17 @@ void free_init_pages(char *what, unsigne
 		if (addr >= __START_KERNEL_map) {
 			/* make_readonly() reports all kernel addresses. */
 			__make_page_writable(__va(__pa(addr)));
-			if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
-				pgd_t *pgd = pgd_offset_k(addr);
-				pud_t *pud = pud_offset(pgd, addr);
-				pmd_t *pmd = pmd_offset(pud, addr);
-				pte_t *pte = pte_offset_kernel(pmd, addr);
-
-				xen_l1_entry_update(pte, __pte(0)); /* fallback */
-			}
+			change_page_attr_addr(addr, 1, __pgprot(0));
 		}
 		free_page(addr);
 		totalram_pages++;
 	}
+	if (addr > __START_KERNEL_map)
+		global_flush_tlb();
 }
 
 void free_initmem(void)
 {
-	memset(__initdata_begin, POISON_FREE_INITDATA,
-		__initdata_end - __initdata_begin);
 	free_init_pages("unused kernel memory",
 			(unsigned long)(&__init_begin),
 			(unsigned long)(&__init_end));
@@ -1040,13 +1041,28 @@ void free_initmem(void)
 
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)__start_rodata;
+	unsigned long start = (unsigned long)_stext, end;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/* It must still be possible to apply SMP alternatives. */
+	if (num_possible_cpus() > 1)
+		start = (unsigned long)_etext;
+#endif
+
+#ifdef CONFIG_KPROBES
+	start = (unsigned long)__start_rodata;
+#endif
+
+	end = (unsigned long)__end_rodata;
+	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
+	end &= PAGE_MASK;
+	if (end <= start)
+		return;
 
-	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-		change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
+	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
 
-	printk ("Write protecting the kernel read-only data: %luk\n",
-			(__end_rodata - __start_rodata) >> 10);
+	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+	       (end - start) >> 10);
 
 	/*
 	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
@@ -1210,3 +1226,11 @@ int in_gate_area_no_task(unsigned long a
 {
 	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
+
+#ifndef CONFIG_XEN
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
+{
+	return __alloc_bootmem_core(pgdat->bdata, size,
+			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
+}
+#endif
--- 12.2.orig/arch/x86/mm/pageattr_64-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/mm/pageattr_64-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -215,13 +215,13 @@ void mm_pin_all(void)
 	preempt_enable();
 }
 
-void _arch_dup_mmap(struct mm_struct *mm)
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	if (!mm->context.pinned)
 		mm_pin(mm);
 }
 
-void _arch_exit_mmap(struct mm_struct *mm)
+void arch_exit_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk = current;
 
@@ -343,10 +343,11 @@ static void flush_kernel_map(void *arg)
 	struct page *pg;
 
 	/* When clflush is available always use it because it is
-	   much cheaper than WBINVD */
-	if (!cpu_has_clflush)
+	   much cheaper than WBINVD. Disable clflush for now because
+	   the high level code is not ready yet */
+	if (1 || !cpu_has_clflush)
 		asm volatile("wbinvd" ::: "memory");
-	list_for_each_entry(pg, l, lru) {
+	else list_for_each_entry(pg, l, lru) {
 		void *adr = page_address(pg);
 		if (cpu_has_clflush)
 			cache_flush_page(adr);
@@ -460,16 +461,24 @@ __change_page_attr(unsigned long address
  */
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
-	int err = 0; 
+	int err = 0, kernel_map = 0;
 	int i; 
 
+	if (address >= __START_KERNEL_map
+	    && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
+		address = (unsigned long)__va(__pa(address));
+		kernel_map = 1;
+	}
+
 	down_write(&init_mm.mmap_sem);
 	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
-		err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
-		if (err) 
-			break; 
+		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
+			err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+			if (err)
+				break;
+		}
 		/* Handle kernel mapping too which aliases part of the
 		 * lowmem */
 		if (__pa(address) < KERNEL_TEXT_SIZE) {
--- 12.2.orig/drivers/char/tpm/tpm_xen.c	2012-03-12 13:31:05.000000000 +0100
+++ 12.2/drivers/char/tpm/tpm_xen.c	2012-03-12 13:33:35.000000000 +0100
@@ -461,7 +461,7 @@ static int tpmif_connect(struct xenbus_d
 	tp->backend_id = domid;
 
 	err = bind_listening_port_to_irqhandler(
-		domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
+		domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp);
 	if (err <= 0) {
 		WPRINTK("bind_listening_port_to_irqhandler failed "
 			"(err=%d)\n", err);
--- 12.2.orig/drivers/hwmon/Kconfig	2012-06-20 12:12:06.000000000 +0200
+++ 12.2/drivers/hwmon/Kconfig	2012-04-10 16:15:23.000000000 +0200
@@ -435,7 +435,7 @@ config SENSORS_GPIO_FAN
 
 config SENSORS_CORETEMP
 	tristate "Intel Core/Core2/Atom temperature sensor"
-	depends on X86 && PCI && EXPERIMENTAL
+	depends on X86 && PCI && !XEN && EXPERIMENTAL
 	help
 	  If you say yes here you get support for the temperature
 	  sensor inside your CPU. Most of the family 6 CPUs
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/drivers/hwmon/coretemp-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -0,0 +1,449 @@
+/*
+ * coretemp.c - Linux kernel module for hardware monitoring
+ *
+ * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
+ *
+ * Inspired from many hwmon drivers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/hwmon.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <asm/msr.h>
+#include <xen/pcpu.h>
+#include "../xen/core/domctl.h"
+
+#define DRVNAME	"coretemp"
+#define coretemp_data pdev_entry
+
+typedef enum { SHOW_TEMP, SHOW_TJMAX, SHOW_LABEL, SHOW_NAME } SHOW;
+
+/*
+ * Functions declaration
+ */
+
+static struct coretemp_data *coretemp_update_device(struct device *dev);
+
+struct pdev_entry {
+	struct list_head list;
+	struct platform_device *pdev;
+	struct class_device *class_dev;
+	struct mutex update_lock;
+	const char *name;
+	u8 x86_model, x86_mask;
+	u32 ucode_rev;
+	char valid;		/* zero until following fields are valid */
+	unsigned long last_updated;	/* in jiffies */
+	int temp;
+	int tjmax;
+	u8 alarm;
+};
+
+static struct coretemp_data *coretemp_update_device(struct device *dev);
+
+/*
+ * Sysfs stuff
+ */
+
+static ssize_t show_name(struct device *dev, struct device_attribute
+			  *devattr, char *buf)
+{
+	int ret;
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct coretemp_data *data = dev_get_drvdata(dev);
+
+	if (attr->index == SHOW_NAME)
+		ret = sprintf(buf, "%s\n", data->name);
+	else	/* show label */
+		ret = sprintf(buf, "Core %d\n", data->pdev->id);
+	return ret;
+}
+
+static ssize_t show_alarm(struct device *dev, struct device_attribute
+			  *devattr, char *buf)
+{
+	struct coretemp_data *data = coretemp_update_device(dev);
+	/* read the Out-of-spec log, never clear */
+	return sprintf(buf, "%d\n", data->alarm);
+}
+
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct coretemp_data *data = coretemp_update_device(dev);
+	int err;
+
+	if (attr->index == SHOW_TEMP)
+		err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN;
+	else
+		err = sprintf(buf, "%d\n", data->tjmax);
+	return err;
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
+			  SHOW_TEMP);
+static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL,
+			  SHOW_TJMAX);
+static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
+
+static struct attribute *coretemp_attributes[] = {
+	&sensor_dev_attr_name.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&dev_attr_temp1_crit_alarm.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_crit.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group coretemp_group = {
+	.attrs = coretemp_attributes,
+};
+
+static struct coretemp_data *coretemp_update_device(struct device *dev)
+{
+	struct coretemp_data *data = dev_get_drvdata(dev);
+
+	mutex_lock(&data->update_lock);
+
+	if (!data->valid || time_after(jiffies, data->last_updated + HZ)) {
+		u32 eax, edx;
+
+		data->valid = 0;
+		if (rdmsr_safe_on_pcpu(data->pdev->id, MSR_IA32_THERM_STATUS,
+				       &eax, &edx) < 0)
+			eax = ~0;
+		data->alarm = (eax >> 5) & 1;
+		/* update only if data has been valid */
+		if (eax & 0x80000000) {
+			data->temp = data->tjmax - (((eax >> 16)
+							& 0x7f) * 1000);
+			data->valid = 1;
+		} else {
+			dev_dbg(dev, "Temperature data invalid (0x%x)\n", eax);
+		}
+		data->last_updated = jiffies;
+	}
+
+	mutex_unlock(&data->update_lock);
+	return data;
+}
+
+static int coretemp_probe(struct platform_device *pdev)
+{
+	struct coretemp_data *data = platform_get_drvdata(pdev);
+	int err;
+	u32 eax, edx;
+
+	data->name = "coretemp";
+	mutex_init(&data->update_lock);
+	/* Tjmax default is 100 degrees C */
+	data->tjmax = 100000;
+
+	/* test if we can access the THERM_STATUS MSR */
+	err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_THERM_STATUS, &eax, &edx);
+	if (err < 0) {
+		dev_err(&pdev->dev,
+			"Unable to access THERM_STATUS MSR, giving up\n");
+		return err;
+	}
+
+	/* Check if we have problem with errata AE18 of Core processors:
+	   Readings might stop update when processor visited too deep sleep,
+	   fixed for stepping D0 (6EC).
+	*/
+
+	if ((data->x86_model == 0xe) && (data->x86_mask < 0xc)) {
+		/* check for microcode update */
+		if (!(data->ucode_rev + 1))
+			dev_warn(&pdev->dev,
+				 "Cannot read microcode revision of CPU\n");
+		else if (data->ucode_rev < 0x39) {
+			err = -ENODEV;
+			dev_err(&pdev->dev,
+				"Errata AE18 not fixed, update BIOS or "
+				"microcode of the CPU!\n");
+			return err;
+		}
+	}
+
+	/* Some processors have Tjmax 85 following magic should detect it
+	   Intel won't disclose the information without signed NDA, but
+	   individuals cannot sign it. Catch(ed) 22.
+	*/
+
+	if (((data->x86_model == 0xf) && (data->x86_mask > 3)) ||
+		(data->x86_model == 0xe))  {
+		err = rdmsr_safe_on_pcpu(data->pdev->id, 0xee, &eax, &edx);
+		if (err < 0) {
+			dev_warn(&pdev->dev,
+				 "Unable to access MSR 0xEE, Tjmax left at %d "
+				 "degrees C\n", data->tjmax/1000);
+		} else if (eax & 0x40000000) {
+			data->tjmax = 85000;
+		}
+	}
+
+	/* Intel says that above should not work for desktop Core2 processors,
+	   but it seems to work. There is no other way how get the absolute
+	   readings. Warn the user about this. First check if are desktop,
+	   bit 50 of MSR_IA32_PLATFORM_ID should be 0.
+	*/
+
+	rdmsr_safe_on_pcpu(data->pdev->id, MSR_IA32_PLATFORM_ID, &eax, &edx);
+
+	if ((data->x86_model == 0xf) && (!(edx & 0x00040000))) {
+		dev_warn(&pdev->dev, "Using undocumented features, absolute "
+			 "temperature might be wrong!\n");
+	}
+
+	if ((err = sysfs_create_group(&pdev->dev.kobj, &coretemp_group)))
+		return err;
+
+	data->class_dev = hwmon_device_register(&pdev->dev);
+	if (IS_ERR(data->class_dev)) {
+		err = PTR_ERR(data->class_dev);
+		dev_err(&pdev->dev, "Class registration failed (%d)\n",
+			err);
+		goto exit_class;
+	}
+
+	return 0;
+
+exit_class:
+	sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
+	return err;
+}
+
+static int coretemp_remove(struct platform_device *pdev)
+{
+	struct coretemp_data *data = platform_get_drvdata(pdev);
+
+	hwmon_device_unregister(data->class_dev);
+	sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
+	return 0;
+}
+
+static struct platform_driver coretemp_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = DRVNAME,
+	},
+	.probe = coretemp_probe,
+	.remove = coretemp_remove,
+};
+
+static LIST_HEAD(pdev_list);
+static DEFINE_MUTEX(pdev_list_mutex);
+
+struct cpu_info {
+	struct pdev_entry *pdev_entry;
+	u8 x86;
+	u32 cpuid_6_eax;
+};
+
+static void get_cpuid_info(void *arg)
+{
+	struct cpu_info *info = arg;
+	struct pdev_entry *pdev_entry = info->pdev_entry;
+	u32 val = cpuid_eax(1);
+
+	info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff);
+	pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0);
+	pdev_entry->x86_mask = val & 0xf;
+
+	if (info->x86 != 6 || !pdev_entry->x86_model
+	    || wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) < 0
+	    || (sync_core(), rdmsr_safe(MSR_IA32_UCODE_REV,
+					&val, &pdev_entry->ucode_rev)) < 0)
+		pdev_entry->ucode_rev = ~0;
+
+	info->cpuid_6_eax = cpuid_eax(0) >= 6 ? cpuid_eax(6) : 0;
+}
+
+static int coretemp_device_add(unsigned int cpu)
+{
+	int err;
+	struct cpu_info info;
+	struct platform_device *pdev;
+	struct pdev_entry *pdev_entry;
+
+	pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL);
+	if (!pdev_entry)
+		return -ENOMEM;
+
+	info.pdev_entry = pdev_entry;
+	err = xen_set_physical_cpu_affinity(cpu);
+	if (!err) {
+		get_cpuid_info(&info);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (err > 0) {
+		static bool warned;
+
+		if (!warned) {
+			warned = true;
+			printk(KERN_WARNING DRVNAME
+			       ": Cannot set physical CPU affinity"
+			       " (assuming use of dom0_vcpus_pin)\n");
+		}
+		err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
+	}
+	if (err)
+		goto exit_entry_free;
+
+	/* check if family 6, models e, f */
+	if (info.x86 != 0x6 ||
+	    !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf))) {
+
+		/* supported CPU not found, but report the unknown
+		   family 6 CPU */
+		if ((info.x86 == 0x6) && (pdev_entry->x86_model > 0xf))
+			printk(KERN_WARNING DRVNAME ": Unknown CPU "
+				"model 0x%x", pdev_entry->x86_model);
+		goto exit_entry_free;
+	}
+
+	pdev = platform_device_alloc(DRVNAME, cpu);
+	if (!pdev) {
+		err = -ENOMEM;
+		printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+		goto exit_entry_free;
+	}
+
+	platform_set_drvdata(pdev, pdev_entry);
+	pdev_entry->pdev = pdev;
+
+	err = platform_device_add(pdev);
+	if (err) {
+		printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
+		       err);
+		goto exit_device_put;
+	}
+
+	mutex_lock(&pdev_list_mutex);
+	list_add_tail(&pdev_entry->list, &pdev_list);
+	mutex_unlock(&pdev_list_mutex);
+
+	return 0;
+
+exit_device_put:
+	platform_device_put(pdev);
+exit_entry_free:
+	kfree(info.pdev_entry);
+	return err;
+}
+
+static void coretemp_device_remove(unsigned int cpu)
+{
+	struct pdev_entry *p;
+
+	mutex_lock(&pdev_list_mutex);
+	list_for_each_entry(p, &pdev_list, list) {
+		if (p->pdev->id == cpu) {
+			platform_device_unregister(p->pdev);
+			list_del(&p->list);
+			kfree(p);
+		}
+	}
+	mutex_unlock(&pdev_list_mutex);
+}
+
+static int coretemp_cpu_callback(struct notifier_block *nfb,
+				 unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long) hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		coretemp_device_add(cpu);
+		break;
+	case CPU_DEAD:
+		coretemp_device_remove(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block coretemp_cpu_notifier = {
+	.notifier_call = coretemp_cpu_callback,
+};
+
+static int __init coretemp_init(void)
+{
+	int err = -ENODEV;
+
+	if (!is_initial_xendomain())
+		goto exit;
+
+	/* quick check if we run Intel */
+	if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL)
+		goto exit;
+
+	err = platform_driver_register(&coretemp_driver);
+	if (err)
+		goto exit;
+
+	err = register_pcpu_notifier(&coretemp_cpu_notifier);
+	if (err)
+		goto exit_driver_unreg;
+
+	if (list_empty(&pdev_list)) {
+		err = -ENODEV;
+		goto exit_notifier_unreg;
+	}
+
+	return 0;
+
+exit_notifier_unreg:
+	unregister_pcpu_notifier(&coretemp_cpu_notifier);
+exit_driver_unreg:
+	platform_driver_unregister(&coretemp_driver);
+exit:
+	return err;
+}
+
+static void __exit coretemp_exit(void)
+{
+	struct pdev_entry *p, *n;
+
+	unregister_pcpu_notifier(&coretemp_cpu_notifier);
+	mutex_lock(&pdev_list_mutex);
+	list_for_each_entry_safe(p, n, &pdev_list, list) {
+		platform_device_unregister(p->pdev);
+		list_del(&p->list);
+		kfree(p);
+	}
+	mutex_unlock(&pdev_list_mutex);
+	platform_driver_unregister(&coretemp_driver);
+}
+
+MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>");
+MODULE_DESCRIPTION("Intel Core temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(coretemp_init)
+module_exit(coretemp_exit)
--- 12.2.orig/drivers/pci/msi-xen.c	2011-11-03 12:03:26.000000000 +0100
+++ 12.2/drivers/pci/msi-xen.c	2012-04-04 10:13:55.000000000 +0200
@@ -12,16 +12,15 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
-#include <linux/smp_lock.h>
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
 #include <linux/msi.h>
+#include <linux/smp.h>
 
 #include <xen/evtchn.h>
 
 #include <asm/errno.h>
 #include <asm/io.h>
-#include <asm/smp.h>
 
 #include "pci.h"
 #include "msi.h"
@@ -40,7 +39,6 @@ struct msi_dev_list {
 	struct pci_dev *dev;
 	struct list_head list;
 	spinlock_t pirq_list_lock;
-	struct list_head pirq_list_head;
 	/* Store default pre-assigned irq */
 	unsigned int default_irq;
 };
@@ -108,7 +106,6 @@ static struct msi_dev_list *get_msi_dev_
 
 	ret->dev = dev;
 	spin_lock_init(&ret->pirq_list_lock);
-	INIT_LIST_HEAD(&ret->pirq_list_head);
 	list_add_tail(&ret->list, &msi_dev_head);
 	spin_unlock_irqrestore(&msi_dev_lock, flags);
 	return ret;
@@ -125,7 +122,7 @@ static int attach_pirq_entry(int pirq, i
 	entry->pirq = pirq;
 	entry->entry_nr = entry_nr;
 	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
-	list_add_tail(&entry->list, &msi_dev_entry->pirq_list_head);
+	list_add_tail(&entry->list, &msi_dev_entry->dev->msi_list);
 	spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
 	return 0;
 }
@@ -136,7 +133,7 @@ static void detach_pirq_entry(int entry_
 	unsigned long flags;
 	struct msi_pirq_entry *pirq_entry;
 
-	list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
+	list_for_each_entry(pirq_entry, &msi_dev_entry->dev->msi_list, list) {
 		if (pirq_entry->entry_nr == entry_nr) {
 			spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
 			list_del(&pirq_entry->list);
@@ -161,6 +158,7 @@ int register_msi_get_owner(int (*func)(s
 	get_owner = func;
 	return 0;
 }
+EXPORT_SYMBOL(register_msi_get_owner);
 
 int unregister_msi_get_owner(int (*func)(struct pci_dev *dev))
 {
@@ -169,6 +167,7 @@ int unregister_msi_get_owner(int (*func)
 	get_owner = NULL;
 	return 0;
 }
+EXPORT_SYMBOL(unregister_msi_get_owner);
 
 static int msi_get_dev_owner(struct pci_dev *dev)
 {
@@ -277,11 +276,6 @@ static int msi_map_vector(struct pci_dev
 		map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq));
 }
 
-static int msi_init(void)
-{
-	return 0;
-}
-
 #ifdef CONFIG_PM
 void pci_restore_msi_state(struct pci_dev *dev)
 {
@@ -384,7 +378,7 @@ static int msix_capability_init(struct p
 	/* MSI-X Table Initialization */
 	for (i = 0; i < nvec; i++) {
 		mapped = 0;
-		list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
+		list_for_each_entry(pirq_entry, &dev->msi_list, list) {
 			if (pirq_entry->entry_nr == entries[i].entry) {
 				printk(KERN_WARNING "msix entry %d for dev %02x:%02x:%01x are \
 				       not freed before acquire again.\n", entries[i].entry,
@@ -427,21 +421,32 @@ static int msix_capability_init(struct p
 }
 
 /**
- * pci_msi_supported - check whether MSI may be enabled on device
+ * pci_msi_check_device - check whether MSI may be enabled on a device
  * @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: how many MSIs have been requested ?
+ * @type: are we checking for MSI or MSI-X ?
  *
  * Look at global flags, the device itself, and its parent busses
- * to return 0 if MSI are supported for the device.
+ * to determine if MSI/-X are supported for the device. If MSI/-X is
+ * supported return 0, else return an error code.
  **/
-static
-int pci_msi_supported(struct pci_dev * dev)
+static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
 {
 	struct pci_bus *bus;
+	int ret;
 
 	/* MSI must be globally enabled and supported by the device */
 	if (!pci_msi_enable || !dev || dev->no_msi)
 		return -EINVAL;
 
+	/*
+	 * You can't ask to have 0 or less MSIs configured.
+	 *  a) it's stupid ..
+	 *  b) the list manipulation code assumes nvec >= 1.
+	 */
+	if (nvec < 1)
+		return -ERANGE;
+
 	/* Any bridge which does NOT route MSI transactions from it's
 	 * secondary bus to it's primary bus must set NO_MSI flag on
 	 * the secondary pci_bus.
@@ -452,6 +457,13 @@ int pci_msi_supported(struct pci_dev * d
 		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
 			return -EINVAL;
 
+	ret = arch_msi_check_device(dev, nvec, type);
+	if (ret)
+		return ret;
+
+	if (!pci_find_capability(dev, type))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -468,14 +480,11 @@ int pci_msi_supported(struct pci_dev * d
 extern int pci_frontend_enable_msi(struct pci_dev *dev);
 int pci_enable_msi(struct pci_dev* dev)
 {
-	int pos, temp, status;
+	int temp, status;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
-	if (pci_msi_supported(dev) < 0)
-		return -EINVAL;
-
-	status = msi_init();
-	if (status < 0)
+	status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
+	if (status)
 		return status;
 
 	if (!is_initial_xendomain()) {
@@ -499,10 +508,6 @@ int pci_enable_msi(struct pci_dev* dev)
 
 	temp = dev->irq;
 
-	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
-	if (!pos)
-		return -EINVAL;
-
 	/* Check whether driver already requested for MSI-X irqs */
 	if (dev->msix_enabled) {
 		printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
@@ -517,6 +522,7 @@ int pci_enable_msi(struct pci_dev* dev)
 
 	return status;
 }
+EXPORT_SYMBOL(pci_enable_msi);
 
 extern void pci_frontend_disable_msi(struct pci_dev* dev);
 void pci_disable_msi(struct pci_dev* dev)
@@ -524,12 +530,7 @@ void pci_disable_msi(struct pci_dev* dev
 	int pirq;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
-	if (!pci_msi_enable)
-		return;
-	if (!dev)
-		return;
-
-	if (!dev->msi_enabled)
+	if (!pci_msi_enable || !dev || !dev->msi_enabled)
 		return;
 
 	if (!is_initial_xendomain()) {
@@ -552,6 +553,7 @@ void pci_disable_msi(struct pci_dev* dev
 	pci_intx(dev, 1);		/* enable intx */
 	dev->msi_enabled = 0;
 }
+EXPORT_SYMBOL(pci_disable_msi);
 
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
@@ -577,7 +579,7 @@ int pci_enable_msix(struct pci_dev* dev,
 	u16 control;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
-	if (!entries || pci_msi_supported(dev) < 0)
+	if (!entries)
  		return -EINVAL;
 
 	if (!is_initial_xendomain()) {
@@ -597,7 +599,7 @@ int pci_enable_msix(struct pci_dev* dev,
 		for (i = 0; i < nvec; i++) {
 			int mapped = 0;
 
-			list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
+			list_for_each_entry(pirq_entry, &dev->msi_list, list) {
 				if (pirq_entry->entry_nr == entries[i].entry) {
 					irq = pirq_entry->pirq;
 					BUG_ON(entries[i].vector != evtchn_get_xen_pirq(irq));
@@ -618,14 +620,11 @@ int pci_enable_msix(struct pci_dev* dev,
 #endif
 	}
 
-	status = msi_init();
-	if (status < 0)
+	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
+	if (status)
 		return status;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	if (!pos)
- 		return -EINVAL;
-
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 	nr_entries = multi_msix_capable(control);
 	if (nvec > nr_entries)
@@ -657,16 +656,12 @@ int pci_enable_msix(struct pci_dev* dev,
 
 	return status;
 }
+EXPORT_SYMBOL(pci_enable_msix);
 
 extern void pci_frontend_disable_msix(struct pci_dev* dev);
 void pci_disable_msix(struct pci_dev* dev)
 {
-	if (!pci_msi_enable)
-		return;
-	if (!dev)
-		return;
-
-	if (!dev->msix_enabled)
+	if (!pci_msi_enable || !dev || !dev->msix_enabled)
 		return;
 
 	if (!is_initial_xendomain())
@@ -685,6 +680,7 @@ void pci_disable_msix(struct pci_dev* de
 	}
 	dev->msix_enabled = 0;
 }
+EXPORT_SYMBOL(pci_disable_msix);
 
 /**
  * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
@@ -707,16 +703,14 @@ void msi_remove_pci_irq_vectors(struct p
 	msi_dev_entry = get_msi_dev_pirq_list(dev);
 
 	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
-	if (!list_empty(&msi_dev_entry->pirq_list_head))
-		list_for_each_entry_safe(pirq_entry, tmp,
-		                         &msi_dev_entry->pirq_list_head, list) {
-			if (is_initial_xendomain())
-				msi_unmap_pirq(dev, pirq_entry->pirq);
-			else
-				evtchn_map_pirq(pirq_entry->pirq, 0);
-			list_del(&pirq_entry->list);
-			kfree(pirq_entry);
-		}
+	list_for_each_entry_safe(pirq_entry, tmp, &dev->msi_list, list) {
+		if (is_initial_xendomain())
+			msi_unmap_pirq(dev, pirq_entry->pirq);
+		else
+			evtchn_map_pirq(pirq_entry->pirq, 0);
+		list_del(&pirq_entry->list);
+		kfree(pirq_entry);
+	}
 	spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
 	dev->irq = msi_dev_entry->default_irq;
 }
@@ -726,12 +720,16 @@ void pci_no_msi(void)
 	pci_msi_enable = 0;
 }
 
-EXPORT_SYMBOL(pci_enable_msi);
-EXPORT_SYMBOL(pci_disable_msi);
-EXPORT_SYMBOL(pci_enable_msix);
-EXPORT_SYMBOL(pci_disable_msix);
-#ifdef CONFIG_XEN
-EXPORT_SYMBOL(register_msi_get_owner);
-EXPORT_SYMBOL(unregister_msi_get_owner);
-#endif
+void pci_msi_init_pci_dev(struct pci_dev *dev)
+{
+	INIT_LIST_HEAD(&dev->msi_list);
+}
 
+
+/* Arch hooks */
+
+int __attribute__ ((weak))
+arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
+{
+	return 0;
+}
--- 12.2.orig/drivers/xen/blkfront/blkfront.c	2012-06-12 15:12:36.000000000 +0200
+++ 12.2/drivers/xen/blkfront/blkfront.c	2012-03-12 13:33:33.000000000 +0100
@@ -244,7 +244,7 @@ static int setup_blkring(struct xenbus_d
 	info->ring_ref = err;
 
 	err = bind_listening_port_to_irqhandler(
-		dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+		dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info);
 	if (err <= 0) {
 		xenbus_dev_fatal(dev, err,
 				 "bind_listening_port_to_irqhandler");
--- 12.2.orig/drivers/xen/core/machine_reboot.c	2011-10-17 10:45:09.000000000 +0200
+++ 12.2/drivers/xen/core/machine_reboot.c	2011-01-31 17:32:29.000000000 +0100
@@ -80,6 +80,8 @@ static void post_suspend(int suspend_can
 #ifdef CONFIG_SMP
 		cpu_initialized_map = cpu_online_map;
 #endif
+		for_each_possible_cpu(i)
+			setup_runstate_area(i);
 	}
 
 	shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
--- 12.2.orig/drivers/xen/core/smpboot.c	2012-01-20 14:45:36.000000000 +0100
+++ 12.2/drivers/xen/core/smpboot.c	2012-03-22 16:08:04.000000000 +0100
@@ -154,13 +154,12 @@ static void xen_smp_intr_exit(unsigned i
 
 void __cpuinit cpu_bringup(void)
 {
+	cpu_init();
 #ifdef __i386__
-	cpu_set_gdt(current_thread_info()->cpu);
-	secondary_cpu_init();
+	identify_secondary_cpu(cpu_data + smp_processor_id());
 #else
-	cpu_init();
-#endif
 	identify_cpu(cpu_data + smp_processor_id());
+#endif
 	touch_softlockup_watchdog();
 	preempt_disable();
 	lock_ipi_call_lock();
@@ -182,11 +181,6 @@ static void __cpuinit cpu_initialize_con
 	static DEFINE_SPINLOCK(ctxt_lock);
 
 	struct task_struct *idle = idle_task(cpu);
-#ifdef __x86_64__
-	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
-#else
-	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-#endif
 
 	if (cpu_test_and_set(cpu, cpu_initialized_map))
 		return;
@@ -204,11 +198,11 @@ static void __cpuinit cpu_initialize_con
 
 	smp_trap_init(ctxt.trap_ctxt);
 
-
-	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
-	ctxt.gdt_ents      = gdt_descr->size / 8;
+	ctxt.gdt_ents = GDT_SIZE / 8;
 
 #ifdef __i386__
+	ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
+
 	ctxt.user_regs.cs = __KERNEL_CS;
 	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
 
@@ -221,7 +215,11 @@ static void __cpuinit cpu_initialize_con
 	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
 
 	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+
+	ctxt.user_regs.fs = __KERNEL_PERCPU;
 #else /* __x86_64__ */
+	ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);
+
 	ctxt.user_regs.cs = __KERNEL_CS;
 	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
 
@@ -251,9 +249,8 @@ void __init smp_prepare_cpus(unsigned in
 	struct vcpu_get_physid cpu_id;
 #ifdef __x86_64__
 	struct desc_ptr *gdt_descr;
-#else
-	struct Xgt_desc_struct *gdt_descr;
 #endif
+	void *gdt_addr;
 
 	apicid = 0;
 	if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
@@ -302,14 +299,12 @@ void __init smp_prepare_cpus(unsigned in
 		}
 		gdt_descr->size = GDT_SIZE;
 		memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
+		gdt_addr = (void *)gdt_descr->address;
 #else
-		if (unlikely(!init_gdt(cpu, idle)))
-			continue;
-		gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+		init_gdt(cpu);
+		gdt_addr = get_cpu_gdt_table(cpu);
 #endif
-		make_page_readonly(
-			(void *)gdt_descr->address,
-			XENFEAT_writable_descriptor_tables);
+		make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
 
 		apicid = cpu;
 		if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
@@ -323,6 +318,8 @@ void __init smp_prepare_cpus(unsigned in
 		cpu_pda(cpu)->pcurrent = idle;
 		cpu_pda(cpu)->cpunumber = cpu;
 		clear_tsk_thread_flag(idle, TIF_FORK);
+#else
+	 	per_cpu(current_task, cpu) = idle;
 #endif
 
 		irq_ctx_init(cpu);
@@ -347,8 +344,12 @@ void __init smp_prepare_cpus(unsigned in
 #endif
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+void __init smp_prepare_boot_cpu(void)
 {
+#ifdef __i386__
+	init_gdt(smp_processor_id());
+	switch_to_new_gdt();
+#endif
 	prefill_possible_map();
 }
 
--- 12.2.orig/drivers/xen/fbfront/xenkbd.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/drivers/xen/fbfront/xenkbd.c	2011-10-04 15:30:07.000000000 +0200
@@ -137,11 +137,11 @@ int __devinit xenkbd_probe(struct xenbus
 	kbd->id.bustype = BUS_PCI;
 	kbd->id.vendor = 0x5853;
 	kbd->id.product = 0xffff;
-	kbd->evbit[0] = BIT(EV_KEY);
+	__set_bit(EV_KEY, kbd->evbit);
 	for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
-		set_bit(i, kbd->keybit);
+		__set_bit(i, kbd->keybit);
 	for (i = KEY_OK; i < KEY_MAX; i++)
-		set_bit(i, kbd->keybit);
+		__set_bit(i, kbd->keybit);
 
 	ret = input_register_device(kbd);
 	if (ret) {
@@ -166,11 +166,10 @@ int __devinit xenkbd_probe(struct xenbus
 		input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
 		input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
 	} else {
-		__set_bit(REL_X, ptr->relbit);
-		__set_bit(REL_Y, ptr->relbit);
+		input_set_capability(ptr, EV_REL, REL_X);
+		input_set_capability(ptr, EV_REL, REL_Y);
 	}
-	__set_bit(REL_WHEEL, ptr->relbit);
-	__set_bit(EV_REL, ptr->evbit);
+	input_set_capability(ptr, EV_REL, REL_WHEEL);
 
 	__set_bit(EV_KEY, ptr->evbit);
 	for (i = BTN_LEFT; i <= BTN_TASK; i++)
--- 12.2.orig/drivers/xen/netback/common.h	2012-06-06 13:47:45.000000000 +0200
+++ 12.2/drivers/xen/netback/common.h	2012-06-06 13:49:42.000000000 +0200
@@ -98,7 +98,6 @@ typedef struct netif_st {
 	struct list_head list;  /* scheduling list */
 	atomic_t         refcnt;
 	struct net_device *dev;
-	struct net_device_stats stats;
 
 	unsigned int carrier;
 
@@ -202,7 +201,6 @@ void netif_schedule_work(netif_t *netif)
 void netif_deschedule_work(netif_t *netif);
 
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
 irqreturn_t netif_be_int(int irq, void *dev_id);
 
 static inline int netbk_can_queue(struct net_device *dev)
--- 12.2.orig/drivers/xen/netback/interface.c	2011-11-03 12:03:20.000000000 +0100
+++ 12.2/drivers/xen/netback/interface.c	2011-04-11 14:38:22.000000000 +0200
@@ -257,7 +257,6 @@ netif_t *netif_alloc(struct device *pare
 	init_timer(&netif->tx_queue_timeout);
 
 	dev->hard_start_xmit = netif_be_start_xmit;
-	dev->get_stats       = netif_be_get_stats;
 	dev->open            = net_open;
 	dev->stop            = net_close;
 	dev->change_mtu	     = netbk_change_mtu;
--- 12.2.orig/drivers/xen/netback/loopback.c	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/drivers/xen/netback/loopback.c	2011-01-31 17:32:29.000000000 +0100
@@ -62,14 +62,17 @@ MODULE_PARM_DESC(nloopbacks, "Number of 
 
 struct net_private {
 	struct net_device *loopback_dev;
-	struct net_device_stats stats;
 	int loop_idx;
 };
 
+static inline struct net_private *loopback_priv(struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
 static int loopback_open(struct net_device *dev)
 {
-	struct net_private *np = netdev_priv(dev);
-	memset(&np->stats, 0, sizeof(np->stats));
+	memset(&dev->stats, 0, sizeof(dev->stats));
 	netif_start_queue(dev);
 	return 0;
 }
@@ -130,10 +133,8 @@ static int skb_remove_foreign_references
 
 static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_private *np = netdev_priv(dev);
-
 	if (!skb_remove_foreign_references(skb)) {
-		np->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		dev_kfree_skb(skb);
 		return 0;
 	}
@@ -143,19 +144,17 @@ static int loopback_start_xmit(struct sk
 
 	skb_orphan(skb);
 
-	np->stats.tx_bytes += skb->len;
-	np->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
 
 	/* Switch to loopback context. */
-	dev = np->loopback_dev;
-	np  = netdev_priv(dev);
+	dev = loopback_priv(dev)->loopback_dev;
 
-	np->stats.rx_bytes += skb->len;
-	np->stats.rx_packets++;
+	dev->stats.rx_bytes += skb->len;
+	dev->stats.rx_packets++;
 
 	skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
 	skb->protocol = eth_type_trans(skb, dev);
-	skb->dev      = dev;
 	dev->last_rx  = jiffies;
 
 	/* Flush netfilter context: rx'ed skbuffs not expected to have any. */
@@ -167,17 +166,11 @@ static int loopback_start_xmit(struct sk
 	return 0;
 }
 
-static struct net_device_stats *loopback_get_stats(struct net_device *dev)
-{
-	struct net_private *np = netdev_priv(dev);
-	return &np->stats;
-}
-
 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
 	strcpy(info->driver, "netloop");
 	snprintf(info->bus_info, ETHTOOL_BUSINFO_LEN, "vif-0-%d",
-		 ((struct net_private *)netdev_priv(dev))->loop_idx);
+		 loopback_priv(dev)->loop_idx);
 }
 
 static struct ethtool_ops network_ethtool_ops =
@@ -204,7 +197,7 @@ static void loopback_set_multicast_list(
 static void loopback_construct(struct net_device *dev, struct net_device *lo,
 			       int loop_idx)
 {
-	struct net_private *np = netdev_priv(dev);
+	struct net_private *np = loopback_priv(dev);
 
 	np->loopback_dev     = lo;
 	np->loop_idx         = loop_idx;
@@ -212,7 +205,6 @@ static void loopback_construct(struct ne
 	dev->open            = loopback_open;
 	dev->stop            = loopback_close;
 	dev->hard_start_xmit = loopback_start_xmit;
-	dev->get_stats       = loopback_get_stats;
 	dev->set_multicast_list = loopback_set_multicast_list;
 	dev->change_mtu	     = NULL; /* allow arbitrary mtu */
 
--- 12.2.orig/drivers/xen/netback/netback.c	2012-06-06 13:47:50.000000000 +0200
+++ 12.2/drivers/xen/netback/netback.c	2012-06-06 13:49:44.000000000 +0200
@@ -210,7 +210,7 @@ static struct sk_buff *netbk_copy_skb(st
 		goto err;
 
 	skb_reserve(nskb, 16 + NET_IP_ALIGN);
-	headlen = nskb->end - nskb->data;
+	headlen = skb_end_pointer(nskb) - nskb->data;
 	if (headlen > skb_headlen(skb))
 		headlen = skb_headlen(skb);
 	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
@@ -256,11 +256,15 @@ static struct sk_buff *netbk_copy_skb(st
 		len -= copy;
 	}
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	offset = 0;
+#else
 	offset = nskb->data - skb->data;
+#endif
 
-	nskb->h.raw = skb->h.raw + offset;
-	nskb->nh.raw = skb->nh.raw + offset;
-	nskb->mac.raw = skb->mac.raw + offset;
+	nskb->transport_header = skb->transport_header + offset;
+	nskb->network_header   = skb->network_header   + offset;
+	nskb->mac_header       = skb->mac_header       + offset;
 
 	return nskb;
 
@@ -350,7 +354,7 @@ int netif_be_start_xmit(struct sk_buff *
 	return 0;
 
  drop:
-	netif->stats.tx_dropped++;
+	dev->stats.tx_dropped++;
 	dev_kfree_skb(skb);
 	return 0;
 }
@@ -694,8 +698,8 @@ static void net_rx_action(unsigned long 
 			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
 		}
 
-		netif->stats.tx_bytes += skb->len;
-		netif->stats.tx_packets++;
+		skb->dev->stats.tx_bytes += skb->len;
+		skb->dev->stats.tx_packets++;
 
 		id = meta[npo.meta_cons].id;
 		flags = nr_frags ? NETRXF_more_data : 0;
@@ -780,12 +784,6 @@ static void netbk_tx_pending_timeout(uns
 	tasklet_schedule(&net_tx_tasklet);
 }
 
-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-{
-	netif_t *netif = netdev_priv(dev);
-	return &netif->stats;
-}
-
 static int __on_net_schedule_list(netif_t *netif)
 {
 	return netif->list.next != NULL;
@@ -1422,10 +1420,12 @@ static void net_tx_action(unsigned long 
 
 	mop = tx_map_ops;
 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+		struct net_device *dev;
 		netif_tx_request_t *txp;
 
 		pending_idx = *((u16 *)skb->data);
 		netif       = pending_tx_info[pending_idx].netif;
+		dev         = netif->dev;
 		txp         = &pending_tx_info[pending_idx].req;
 
 		/* Check the remap error code. */
@@ -1433,6 +1433,7 @@ static void net_tx_action(unsigned long 
 			DPRINTK("netback grant failed.\n");
 			skb_shinfo(skb)->nr_frags = 0;
 			kfree_skb(skb);
+			dev->stats.rx_dropped++;
 			continue;
 		}
 
@@ -1468,8 +1469,7 @@ static void net_tx_action(unsigned long 
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}
 
-		skb->dev      = netif->dev;
-		skb->protocol = eth_type_trans(skb, skb->dev);
+		skb->protocol = eth_type_trans(skb, dev);
 
 		if (skb_checksum_setup(skb, &netif->rx_gso_csum_fixups)) {
 			DPRINTK("Can't setup checksum in net_tx_action\n");
@@ -1477,18 +1477,19 @@ static void net_tx_action(unsigned long 
 			continue;
 		}
 
-		netif->stats.rx_bytes += skb->len;
-		netif->stats.rx_packets++;
-
 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
 		    unlikely(skb_linearize(skb))) {
 			DPRINTK("Can't linearize skb in net_tx_action.\n");
 			kfree_skb(skb);
+			dev->stats.rx_errors++;
 			continue;
 		}
 
+		dev->stats.rx_bytes += skb->len;
+		dev->stats.rx_packets++;
+
 		netif_rx(skb);
-		netif->dev->last_rx = jiffies;
+		dev->last_rx = jiffies;
 	}
 
  out:
@@ -1685,7 +1686,7 @@ static int __init netback_init(void)
 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
 				      0,
 				      netif_be_dbg,
-				      SA_SHIRQ, 
+				      IRQF_SHARED,
 				      "net-be-dbg",
 				      &netif_be_dbg);
 #endif
--- 12.2.orig/drivers/xen/netback/xenbus.c	2012-01-06 10:19:49.000000000 +0100
+++ 12.2/drivers/xen/netback/xenbus.c	2012-01-03 11:55:56.000000000 +0100
@@ -19,6 +19,7 @@
 
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/rwsem.h>
 #include <xen/xenbus.h>
 #include "common.h"
 
@@ -28,12 +29,13 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
+static DECLARE_RWSEM(teardown_sem);
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void backend_create_netif(struct backend_info *be);
 static void unregister_hotplug_status_watch(struct backend_info *be);
-static void netback_disconnect(struct device *);
+static void netback_disconnect(struct device *, bool);
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -41,23 +43,29 @@ static int netback_remove(struct xenbus_
 
 	netback_remove_accelerators(be, dev);
 
-	netback_disconnect(&dev->dev);
+	netback_disconnect(&dev->dev, true);
 	kfree(be);
-	dev->dev.driver_data = NULL;
 	return 0;
 }
 
-static void netback_disconnect(struct device *xbdev_dev)
+static void netback_disconnect(struct device *xbdev_dev, bool clear)
 {
 	struct backend_info *be = xbdev_dev->driver_data;
 
 	unregister_hotplug_status_watch(be);
-	if (be->netif) {
+	if (be->netif)
 		kobject_uevent(&xbdev_dev->kobj, KOBJ_OFFLINE);
-		xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
+
+	xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
+
+	down_write(&teardown_sem);
+	if (be->netif) {
 		netif_disconnect(be);
 		be->netif = NULL;
 	}
+	if (clear)
+		xbdev_dev->driver_data = NULL;
+	up_write(&teardown_sem);
 }
 
 /**
@@ -162,8 +170,7 @@ fail:
 static int netback_uevent(struct xenbus_device *xdev, char **envp,
 			  int num_envp, char *buffer, int buffer_size)
 {
-	struct backend_info *be = xdev->dev.driver_data;
-	netif_t *netif = be->netif;
+	struct backend_info *be;
 	int i = 0, length = 0;
 	char *val;
 
@@ -181,8 +188,12 @@ static int netback_uevent(struct xenbus_
 		kfree(val);
 	}
 
-	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
-		       "vif=%s", netif->dev->name);
+	down_read(&teardown_sem);
+	be = xdev->dev.driver_data;
+	if (be && be->netif)
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "vif=%s", be->netif->dev->name);
+	up_read(&teardown_sem);
 
 	envp[i] = NULL;
 
@@ -195,6 +206,7 @@ static void backend_create_netif(struct 
 	int err;
 	long handle;
 	struct xenbus_device *dev = be->dev;
+	netif_t *netif;
 
 	if (be->netif != NULL)
 		return;
@@ -205,13 +217,13 @@ static void backend_create_netif(struct 
 		return;
 	}
 
-	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
-	if (IS_ERR(be->netif)) {
-		err = PTR_ERR(be->netif);
-		be->netif = NULL;
+	netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+	if (IS_ERR(netif)) {
+		err = PTR_ERR(netif);
 		xenbus_dev_fatal(dev, err, "creating interface");
 		return;
 	}
+	be->netif = netif;
 
 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
 }
@@ -252,7 +264,7 @@ static void frontend_changed(struct xenb
 		break;
 
 	case XenbusStateClosing:
-		netback_disconnect(&dev->dev);
+		netback_disconnect(&dev->dev, false);
 		xenbus_switch_state(dev, XenbusStateClosing);
 		break;
 
--- 12.2.orig/drivers/xen/netfront/accel.c	2009-05-04 10:01:03.000000000 +0200
+++ 12.2/drivers/xen/netfront/accel.c	2011-01-31 17:32:29.000000000 +0100
@@ -548,7 +548,7 @@ static void accelerator_remove_hooks(str
 
 			/* Last chance to get statistics from the accelerator */
 			vif_state->hooks->get_stats(vif_state->np->netdev,
-						    &vif_state->np->stats);
+						    &vif_state->np->netdev->stats);
 
 			spin_unlock_irqrestore(&accelerator->vif_states_lock,
 					       flags);
@@ -604,7 +604,8 @@ static int do_remove(struct netfront_inf
 		spin_lock_irqsave(&accelerator->vif_states_lock, flags);
 
 		/* Last chance to get statistics from the accelerator */
-		np->accel_vif_state.hooks->get_stats(np->netdev, &np->stats);
+		np->accel_vif_state.hooks->get_stats(np->netdev,
+						     &np->netdev->stats);
 
 		spin_unlock_irqrestore(&accelerator->vif_states_lock, 
 				       flags);
@@ -819,7 +820,7 @@ int netfront_accelerator_call_get_stats(
 		if (np->accel_vif_state.hooks && 
 		    np->accelerator == accelerator)
  			rc = np->accel_vif_state.hooks->get_stats(dev,
-								  &np->stats);
+								  &dev->stats);
 		spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
 	}
 	return rc;
--- 12.2.orig/drivers/xen/netfront/netfront.c	2012-06-06 13:49:14.000000000 +0200
+++ 12.2/drivers/xen/netfront/netfront.c	2012-06-06 13:49:50.000000000 +0200
@@ -513,7 +513,7 @@ static int setup_device(struct xenbus_de
 	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
 
 	err = bind_listening_port_to_irqhandler(
-		dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
+		dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name,
 		netdev);
 	if (err < 0)
 		goto fail;
@@ -628,8 +628,6 @@ static int network_open(struct net_devic
 {
 	struct netfront_info *np = netdev_priv(dev);
 
-	memset(&np->stats, 0, sizeof(np->stats));
-
 	spin_lock_bh(&np->rx_lock);
 	if (netfront_carrier_ok(np)) {
 		network_alloc_rx_buffers(dev);
@@ -1030,8 +1028,8 @@ static int network_start_xmit(struct sk_
 	if (notify)
 		notify_remote_via_irq(np->irq);
 
-	np->stats.tx_bytes += skb->len;
-	np->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
 	dev->trans_start = jiffies;
 
 	/* Note: It is not safe to access skb after network_tx_buf_gc()! */
@@ -1045,7 +1043,7 @@ static int network_start_xmit(struct sk_
 	return 0;
 
  drop:
-	np->stats.tx_dropped++;
+	dev->stats.tx_dropped++;
 	dev_kfree_skb(skb);
 	return 0;
 }
@@ -1363,7 +1361,7 @@ static int netif_poll(struct net_device 
 err:	
 			while ((skb = __skb_dequeue(&tmpq)))
 				__skb_queue_tail(&errq, skb);
-			np->stats.rx_errors++;
+			dev->stats.rx_errors++;
 			i = np->rx.rsp_cons;
 			continue;
 		}
@@ -1431,8 +1429,8 @@ err:	
 		else
 			skb->ip_summed = CHECKSUM_NONE;
 
-		np->stats.rx_packets++;
-		np->stats.rx_bytes += skb->len;
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += skb->len;
 
 		__skb_queue_tail(&rxq, skb);
 
@@ -1683,7 +1681,7 @@ static struct net_device_stats *network_
 	struct netfront_info *np = netdev_priv(dev);
 
 	netfront_accelerator_call_get_stats(np, dev);
-	return &np->stats;
+	return &dev->stats;
 }
 
 static int xennet_set_mac_address(struct net_device *dev, void *p)
--- 12.2.orig/drivers/xen/netfront/netfront.h	2011-02-09 15:35:17.000000000 +0100
+++ 12.2/drivers/xen/netfront/netfront.h	2012-06-06 13:49:51.000000000 +0200
@@ -150,9 +150,6 @@ struct netfront_info {
 	struct list_head list;
 	struct net_device *netdev;
 
-	struct net_device_stats stats;
-	unsigned long rx_gso_csum_fixups;
-
 	struct netif_tx_front_ring tx;
 	struct netif_rx_front_ring rx;
 
@@ -194,6 +191,9 @@ struct netfront_info {
 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
 
+	/* Statistics */
+	unsigned long rx_gso_csum_fixups;
+
 	/* Private pointer to state internal to accelerator module */
 	void *accel_priv;
 	/* The accelerator used by this netfront device */
--- 12.2.orig/drivers/xen/pcifront/xenbus.c	2012-03-12 13:31:59.000000000 +0100
+++ 12.2/drivers/xen/pcifront/xenbus.c	2012-03-12 13:33:18.000000000 +0100
@@ -11,10 +11,6 @@
 #include <xen/gnttab.h>
 #include "pcifront.h"
 
-#ifndef __init_refok
-#define __init_refok
-#endif
-
 #define INVALID_EVTCHN    (-1)
 
 static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
@@ -100,7 +96,7 @@ static int pcifront_publish_info(struct 
 
 	err = bind_caller_port_to_irqhandler(pdev->evtchn,
 					     pcifront_handler_aer,
-					     SA_SAMPLE_RANDOM,
+					     IRQF_SAMPLE_RANDOM,
 					     "pcifront", pdev);
 	if (err < 0) {
 		xenbus_dev_fatal(pdev->xdev, err,
--- 12.2.orig/drivers/xen/scsiback/emulate.c	2012-01-06 10:19:49.000000000 +0100
+++ 12.2/drivers/xen/scsiback/emulate.c	2012-01-09 11:04:26.000000000 +0100
@@ -153,8 +153,9 @@ static int __copy_to_sg(struct scatterli
 	return -ENOMEM;
 }
 
-static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg,
-		 void *buf, unsigned int buflen)
+static int __maybe_unused __copy_from_sg(struct scatterlist *sg,
+					 unsigned int nr_sg, void *buf,
+					 unsigned int buflen)
 {
 	void *from;
 	void *to = buf;
--- 12.2.orig/drivers/xen/scsifront/xenbus.c	2011-12-21 10:02:58.000000000 +0100
+++ 12.2/drivers/xen/scsifront/xenbus.c	2011-02-08 10:03:46.000000000 +0100
@@ -100,7 +100,7 @@ static int scsifront_alloc_ring(struct v
 
 	err = bind_listening_port_to_irqhandler(
 			dev->otherend_id, scsifront_intr,
-			SA_SAMPLE_RANDOM, "scsifront", info);
+			IRQF_SAMPLE_RANDOM, "scsifront", info);
 
 	if (err <= 0) {
 		xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
--- 12.2.orig/drivers/xen/sfc_netback/accel_fwd.c	2008-04-02 12:34:02.000000000 +0200
+++ 12.2/drivers/xen/sfc_netback/accel_fwd.c	2011-01-31 17:32:29.000000000 +0100
@@ -308,7 +308,7 @@ static struct netback_accel *for_a_vnic(
 static inline int packet_is_arp_reply(struct sk_buff *skb)
 {
 	return skb->protocol == ntohs(ETH_P_ARP) 
-		&& skb->nh.arph->ar_op == ntohs(ARPOP_REPLY);
+		&& arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY);
 }
 
 
@@ -392,12 +392,13 @@ void netback_accel_tx_packet(struct sk_b
 
 	BUG_ON(fwd_priv == NULL);
 
-	if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) {
+	if (is_broadcast_ether_addr(skb_mac_header(skb))
+	    && packet_is_arp_reply(skb)) {
 		/*
 		 * update our fast path forwarding to reflect this
 		 * gratuitous ARP
 		 */ 
-		mac = skb->mac.raw+ETH_ALEN;
+		mac = skb_mac_header(skb)+ETH_ALEN;
 
 		DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
 			__FUNCTION__, MAC_ARG(mac));
--- 12.2.orig/drivers/xen/sfc_netback/accel_solarflare.c	2010-01-18 15:23:12.000000000 +0100
+++ 12.2/drivers/xen/sfc_netback/accel_solarflare.c	2011-01-31 17:32:29.000000000 +0100
@@ -113,7 +113,7 @@ bend_dl_tx_packet(struct efx_dl_device *
 	BUG_ON(port == NULL);
 
 	NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
-	if (skb->mac.raw != NULL)
+	if (skb_mac_header_was_set(skb))
 		netback_accel_tx_packet(skb, port->fwd_priv);
 	else {
 		DPRINTK("Ignoring packet with missing mac address\n");
--- 12.2.orig/drivers/xen/sfc_netfront/accel_tso.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel_tso.c	2011-01-31 17:32:29.000000000 +0100
@@ -33,10 +33,9 @@
 
 #include "accel_tso.h"
 
-#define PTR_DIFF(p1, p2)  ((u8*)(p1) - (u8*)(p2))
-#define ETH_HDR_LEN(skb)  ((skb)->nh.raw - (skb)->data)
-#define SKB_TCP_OFF(skb)  PTR_DIFF ((skb)->h.th, (skb)->data)
-#define SKB_IP_OFF(skb)   PTR_DIFF ((skb)->nh.iph, (skb)->data)
+#define ETH_HDR_LEN(skb)  skb_network_offset(skb)
+#define SKB_TCP_OFF(skb)  skb_transport_offset(skb)
+#define SKB_IP_OFF(skb)   skb_network_offset(skb)
 
 /*
  * Set a maximum number of buffers in each output packet to make life
@@ -114,9 +113,8 @@ struct netfront_accel_tso_state {
 static inline void tso_check_safe(struct sk_buff *skb) {
 	EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
 	EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
-	EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
-	EPRINTK_ON((SKB_TCP_OFF(skb)
-		    + (skb->h.th->doff << 2u)) > skb_headlen(skb));
+	EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
+	EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb));
 }
 
 
@@ -129,17 +127,17 @@ static inline void tso_start(struct netf
 	 * All ethernet/IP/TCP headers combined size is TCP header size
 	 * plus offset of TCP header relative to start of packet.
  	 */
-	st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
+	st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb);
 	st->p.full_packet_size = (st->p.header_length
 				  + skb_shinfo(skb)->gso_size);
 	st->p.gso_size = skb_shinfo(skb)->gso_size;
 
-	st->p.ip_id = htons(skb->nh.iph->id);
-	st->seqnum = ntohl(skb->h.th->seq);
+	st->p.ip_id = htons(ip_hdr(skb)->id);
+	st->seqnum = ntohl(tcp_hdr(skb)->seq);
 
-	EPRINTK_ON(skb->h.th->urg);
-	EPRINTK_ON(skb->h.th->syn);
-	EPRINTK_ON(skb->h.th->rst);
+	EPRINTK_ON(tcp_hdr(skb)->urg);
+	EPRINTK_ON(tcp_hdr(skb)->syn);
+	EPRINTK_ON(tcp_hdr(skb)->rst);
 
 	st->remaining_len = skb->len - st->p.header_length;
 
@@ -258,8 +256,8 @@ int tso_start_new_packet(netfront_accel_
 		/* This packet will be the last in the TSO burst. */
 		ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
 			     + st->remaining_len);
-		tsoh_th->fin = skb->h.th->fin;
-		tsoh_th->psh = skb->h.th->psh;
+		tsoh_th->fin = tcp_hdr(skb)->fin;
+		tsoh_th->psh = tcp_hdr(skb)->psh;
 	}
 
 	tsoh_iph->tot_len = htons(ip_length);
--- 12.2.orig/drivers/xen/sfc_netfront/accel_vi.c	2011-06-30 16:03:44.000000000 +0200
+++ 12.2/drivers/xen/sfc_netfront/accel_vi.c	2011-06-30 16:06:37.000000000 +0200
@@ -465,7 +465,7 @@ netfront_accel_enqueue_skb_multi(netfron
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* Set to zero to encourage falcon to work it out for us */
-		*(u16*)(skb->h.raw + skb->csum_offset) = 0;
+		*(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
 	}
 
 	if (multi_post_start_new_buffer(vnic, &state)) {
@@ -586,7 +586,7 @@ netfront_accel_enqueue_skb_single(netfro
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* Set to zero to encourage falcon to work it out for us */
-		*(u16*)(skb->h.raw + skb->csum_offset) = 0;
+		*(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
 	}
 	NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
 		(skb, idx, frag_data, frag_len, {
@@ -793,7 +793,6 @@ static void  netfront_accel_vi_rx_comple
 	}
 
 	net_dev = vnic->net_dev;
-	skb->dev = net_dev;
 	skb->protocol = eth_type_trans(skb, net_dev);
 	/* CHECKSUM_UNNECESSARY as hardware has done it already */
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
--- 12.2.orig/drivers/xen/sfc_netfront/accel_xenbus.c	2008-02-20 09:32:49.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel_xenbus.c	2011-01-31 17:32:29.000000000 +0100
@@ -356,7 +356,7 @@ static int vnic_setup_domU_shared_state(
 	/* Create xenbus msg event channel */
 	err = bind_listening_port_to_irqhandler
 		(dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
-		 SA_SAMPLE_RANDOM, "vnicctrl", vnic);
+		 IRQF_SAMPLE_RANDOM, "vnicctrl", vnic);
 	if (err < 0) {
 		EPRINTK("Couldn't bind msg event channel\n");
 		goto fail_msg_irq;
@@ -367,7 +367,7 @@ static int vnic_setup_domU_shared_state(
 	/* Create xenbus net event channel */
 	err = bind_listening_port_to_irqhandler
 		(dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
-		 SA_SAMPLE_RANDOM, "vnicfront", vnic);
+		 IRQF_SAMPLE_RANDOM, "vnicfront", vnic);
 	if (err < 0) {
 		EPRINTK("Couldn't bind net event channel\n");
 		goto fail_net_irq;
--- 12.2.orig/drivers/xen/usbfront/xenbus.c	2012-03-12 12:51:05.000000000 +0100
+++ 12.2/drivers/xen/usbfront/xenbus.c	2012-03-12 13:33:24.000000000 +0100
@@ -109,7 +109,7 @@ static int setup_rings(struct xenbus_dev
 	info->conn_ring_ref = err;
 
 	err = bind_listening_port_to_irqhandler(
-		dev->otherend_id, xenhcd_int, SA_SAMPLE_RANDOM, "usbif", info);
+		dev->otherend_id, xenhcd_int, IRQF_SAMPLE_RANDOM, "usbif", info);
 	if (err <= 0) {
 		xenbus_dev_fatal(dev, err,
 				 "bind_listening_port_to_irqhandler");
--- 12.2.orig/drivers/xen/xenbus/xenbus_xs.c	2012-01-20 14:18:49.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_xs.c	2011-06-30 16:06:45.000000000 +0200
@@ -538,18 +538,15 @@ int xenbus_printf(struct xenbus_transact
 {
 	va_list ap;
 	int ret;
-#define PRINTF_BUFFER_SIZE 4096
 	char *printf_buffer;
 
-	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
-	if (printf_buffer == NULL)
-		return -ENOMEM;
-
 	va_start(ap, fmt);
-	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+	printf_buffer = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
 	va_end(ap);
 
-	BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+	if (!printf_buffer)
+		return -ENOMEM;
+
 	ret = xenbus_write(t, dir, node, printf_buffer);
 
 	kfree(printf_buffer);
--- 12.2.orig/fs/aio.c	2012-05-08 10:48:08.000000000 +0200
+++ 12.2/fs/aio.c	2012-05-08 10:48:49.000000000 +0200
@@ -41,7 +41,7 @@
 
 #ifdef CONFIG_EPOLL
 #include <linux/poll.h>
-#include <linux/eventpoll.h>
+#include <linux/anon_inodes.h>
 #endif
 
 #if DEBUG > 1
@@ -1346,7 +1346,7 @@ static const struct file_operations aioq
 
 /* make_aio_fd:
  *  Create a file descriptor that can be used to poll the event queue.
- *  Based and piggybacked on the excellent epoll code.
+ *  Based on the excellent epoll code.
  */
 
 static int make_aio_fd(struct kioctx *ioctx)
@@ -1355,7 +1355,8 @@ static int make_aio_fd(struct kioctx *io
 	struct inode *inode;
 	struct file *file;
 
-	error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
+	error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
+				 &aioq_fops, ioctx);
 	if (error)
 		return error;
 
--- 12.2.orig/arch/x86/include/asm/boot.h	2012-06-20 12:12:06.000000000 +0200
+++ 12.2/arch/x86/include/asm/boot.h	2011-01-31 17:32:29.000000000 +0100
@@ -16,7 +16,7 @@
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 /* Minimum kernel alignment, as a power of two */
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
 #define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
 #else
 #define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_ORDER)
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/include/mach-xen/asm/cmpxchg_32.h	2011-12-23 11:18:49.000000000 +0100
@@ -0,0 +1,26 @@
+#ifndef _ASM_X86_XEN_CMPXCHG_32_H
+#define _ASM_X86_XEN_CMPXCHG_32_H
+
+#include_next <asm/cmpxchg_32.h>
+
+static inline u64 get_64bit(const volatile u64 *ptr)
+{
+	u64 res;
+	__asm__("movl %%ebx,%%eax\n"
+		"movl %%ecx,%%edx\n"
+		LOCK_PREFIX "cmpxchg8b %1"
+		: "=&A" (res) : "m" (*ptr));
+	return res;
+}
+
+static inline u64 get_64bit_local(const volatile u64 *ptr)
+{
+	u64 res;
+	__asm__("movl %%ebx,%%eax\n"
+		"movl %%ecx,%%edx\n"
+		"cmpxchg8b %1"
+		: "=&A" (res) : "m" (*ptr));
+	return res;
+}
+
+#endif /* _ASM_X86_XEN_CMPXCHG_32_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/desc_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/desc_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -11,23 +11,24 @@
 
 #include <asm/mmu.h>
 
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
-
 struct Xgt_desc_struct {
 	unsigned short size;
 	unsigned long address __attribute__((packed));
 	unsigned short pad;
 } __attribute__ ((packed));
 
-extern struct Xgt_desc_struct idt_descr;
-DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
-extern struct Xgt_desc_struct early_gdt_descr;
+struct gdt_page
+{
+	struct desc_struct gdt[GDT_ENTRIES];
+} __attribute__((aligned(PAGE_SIZE)));
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
 
 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {
-	return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
+	return per_cpu(gdt_page, cpu).gdt;
 }
 
+extern struct Xgt_desc_struct idt_descr;
 extern struct desc_struct idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
@@ -55,53 +56,32 @@ static inline void pack_gate(__u32 *a, _
 #define DESCTYPE_S	0x10	/* !system */
 
 #ifndef CONFIG_XEN
-#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
-
-#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
-#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+#define load_TR_desc() native_load_tr_desc()
+#define load_gdt(dtr) native_load_gdt(dtr)
+#define load_idt(dtr) native_load_idt(dtr)
 #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
 #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
 
-#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
-#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
-#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
+#define store_gdt(dtr) native_store_gdt(dtr)
+#define store_idt(dtr) native_store_idt(dtr)
+#define store_tr(tr) (tr = native_store_tr())
 #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
-#endif
 
-#if TLS_SIZE != 24
-# error update this code.
-#endif
-
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
-{
-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
-					      *(u64 *)&t->tls_array[i]) \
-		BUG()
-	C(0); C(1); C(2);
-#undef C
-}
+#define load_TLS(t, cpu) native_load_tls(t, cpu)
+#define set_ldt native_set_ldt
 
-#ifndef CONFIG_XEN
 #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
 
-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
+static inline void write_dt_entry(struct desc_struct *dt,
+				  int entry, u32 entry_low, u32 entry_high)
 {
-	__u32 *lp = (__u32 *)((char *)dt + entry*8);
-	*lp = entry_a;
-	*(lp+1) = entry_b;
+	dt[entry].a = entry_low;
+	dt[entry].b = entry_high;
 }
-#define set_ldt native_set_ldt
-#else
-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
-extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
-#define set_ldt xen_set_ldt
-#endif
 
-#ifndef CONFIG_XEN
-static inline fastcall void native_set_ldt(const void *addr,
-					   unsigned int entries)
+static inline void native_set_ldt(const void *addr, unsigned int entries)
 {
 	if (likely(entries == 0))
 		__asm__ __volatile__("lldt %w0"::"q" (0));
@@ -116,6 +96,65 @@ static inline fastcall void native_set_l
 		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
 }
+
+
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+
+static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
+{
+	asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
+{
+	asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
+{
+	asm ("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct Xgt_desc_struct *dtr)
+{
+	asm ("sidt %0":"=m" (*dtr));
+}
+
+static inline unsigned long native_store_tr(void)
+{
+	unsigned long tr;
+	asm ("str %0":"=r" (tr));
+	return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	unsigned int i;
+	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
+#else
+#define load_TLS(t, cpu) xen_load_tls(t, cpu)
+#define set_ldt xen_set_ldt
+
+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
+
+static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	unsigned int i;
+	struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN;
+
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
+						 *(u64 *)&t->tls_array[i]))
+			BUG();
+}
 #endif
 
 #ifndef CONFIG_X86_NO_IDT
--- 12.2.orig/arch/x86/include/mach-xen/asm/fixmap_32.h	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/fixmap_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -19,10 +19,8 @@
  * the start of the fixmap.
  */
 extern unsigned long __FIXADDR_TOP;
-#ifdef CONFIG_COMPAT_VDSO
-#define FIXADDR_USER_START	__fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END	__fix_to_virt(FIX_VDSO - 1)
-#endif
+#define FIXADDR_USER_START     __fix_to_virt(FIX_VDSO)
+#define FIXADDR_USER_END       __fix_to_virt(FIX_VDSO - 1)
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -85,6 +83,9 @@ enum fixed_addresses {
 #ifdef CONFIG_PCI_MMCONFIG
 	FIX_PCIE_MCFG,
 #endif
+#ifdef CONFIG_PARAVIRT
+	FIX_PARAVIRT_BOOTMAP,
+#endif
 	FIX_SHARED_INFO,
 #define NR_FIX_ISAMAPS	256
 	FIX_ISAMAP_END,
--- 12.2.orig/arch/x86/include/mach-xen/asm/highmem.h	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/highmem.h	2011-01-31 17:32:29.000000000 +0100
@@ -67,12 +67,17 @@ extern void FASTCALL(kunmap_high(struct 
 
 void *kmap(struct page *page);
 void kunmap(struct page *page);
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
-void *kmap_atomic_pte(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 struct page *kmap_atomic_to_page(void *ptr);
 
+#define kmap_atomic_pte(page, type) \
+	kmap_atomic_prot(page, type, \
+	                 test_bit(PG_pinned, &(page)->flags) \
+	                 ? PAGE_KERNEL_RO : kmap_prot)
+
 #define flush_cache_kmaps()	do { } while (0)
 
 void clear_highpage(struct page *);
--- 12.2.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/hypervisor.h	2011-01-31 17:32:29.000000000 +0100
@@ -69,6 +69,8 @@ extern start_info_t *xen_start_info;
 #define is_initial_xendomain() 0
 #endif
 
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
+
 /* arch/xen/kernel/evtchn.c */
 /* Force a proper event-channel callback from Xen. */
 void force_evtchn_callback(void);
@@ -104,8 +106,8 @@ void xen_set_ldt(const void *ptr, unsign
 #include <linux/cpumask.h>
 void xen_tlb_flush_all(void);
 void xen_invlpg_all(unsigned long ptr);
-void xen_tlb_flush_mask(cpumask_t *mask);
-void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr);
+void xen_tlb_flush_mask(const cpumask_t *mask);
+void xen_invlpg_mask(const cpumask_t *mask, unsigned long ptr);
 #else
 #define xen_tlb_flush_all xen_tlb_flush
 #define xen_invlpg_all xen_invlpg
@@ -156,7 +158,9 @@ static inline void arch_leave_lazy_mmu_m
 	xen_multicall_flush(false);
 }
 
-#ifndef arch_use_lazy_mmu_mode
+#if defined(CONFIG_X86_32)
+#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
+#elif !defined(arch_use_lazy_mmu_mode)
 #define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
 #endif
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/irqflags_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/irqflags_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -11,6 +11,40 @@
 #define _ASM_IRQFLAGS_H
 
 #ifndef __ASSEMBLY__
+#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
+
+#define xen_restore_fl(f)					\
+do {								\
+	vcpu_info_t *_vcpu;					\
+	barrier();						\
+	_vcpu = current_vcpu_info();				\
+	if ((_vcpu->evtchn_upcall_mask = (f)) == 0) {		\
+		barrier(); /* unmask then check (avoid races) */\
+		if (unlikely(_vcpu->evtchn_upcall_pending))	\
+			force_evtchn_callback();		\
+	}							\
+} while (0)
+
+#define xen_irq_disable()					\
+do {								\
+	current_vcpu_info()->evtchn_upcall_mask = 1;		\
+	barrier();						\
+} while (0)
+
+#define xen_irq_enable()					\
+do {								\
+	vcpu_info_t *_vcpu;					\
+	barrier();						\
+	_vcpu = current_vcpu_info();				\
+	_vcpu->evtchn_upcall_mask = 0;				\
+	barrier(); /* unmask then check (avoid races) */	\
+	if (unlikely(_vcpu->evtchn_upcall_pending))		\
+		force_evtchn_callback();			\
+} while (0)
+
+void xen_safe_halt(void);
+
+void xen_halt(void);
 
 /* 
  * The use of 'barrier' in the following reflects their use as local-lock
@@ -20,48 +54,31 @@
  * includes these barriers, for example.
  */
 
-#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
+#define __raw_local_save_flags() xen_save_fl()
 
-#define raw_local_irq_restore(x)					\
-do {									\
-	vcpu_info_t *_vcpu;						\
-	barrier();							\
-	_vcpu = current_vcpu_info();					\
-	if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {			\
-		barrier(); /* unmask then check (avoid races) */	\
-		if (unlikely(_vcpu->evtchn_upcall_pending))		\
-			force_evtchn_callback();			\
-	}								\
-} while (0)
+#define raw_local_irq_restore(flags) xen_restore_fl(flags)
 
-#define raw_local_irq_disable()						\
-do {									\
-	current_vcpu_info()->evtchn_upcall_mask = 1;			\
-	barrier();							\
-} while (0)
+#define raw_local_irq_disable()	xen_irq_disable()
 
-#define raw_local_irq_enable()						\
-do {									\
-	vcpu_info_t *_vcpu;						\
-	barrier();							\
-	_vcpu = current_vcpu_info();					\
-	_vcpu->evtchn_upcall_mask = 0;					\
-	barrier(); /* unmask then check (avoid races) */		\
-	if (unlikely(_vcpu->evtchn_upcall_pending))			\
-		force_evtchn_callback();				\
-} while (0)
+#define raw_local_irq_enable() xen_irq_enable()
 
 /*
  * Used in the idle loop; sti takes one instruction cycle
  * to complete:
  */
-void raw_safe_halt(void);
+static inline void raw_safe_halt(void)
+{
+	xen_safe_halt();
+}
 
 /*
  * Used when interrupts are already enabled or to
  * shutdown the processor:
  */
-void halt(void);
+static inline void halt(void)
+{
+	xen_halt();
+}
 
 /*
  * For spinlocks, etc:
@@ -106,7 +123,7 @@ sysexit_scrit:	/**** START OF SYSEXIT CR
 14:	__DISABLE_INTERRUPTS						; \
 	TRACE_IRQS_OFF							; \
 sysexit_ecrit:	/**** END OF SYSEXIT CRITICAL REGION ****/		; \
-	mov  $__KERNEL_PDA, %ecx					; \
+	mov  $__KERNEL_PERCPU, %ecx					; \
 	push %esp							; \
 	mov  %ecx, %fs							; \
 	call evtchn_do_upcall						; \
--- 12.2.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/mmu_context_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -6,6 +6,20 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+void arch_exit_mmap(struct mm_struct *mm);
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
+
+void mm_pin(struct mm_struct *mm);
+void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
+
+static inline void xen_activate_mm(struct mm_struct *prev,
+				   struct mm_struct *next)
+{
+	if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
+		mm_pin(next);
+}
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch
 		: : "r" (0) );
 }
 
-extern void mm_pin(struct mm_struct *mm);
-extern void mm_unpin(struct mm_struct *mm);
-void mm_pin_all(void);
-
 static inline void switch_mm(struct mm_struct *prev,
 			     struct mm_struct *next,
 			     struct task_struct *tsk)
@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-	if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
-		mm_pin(next);
-	switch_mm(prev, next, NULL);
-}
+#define activate_mm(prev, next)				\
+	do {						\
+		xen_activate_mm(prev, next);		\
+		switch_mm((prev),(next),NULL);		\
+	} while(0)
 
 #endif
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgalloc_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgalloc_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -1,7 +1,6 @@
 #ifndef _I386_PGALLOC_H
 #define _I386_PGALLOC_H
 
-#include <asm/fixmap.h>
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
 #include <asm/io.h>		/* for phys_to_virt and page_to_pseudophys */
@@ -69,6 +68,4 @@ do {									\
 #define pud_populate(mm, pmd, pte)	BUG()
 #endif
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _I386_PGALLOC_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-01-31 17:32:29.000000000 +0100
@@ -52,32 +52,40 @@ static inline int pte_exec_kernel(pte_t 
  * value and then use set_pte to update it.  -ben
  */
 
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
 }
-#define set_pte_atomic(pteptr,pteval) \
-		set_64bit((unsigned long long *)(pteptr),__pte_val(pteval))
 
-#define set_pte_at(_mm,addr,ptep,pteval) do {				\
-	if (((_mm) != current->mm && (_mm) != &init_mm) ||		\
-	    HYPERVISOR_update_va_mapping((addr), (pteval), 0))		\
-		set_pte((ptep), (pteval));				\
-} while (0)
-
-#define set_pmd(pmdptr,pmdval)				\
-		xen_l2_entry_update((pmdptr), (pmdval))
-#define set_pud(pudptr,pudval) \
-		xen_l3_entry_update((pudptr), (pudval))
+static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep , pte_t pte)
+{
+	if ((mm != current->mm && mm != &init_mm) ||
+	    HYPERVISOR_update_va_mapping(addr, pte, 0))
+		xen_set_pte(ptep, pte);
+}
+
+static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	set_64bit((unsigned long long *)(ptep),__pte_val(pte));
+}
+static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	xen_l2_entry_update(pmdp, pmd);
+}
+static inline void xen_set_pud(pud_t *pudp, pud_t pud)
+{
+	xen_l3_entry_update(pudp, pud);
+}
 
 /*
  * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
  * entry, so clear the bottom half first and enforce ordering with a compiler
  * barrier.
  */
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	if ((mm != current->mm && mm != &init_mm)
 	    || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
@@ -87,7 +95,18 @@ static inline void pte_clear(struct mm_s
 	}
 }
 
-#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+static inline void xen_pmd_clear(pmd_t *pmd)
+{
+	xen_l2_entry_update(pmd, __pmd(0));
+}
+
+#define set_pte(ptep, pte)			xen_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte)		xen_set_pte_at(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte)		xen_set_pte_atomic(ptep, pte)
+#define set_pmd(pmdp, pmd)			xen_set_pmd(pmdp, pmd)
+#define set_pud(pudp, pud)			xen_set_pud(pudp, pud)
+#define pte_clear(mm, addr, ptep)		xen_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd)				xen_pmd_clear(pmd)
 
 /*
  * Pentium-II erratum A13: in PAE mode we explicitly have to flush
@@ -108,7 +127,8 @@ static inline void pud_clear (pud_t * pu
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
+#ifdef CONFIG_SMP
+static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
 {
 	uint64_t val = __pte_val(res);
 	if (__cmpxchg64(ptep, val, 0) != val) {
@@ -119,6 +139,9 @@ static inline pte_t raw_ptep_get_and_cle
 	}
 	return res;
 }
+#else
+#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
+#endif
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define ptep_clear_flush(vma, addr, ptep)			\
@@ -165,13 +188,13 @@ extern unsigned long long __supported_pt
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
 	return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
-			pgprot_val(pgprot)) & __supported_pte_mask);
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
 	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
-			pgprot_val(pgprot)) & __supported_pte_mask);
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 /*
@@ -191,6 +214,4 @@ static inline pmd_t pfn_pmd(unsigned lon
 
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
-void vmalloc_sync_all(void);
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h	2007-06-12 13:14:02.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h	2011-01-31 17:32:29.000000000 +0100
@@ -1,7 +1,7 @@
 #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
 #define _I386_PGTABLE_3LEVEL_DEFS_H
 
-#define HAVE_SHARED_KERNEL_PMD 0
+#define SHARED_KERNEL_PMD	0
 
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-07 15:38:07.000000000 +0100
@@ -24,11 +24,11 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/sched.h>
 
 /* Is this pagetable pinned? */
 #define PG_pinned	PG_arch_1
 
-struct mm_struct;
 struct vm_area_struct;
 
 /*
@@ -38,17 +38,16 @@ struct vm_area_struct;
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 extern unsigned long empty_zero_page[1024];
 extern pgd_t *swapper_pg_dir;
-extern struct kmem_cache *pgd_cache;
 extern struct kmem_cache *pmd_cache;
 extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
+void check_pgt_cache(void);
 
 void pmd_ctor(void *, struct kmem_cache *, unsigned long);
-void pgd_ctor(void *, struct kmem_cache *, unsigned long);
-void pgd_dtor(void *, struct kmem_cache *, unsigned long);
 void pgtable_cache_init(void);
 void paging_init(void);
 
+
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
  * implements both the traditional 2-level x86 page tables and the
@@ -165,6 +164,7 @@ void paging_init(void);
 
 extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -172,6 +172,7 @@ extern unsigned long long __PAGE_KERNEL,
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
 #define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE)
 #define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC)
@@ -275,7 +276,13 @@ static inline pte_t pte_mkhuge(pte_t pte
  */
 #define pte_update(mm, addr, ptep)		do { } while (0)
 #define pte_update_defer(mm, addr, ptep)	do { } while (0)
-#define paravirt_map_pt_hook(slot, va, pfn)	do { } while (0)
+
+/* local pte updates need not use xchg for locking */
+static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
+{
+	xen_set_pte(ptep, __pte(0));
+	return res;
+}
 
 /*
  * We only update the dirty/accessed state if we set
@@ -286,17 +293,34 @@ static inline pte_t pte_mkhuge(pte_t pte
  */
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags(vma, address, ptep, entry, dirty)		\
-do {									\
-	if (dirty)							\
+({									\
+	int __changed = !pte_same(*(ptep), entry);			\
+	if (__changed && (dirty))					\
 		ptep_establish(vma, address, ptep, entry);		\
-} while (0)
+	__changed;							\
+})
 
-/*
- * We don't actually have these, but we want to advertise them so that
- * we can encompass the flush here.
- */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define ptep_test_and_clear_dirty(vma, addr, ptep) ({			\
+	int __ret = 0;							\
+	if (pte_dirty(*(ptep)))						\
+		__ret = test_and_clear_bit(_PAGE_BIT_DIRTY,		\
+						&(ptep)->pte_low);	\
+	if (__ret)							\
+		pte_update((vma)->vm_mm, addr, ptep);			\
+	__ret;								\
+})
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(vma, addr, ptep) ({			\
+	int __ret = 0;							\
+	if (pte_young(*(ptep)))						\
+		__ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,		\
+						&(ptep)->pte_low);	\
+	if (__ret)							\
+		pte_update((vma)->vm_mm, addr, ptep);			\
+	__ret;								\
+})
 
 /*
  * Rules for using ptep_establish: the pte MUST be a user pte, and
@@ -323,7 +347,7 @@ do {									\
 	int __dirty = pte_dirty(__pte);					\
 	__pte = pte_mkclean(__pte);					\
 	if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
-		ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
+		(void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
 	else if (__dirty)						\
 		(ptep)->pte_low = __pte.pte_low;			\
 	__dirty;							\
@@ -336,7 +360,7 @@ do {									\
 	int __young = pte_young(__pte);					\
 	__pte = pte_mkold(__pte);					\
 	if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
-		ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+		(void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
 	else if (__young)						\
 		(ptep)->pte_low = __pte.pte_low;			\
 	__young;							\
@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s
 	if (!pte_none(pte)
 	    && (mm != &init_mm
 	        || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
-		pte = raw_ptep_get_and_clear(ptep, pte);
+		pte = xen_ptep_get_and_clear(ptep, pte);
 		pte_update(mm, addr, ptep);
 	}
 	return pte;
@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo
 #endif
 
 #if defined(CONFIG_HIGHPTE)
-#define pte_offset_map(dir, address)				\
-({								\
-	pte_t *__ptep;						\
-	unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT;		\
-	__ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
-	paravirt_map_pt_hook(KM_PTE0,__ptep, pfn);		\
-	__ptep = __ptep + pte_index(address);			\
-	__ptep;							\
-})
-#define pte_offset_map_nested(dir, address)			\
-({								\
-	pte_t *__ptep;						\
-	unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT;		\
-	__ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
-	paravirt_map_pt_hook(KM_PTE1,__ptep, pfn);		\
-	__ptep = __ptep + pte_index(address);			\
-	__ptep;							\
-})
+#define pte_offset_map(dir, address) \
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
+#define pte_offset_map_nested(dir, address) \
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
 #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
 #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
 #else
@@ -594,10 +604,6 @@ int xen_change_pte_range(struct mm_struc
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
 
-#define MK_IOSPACE_PFN(space, pfn)	(pfn)
-#define GET_IOSPACE(pfn)		0
-#define GET_PFN(pfn)			(pfn)
-
 #include <asm-generic/pgtable.h>
 
 #endif /* _I386_PGTABLE_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/processor_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -21,6 +21,7 @@
 #include <asm/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <asm/processor-flags.h>
 #include <xen/interface/physdev.h>
 
 /* flag for disabling the tsc */
@@ -118,7 +119,8 @@ extern char ignore_fpu_irq;
 
 void __init cpu_detect(struct cpuinfo_x86 *c);
 
-extern void identify_cpu(struct cpuinfo_x86 *);
+extern void identify_boot_cpu(void);
+extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86
 static inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
-/*
- * EFLAGS bits
- */
-#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
-#define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF	0x00000010 /* Auxillary carry Flag */
-#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF	0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF	0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF	0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL	0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT	0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF	0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM	0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC	0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF	0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
-
-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
-				      unsigned int *ecx, unsigned int *edx)
+static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
+			     unsigned int *ecx, unsigned int *edx)
 {
 	/* ecx is often an input as well as an output. */
 	__asm__(XEN_CPUID
@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
- * Intel CPU features in CR4
- */
-#define X86_CR4_VME		0x0001	/* enable vm86 extensions */
-#define X86_CR4_PVI		0x0002	/* virtual interrupts flag enable */
-#define X86_CR4_TSD		0x0004	/* disable time stamp at ipl 3 */
-#define X86_CR4_DE		0x0008	/* enable debugging extensions */
-#define X86_CR4_PSE		0x0010	/* enable page size extensions */
-#define X86_CR4_PAE		0x0020	/* enable physical address extensions */
-#define X86_CR4_MCE		0x0040	/* Machine check enable */
-#define X86_CR4_PGE		0x0080	/* enable global pages */
-#define X86_CR4_PCE		0x0100	/* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR		0x0200	/* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT	0x0400	/* enable unmasked SSE exceptions */
-
-/*
  * Save the cr4 feature set we're using (ie
  * Pentium 4MB enable and PPro Global page
  * enable), so that any CPU's that boot up
@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne
 }
 
 /*
- *      NSC/Cyrix CPU configuration register indexes
- */
-
-#define CX86_PCR0 0x20
-#define CX86_GCR  0xb8
-#define CX86_CCR0 0xc0
-#define CX86_CCR1 0xc1
-#define CX86_CCR2 0xc2
-#define CX86_CCR3 0xc3
-#define CX86_CCR4 0xe8
-#define CX86_CCR5 0xe9
-#define CX86_CCR6 0xea
-#define CX86_CCR7 0xeb
-#define CX86_PCR1 0xf0
-#define CX86_DIR0 0xfe
-#define CX86_DIR1 0xff
-#define CX86_ARR_BASE 0xc4
-#define CX86_RCR_BASE 0xdc
-
-/*
  *      NSC/Cyrix CPU indexed register access macros
  */
 
@@ -351,7 +297,8 @@ typedef struct {
 struct thread_struct;
 
 #ifndef CONFIG_X86_NO_TSS
-struct tss_struct {
+/* This is the TSS defined by the hardware. */
+struct i386_hw_tss {
 	unsigned short	back_link,__blh;
 	unsigned long	esp0;
 	unsigned short	ss0,__ss0h;
@@ -375,6 +322,11 @@ struct tss_struct {
 	unsigned short	gs, __gsh;
 	unsigned short	ldt, __ldth;
 	unsigned short	trace, io_bitmap_base;
+} __attribute__((packed));
+
+struct tss_struct {
+	struct i386_hw_tss x86_tss;
+
 	/*
 	 * The extra 1 is there because the CPU will access an
 	 * additional byte beyond the end of the IO permission
@@ -428,10 +380,11 @@ struct thread_struct {
 };
 
 #define INIT_THREAD  {							\
+	.esp0 = sizeof(init_stack) + (long)&init_stack,			\
 	.vm86_info = NULL,						\
 	.sysenter_cs = __KERNEL_CS,					\
 	.io_bitmap_ptr = NULL,						\
-	.fs = __KERNEL_PDA,						\
+	.fs = __KERNEL_PERCPU,						\
 }
 
 /*
@@ -441,10 +394,12 @@ struct thread_struct {
  * be within the limit.
  */
 #define INIT_TSS  {							\
-	.esp0		= sizeof(init_stack) + (long)&init_stack,	\
-	.ss0		= __KERNEL_DS,					\
-	.ss1		= __KERNEL_CS,					\
-	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,			\
+	.x86_tss = {							\
+		.esp0		= sizeof(init_stack) + (long)&init_stack, \
+		.ss0		= __KERNEL_DS,				\
+		.ss1		= __KERNEL_CS,				\
+		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		\
+	 },								\
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
@@ -551,38 +506,33 @@ static inline void rep_nop(void)
 
 #define cpu_relax()	rep_nop()
 
-#define paravirt_enabled() 1
-#define __cpuid xen_cpuid
-
 #ifndef CONFIG_X86_NO_TSS
-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
 {
-	tss->esp0 = thread->esp0;
+	tss->x86_tss.esp0 = thread->esp0;
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
-	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
-		tss->ss1 = thread->sysenter_cs;
+	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+		tss->x86_tss.ss1 = thread->sysenter_cs;
 		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 	}
 }
-#define load_esp0(tss, thread) \
-	__load_esp0(tss, thread)
 #else
-#define load_esp0(tss, thread) do { \
+#define xen_load_esp0(tss, thread) do { \
 	if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
 		BUG(); \
 } while (0)
 #endif
 
 
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register)				\
-		(var) = HYPERVISOR_get_debugreg(register)
-#define set_debugreg(value, register)				\
-		WARN_ON(HYPERVISOR_set_debugreg(register, value))
+static inline unsigned long xen_get_debugreg(int regno)
+{
+	return HYPERVISOR_get_debugreg(regno);
+}
 
-#define set_iopl_mask xen_set_iopl_mask
+static inline void xen_set_debugreg(int regno, unsigned long value)
+{
+	WARN_ON(HYPERVISOR_set_debugreg(regno, value));
+}
 
 /*
  * Set IOPL bits in EFLAGS from given mask
@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns
 }
 
 
+#define paravirt_enabled() 1
+#define __cpuid xen_cpuid
+
+#define load_esp0 xen_load_esp0
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register)				\
+	(var) = xen_get_debugreg(register)
+#define set_debugreg(value, register)				\
+	xen_set_debugreg(register, value)
+
+#define set_iopl_mask xen_set_iopl_mask
+
 /*
  * Generic CPUID function
  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
-extern int init_gdt(int cpu, struct task_struct *idle);
+/* Defined in head.S */
+extern struct Xgt_desc_struct early_gdt_descr;
+
 extern void cpu_set_gdt(int);
-extern void secondary_cpu_init(void);
+extern void switch_to_new_gdt(void);
+extern void cpu_init(void);
+extern void init_gdt(int cpu);
+
+extern int force_mwait;
 
 #endif /* __ASM_I386_PROCESSOR_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/smp_32.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/smp_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -8,19 +8,15 @@
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/pda.h>
 #endif
 
-#ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
-#include <asm/fixmap.h>
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
 #include <asm/bitops.h>
 #include <asm/mpspec.h>
+#include <asm/apic.h>
 #ifdef CONFIG_X86_IO_APIC
 #include <asm/io_apic.h>
 #endif
-#include <asm/apic.h>
-#endif
 #endif
 
 #define BAD_APICID 0xFFu
@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 #endif
 
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_XEN
+struct smp_ops
+{
+	void (*smp_prepare_boot_cpu)(void);
+	void (*smp_prepare_cpus)(unsigned max_cpus);
+	int (*cpu_up)(unsigned cpu);
+	void (*smp_cpus_done)(unsigned max_cpus);
+
+	void (*smp_send_stop)(void);
+	void (*smp_send_reschedule)(int cpu);
+	int (*smp_call_function_mask)(cpumask_t mask,
+				      void (*func)(void *info), void *info,
+				      int wait);
+};
+
+extern struct smp_ops smp_ops;
+
+static inline void smp_prepare_boot_cpu(void)
+{
+	smp_ops.smp_prepare_boot_cpu();
+}
+static inline void smp_prepare_cpus(unsigned int max_cpus)
+{
+	smp_ops.smp_prepare_cpus(max_cpus);
+}
+static inline int __cpu_up(unsigned int cpu)
+{
+	return smp_ops.cpu_up(cpu);
+}
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+	smp_ops.smp_cpus_done(max_cpus);
+}
+
+static inline void smp_send_stop(void)
+{
+	smp_ops.smp_send_stop();
+}
+static inline void smp_send_reschedule(int cpu)
+{
+	smp_ops.smp_send_reschedule(cpu);
+}
+static inline int smp_call_function_mask(cpumask_t mask,
+					 void (*func) (void *info), void *info,
+					 int wait)
+{
+	return smp_ops.smp_call_function_mask(mask, func, info, wait);
+}
+
+void native_smp_prepare_boot_cpu(void);
+void native_smp_prepare_cpus(unsigned int max_cpus);
+int native_cpu_up(unsigned int cpunum);
+void native_smp_cpus_done(unsigned int max_cpus);
+
 #define startup_ipi_hook(phys_apicid, start_eip, start_esp) 		\
 do { } while (0)
+
+#else
+
+
+void xen_smp_send_stop(void);
+void xen_smp_send_reschedule(int cpu);
+int xen_smp_call_function_mask(cpumask_t mask,
+			       void (*func) (void *info), void *info,
+			       int wait);
+
+#define smp_send_stop		xen_smp_send_stop
+#define smp_send_reschedule	xen_smp_send_reschedule
+#define smp_call_function_mask	xen_smp_call_function_mask
+
 #endif
 
 /*
@@ -62,7 +125,8 @@ do { } while (0)
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (read_pda(cpu_number))
+DECLARE_PER_CPU(int, cpu_number);
+#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
 
 extern cpumask_t cpu_possible_map;
 #define cpu_callin_map cpu_possible_map
@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_possible_map);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#ifdef APIC_DEFINITION
-extern int hard_smp_processor_id(void);
-#else
-#include <mach_apicdef.h>
-static inline int hard_smp_processor_id(void)
-{
-	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
-}
-#endif
-#endif
-
 #define safe_smp_processor_id() smp_processor_id()
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
@@ -102,10 +152,31 @@ extern unsigned int num_processors;
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
 
-#endif
+#endif /* CONFIG_SMP */
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#ifdef APIC_DEFINITION
+extern int hard_smp_processor_id(void);
+#else
+#include <mach_apicdef.h>
+static inline int hard_smp_processor_id(void)
+{
+	/* we don't want to mark this access volatile - bad code generation */
+	return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+#endif /* APIC_DEFINITION */
+
+#else /* CONFIG_X86_LOCAL_APIC */
+
+#ifndef CONFIG_SMP
+#define hard_smp_processor_id()		0
+#endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
 extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_LOCAL_APIC
--- 12.2.orig/arch/x86/include/mach-xen/asm/tlbflush_32.h	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/tlbflush_32.h	2011-01-31 17:32:29.000000000 +0100
@@ -29,8 +29,13 @@
  * and page-granular flushes are available only on i486 and up.
  */
 
+#define TLB_FLUSH_ALL	0xffffffff
+
+
 #ifndef CONFIG_SMP
 
+#include <linux/sched.h>
+
 #define flush_tlb() __flush_tlb()
 #define flush_tlb_all() __flush_tlb_all()
 #define local_flush_tlb() __flush_tlb()
@@ -55,7 +60,7 @@ static inline void flush_tlb_range(struc
 		__flush_tlb();
 }
 
-#else
+#else  /* SMP */
 
 #include <asm/smp.h>
 
@@ -84,9 +89,7 @@ struct tlb_state
 	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
-
-
-#endif
+#endif	/* SMP */
 
 #define flush_tlb_kernel_range(start, end) flush_tlb_all()
 
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/include/mach-xen/asm/cmpxchg_64.h	2011-12-23 11:18:57.000000000 +0100
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_XEN_CMPXCHG_64_H
+#define _ASM_X86_XEN_CMPXCHG_64_H
+
+#include_next <asm/cmpxchg_64.h>
+
+static inline u64 get_64bit(const volatile u64 *ptr)
+{
+	return *ptr;
+}
+
+#define get_64bit_local get_64bit
+
+#define cmpxchg64 cmpxchg
+#define cmpxchg64_local cmpxchg_local
+
+#endif /* _ASM_X86_XEN_CMPXCHG_64_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/desc_64.h	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/desc_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned
 			      DESC_LDT, size * 8 - 1);
 }
 
-static inline void set_seg_base(unsigned cpu, int entry, void *base)
-{ 
-	struct desc_struct *d = &cpu_gdt(cpu)[entry];
-	u32 addr = (u32)(u64)base;
-	BUG_ON((u64)base >> 32); 
-	d->base0 = addr & 0xffff;
-	d->base1 = (addr >> 16) & 0xff;
-	d->base2 = (addr >> 24) & 0xff;
-} 
-
 #define LDT_entry_a(info) \
 	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 /* Don't allow setting of the lm bit. It is useless anyways because 
@@ -165,25 +155,15 @@ static inline void set_seg_base(unsigned
 	(info)->useable		== 0	&& \
 	(info)->lm		== 0)
 
-#if TLS_SIZE != 24
-# error update this code.
-#endif
-
 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-#if 0
+	unsigned int i;
 	u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
-	gdt[0] = t->tls_array[0];
-	gdt[1] = t->tls_array[1];
-	gdt[2] = t->tls_array[2];
-#endif
-#define C(i) \
-	if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \
-						 t->tls_array[i])) \
-		BUG();
 
-	C(0); C(1); C(2);
-#undef C
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
+						 t->tls_array[i]))
+			BUG();
 } 
 
 /*
--- 12.2.orig/arch/x86/include/mach-xen/asm/fixmap_64.h	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/fixmap_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -15,7 +15,6 @@
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <asm/vsyscall.h>
-#include <asm/vsyscall32.h>
 #include <asm/acpi.h>
 
 /*
--- 12.2.orig/arch/x86/include/mach-xen/asm/irqflags_64.h	2007-06-12 13:14:13.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/irqflags_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -9,6 +9,7 @@
  */
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
+#include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
 /*
@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable
 {
 	unsigned long flags = __raw_local_save_flags();
 
-	raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
+	raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
 }
 
 static inline void raw_local_irq_enable(void)
 {
 	unsigned long flags = __raw_local_save_flags();
 
-	raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
+	raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
 }
 
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & (1<<9)) || (flags & (1 << 18));
+	return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
 }
 
 #else /* CONFIG_X86_VSMP */
@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag
  * Used in the idle loop; sti takes one instruction cycle
  * to complete:
  */
-void raw_safe_halt(void);
+void xen_safe_halt(void);
+static inline void raw_safe_halt(void)
+{
+	xen_safe_halt();
+}
 
 /*
  * Used when interrupts are already enabled or to
  * shutdown the processor:
  */
-void halt(void);
+void xen_halt(void);
+static inline void halt(void)
+{
+	xen_halt();
+}
 
 #else /* __ASSEMBLY__: */
 # ifdef CONFIG_TRACE_IRQFLAGS
--- 12.2.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h	2007-06-12 13:14:13.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/mmu_context_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -9,6 +9,9 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+void arch_exit_mmap(struct mm_struct *mm);
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
+
 /*
  * possibly do the LDT unload here?
  */
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgalloc_64.h	2010-09-23 15:39:04.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/pgalloc_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -1,7 +1,6 @@
 #ifndef _X86_64_PGALLOC_H
 #define _X86_64_PGALLOC_H
 
-#include <asm/fixmap.h>
 #include <asm/pda.h>
 #include <linux/threads.h>
 #include <linux/mm.h>
@@ -95,35 +94,25 @@ static inline void pud_free(pud_t *pud)
 	pte_free(virt_to_page(pud));
 }
 
-static inline void pgd_list_add(pgd_t *pgd, void *mm)
+static inline void pgd_list_add(pgd_t *pgd, struct mm_struct *mm)
 {
 	struct page *page = virt_to_page(pgd);
 
 	/* Store a back link for vmalloc_sync_all(). */
-	page->mapping = mm;
+	set_page_private(page, (unsigned long)mm);
 
 	spin_lock(&pgd_lock);
-	page->index = (pgoff_t)pgd_list;
-	if (pgd_list)
-		pgd_list->private = (unsigned long)&page->index;
-	pgd_list = page;
-	page->private = (unsigned long)&pgd_list;
+	list_add(&page->lru, &pgd_list);
 	spin_unlock(&pgd_lock);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
-	struct page *next, **pprev, *page = virt_to_page(pgd);
+	struct page *page = virt_to_page(pgd);
 
 	spin_lock(&pgd_lock);
-	next = (struct page *)page->index;
-	pprev = (struct page **)page->private;
-	*pprev = next;
-	if (next)
-		next->private = (unsigned long)pprev;
+	list_del(&page->lru);
 	spin_unlock(&pgd_lock);
-
-	page->mapping = NULL;
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-02-07 15:38:08.000000000 +0100
@@ -1,12 +1,14 @@
 #ifndef _X86_64_PGTABLE_H
 #define _X86_64_PGTABLE_H
 
+#include <linux/const.h>
+#ifndef __ASSEMBLY__
+
 /*
  * This file contains the functions and defines necessary to modify and use
  * the x86-64 page table tree.
  */
 #include <asm/processor.h>
-#include <asm/fixmap.h>
 #include <asm/bitops.h>
 #include <linux/threads.h>
 #include <linux/sched.h>
@@ -35,11 +37,9 @@ extern pte_t *lookup_address(unsigned lo
 #endif
 
 extern pud_t level3_kernel_pgt[512];
-extern pud_t level3_physmem_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
-extern pgd_t boot_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
@@ -54,6 +54,8 @@ extern void clear_kernel_mapping(unsigne
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
+#endif /* !__ASSEMBLY__ */
+
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */
@@ -78,6 +80,8 @@ extern unsigned long empty_zero_page[PAG
  */
 #define PTRS_PER_PTE	512
 
+#ifndef __ASSEMBLY__
+
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
 	       &(e), __pte_val(e), pte_pfn(e))
@@ -120,22 +124,23 @@ static inline void pgd_clear (pgd_t * pg
 
 #define pte_pgprot(a)	(__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
 
-#define PMD_SIZE	(1UL << PMD_SHIFT)
+#endif /* !__ASSEMBLY__ */
+
+#define PMD_SIZE	(_AC(1,UL) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_SIZE	(_AC(1,UL) << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD	((TASK_SIZE-1)/PGDIR_SIZE+1)
 #define FIRST_USER_ADDRESS	0
 
-#ifndef __ASSEMBLY__
-#define MAXMEM		 0x6fffffffffUL
-#define VMALLOC_START    0xffffc20000000000UL
-#define VMALLOC_END      0xffffe1ffffffffffUL
-#define MODULES_VADDR    0xffffffff88000000UL
-#define MODULES_END      0xffffffffff000000UL
+#define MAXMEM		 _AC(0x6fffffffff, UL)
+#define VMALLOC_START    _AC(0xffffc20000000000, UL)
+#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
+#define MODULES_VADDR    _AC(0xffffffff88000000, UL)
+#define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #define _PAGE_BIT_PRESENT	0
@@ -161,16 +166,18 @@ static inline void pgd_clear (pgd_t * pg
 #define _PAGE_GLOBAL	0x100	/* Global TLB entry */
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
-#define _PAGE_NX        (1UL<<_PAGE_BIT_NX)
+#define _PAGE_NX        (_AC(1,UL)<<_PAGE_BIT_NX)
 
 /* Mapped page is I/O or foreign and has no associated page struct. */
 #define _PAGE_IO	0x200
 
+#ifndef __ASSEMBLY__
 #if CONFIG_XEN_COMPAT <= 0x030002
 extern unsigned int __kernel_page_user;
 #else
 #define __kernel_page_user 0
 #endif
+#endif
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
@@ -235,6 +242,8 @@ extern unsigned int __kernel_page_user;
 #define __S110	PAGE_SHARED_EXEC
 #define __S111	PAGE_SHARED_EXEC
 
+#ifndef __ASSEMBLY__
+
 static inline unsigned long pgd_bad(pgd_t pgd)
 {
 	return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
@@ -346,6 +355,20 @@ static inline pte_t pte_mkwrite(pte_t pt
 static inline pte_t pte_mkhuge(pte_t pte)	{ __pte_val(pte) |= _PAGE_PSE; return pte; }
 static inline pte_t pte_clrhuge(pte_t pte)	{ __pte_val(pte) &= ~_PAGE_PSE; return pte; }
 
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+	if (!pte_dirty(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
+}
+
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+	if (!pte_young(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
+}
+
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
@@ -470,18 +493,12 @@ static inline pte_t pte_modify(pte_t pte
  * bit at the same time. */
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags(vma, address, ptep, entry, dirty)		\
-	do {								\
-		if (dirty)						\
-			ptep_establish(vma, address, ptep, entry);	\
-	} while (0)
-
-
-/*
- * i386 says: We don't actually have these, but we want to advertise
- * them so that we can encompass the flush here.
- */
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+({									\
+	int __changed = !pte_same(*(ptep), entry);			\
+	if (__changed && (dirty))					\
+		ptep_establish(vma, address, ptep, entry);		\
+	__changed;							\
+})
 
 #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(vma, address, ptep)			\
@@ -490,7 +507,7 @@ static inline pte_t pte_modify(pte_t pte
 	int __dirty = pte_dirty(__pte);					\
 	__pte = pte_mkclean(__pte);					\
 	if ((vma)->vm_mm->context.pinned)				\
-		ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
+		(void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
 	else if (__dirty)						\
 		set_pte(ptep, __pte);					\
 	__dirty;							\
@@ -503,7 +520,7 @@ static inline pte_t pte_modify(pte_t pte
 	int __young = pte_young(__pte);					\
 	__pte = pte_mkold(__pte);					\
 	if ((vma)->vm_mm->context.pinned)				\
-		ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+		(void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
 	else if (__young)						\
 		set_pte(ptep, __pte);					\
 	__young;							\
@@ -517,10 +534,7 @@ static inline pte_t pte_modify(pte_t pte
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
 extern spinlock_t pgd_lock;
-extern struct page *pgd_list;
-void vmalloc_sync_all(void);
-
-#endif /* !__ASSEMBLY__ */
+extern struct list_head pgd_list;
 
 extern int kern_addr_valid(unsigned long addr); 
 
@@ -555,10 +569,6 @@ int xen_change_pte_range(struct mm_struc
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
 
-#define MK_IOSPACE_PFN(space, pfn)	(pfn)
-#define GET_IOSPACE(pfn)		0
-#define GET_PFN(pfn)			(pfn)
-
 #define HAVE_ARCH_UNMAPPED_AREA
 
 #define pgtable_cache_init()   do { } while (0)
@@ -572,11 +582,14 @@ int xen_change_pte_range(struct mm_struc
 #define	kc_offset_to_vaddr(o) \
    (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
 
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
+#endif /* !__ASSEMBLY__ */
 
 #endif /* _X86_64_PGTABLE_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor_64.h	2011-01-31 17:32:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/processor_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -20,6 +20,7 @@
 #include <asm/percpu.h>
 #include <linux/personality.h>
 #include <linux/cpumask.h>
+#include <asm/processor-flags.h>
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo
 extern unsigned short num_cache_leaves;
 
 /*
- * EFLAGS bits
- */
-#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
-#define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF	0x00000010 /* Auxillary carry Flag */
-#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF	0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF	0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF	0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL	0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT	0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF	0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM	0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC	0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF	0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
-
-/*
- * Intel CPU features in CR4
- */
-#define X86_CR4_VME		0x0001	/* enable vm86 extensions */
-#define X86_CR4_PVI		0x0002	/* virtual interrupts flag enable */
-#define X86_CR4_TSD		0x0004	/* disable time stamp at ipl 3 */
-#define X86_CR4_DE		0x0008	/* enable debugging extensions */
-#define X86_CR4_PSE		0x0010	/* enable page size extensions */
-#define X86_CR4_PAE		0x0020	/* enable physical address extensions */
-#define X86_CR4_MCE		0x0040	/* Machine check enable */
-#define X86_CR4_PGE		0x0080	/* enable global pages */
-#define X86_CR4_PCE		0x0100	/* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR		0x0200	/* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT	0x0400	/* enable unmasked SSE exceptions */
-
-/*
  * Save the cr4 feature set we're using (ie
  * Pentium 4MB enable and PPro Global page
  * enable), so that any CPU's that boot up
@@ -203,7 +168,7 @@ struct i387_fxsave_struct {
 	u32	mxcsr;
 	u32	mxcsr_mask;
 	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 128 bytes */
+	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
 	u32	padding[24];
 } __attribute__ ((aligned (16)));
 
@@ -436,22 +401,6 @@ static inline void prefetchw(void *x) 
 #define cpu_relax()   rep_nop()
 
 /*
- *      NSC/Cyrix CPU configuration register indexes
- */
-#define CX86_CCR0 0xc0
-#define CX86_CCR1 0xc1
-#define CX86_CCR2 0xc2
-#define CX86_CCR3 0xc3
-#define CX86_CCR4 0xe8
-#define CX86_CCR5 0xe9
-#define CX86_CCR6 0xea
-#define CX86_CCR7 0xeb
-#define CX86_DIR0 0xfe
-#define CX86_DIR1 0xff
-#define CX86_ARR_BASE 0xc4
-#define CX86_RCR_BASE 0xdc
-
-/*
  *      NSC/Cyrix CPU indexed register access macros
  */
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/smp_64.h	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/smp_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -11,12 +11,11 @@
 extern int disable_apic;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/fixmap.h>
 #include <asm/mpspec.h>
+#include <asm/apic.h>
 #ifdef CONFIG_X86_IO_APIC
 #include <asm/io_apic.h>
 #endif
-#include <asm/apic.h>
 #include <asm/thread_info.h>
 #endif
 
@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
-void smp_stop_cpu(void);
 
 extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void)
 
 #define raw_smp_processor_id() read_pda(cpunumber)
 
-#ifdef CONFIG_X86_LOCAL_APIC
-static inline int hard_smp_processor_id(void)
-{
-	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
-}
-#endif
-
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
 
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static inline int hard_smp_processor_id(void)
+{
+	/* we don't want to mark this access volatile - bad code generation */
+	return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
+}
 #endif
 
 /*
--- 12.2.orig/arch/x86/include/mach-xen/asm/tlbflush_64.h	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/tlbflush_64.h	2011-01-31 17:32:29.000000000 +0100
@@ -2,7 +2,9 @@
 #define _X8664_TLBFLUSH_H
 
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
+#include <asm/system.h>
 
 #define __flush_tlb()	xen_tlb_flush()
 
--- 12.2.orig/include/linux/msi.h	2012-06-20 12:12:06.000000000 +0200
+++ 12.2/include/linux/msi.h	2012-04-04 14:03:08.000000000 +0200
@@ -4,6 +4,8 @@
 #include <linux/kobject.h>
 #include <linux/list.h>
 
+#ifndef CONFIG_XEN
+
 struct msi_msg {
 	u32	address_lo;	/* low 32 bits of msi message address */
 	u32	address_hi;	/* high 32 bits of msi message address */
@@ -49,6 +51,11 @@ struct msi_desc {
 	struct kobject kobj;
 };
 
+#else /* CONFIG_XEN */
+struct pci_dev;
+struct msi_desc;
+#endif /* CONFIG_XEN */
+
 /*
  * The arch hook for setup up msi irqs
  */
--- 12.2.orig/include/xen/net-util.h	2011-02-09 15:50:19.000000000 +0100
+++ 12.2/include/xen/net-util.h	2011-02-09 15:53:07.000000000 +0100
@@ -11,6 +11,7 @@ static inline int skb_checksum_setup(str
 				     unsigned long *fixup_counter)
 {
  	struct iphdr *iph = (void *)skb->data;
+	unsigned char *th;
 	__be16 *csum = NULL;
 	int err = -EPROTO;
 
@@ -32,31 +33,31 @@ static inline int skb_checksum_setup(str
 	if (skb->protocol != htons(ETH_P_IP))
 		goto out;
 
-	skb->nh.iph = iph;
-	skb->h.raw = skb->nh.raw + 4 * iph->ihl;
-	if (skb->h.raw >= skb->tail)
+	th = skb->data + 4 * iph->ihl;
+	if (th >= skb_tail_pointer(skb))
 		goto out;
 
+	skb->csum_start = th - skb->head;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
 		skb->csum_offset = offsetof(struct tcphdr, check);
 		if (csum)
-			csum = &skb->h.th->check;
+			csum = &((struct tcphdr *)th)->check;
 		break;
 	case IPPROTO_UDP:
 		skb->csum_offset = offsetof(struct udphdr, check);
 		if (csum)
-			csum = &skb->h.uh->check;
+			csum = &((struct udphdr *)th)->check;
 		break;
 	default:
 		if (net_ratelimit())
 			printk(KERN_ERR "Attempting to checksum a non-"
 			       "TCP/UDP packet, dropping a protocol"
-			       " %d packet\n", skb->nh.iph->protocol);
+			       " %d packet\n", iph->protocol);
 		goto out;
 	}
 
-	if ((skb->h.raw + skb->csum_offset + sizeof(*csum)) > skb->tail)
+	if ((th + skb->csum_offset + sizeof(*csum)) > skb_tail_pointer(skb))
 		goto out;
 
 	if (csum) {
--- 12.2.orig/kernel/irq/chip.c	2012-06-20 12:12:06.000000000 +0200
+++ 12.2/kernel/irq/chip.c	2012-04-10 16:15:06.000000000 +0200
@@ -89,6 +89,7 @@ int irq_set_handler_data(unsigned int ir
 }
 EXPORT_SYMBOL(irq_set_handler_data);
 
+#ifndef CONFIG_XEN
 /**
  *	irq_set_msi_desc - set MSI descriptor data for an irq
  *	@irq:	Interrupt number
@@ -109,6 +110,7 @@ int irq_set_msi_desc(unsigned int irq, s
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
+#endif
 
 /**
  *	irq_set_chip_data - set irq chip data for an irq
--- 12.2.orig/lib/swiotlb-xen.c	2011-01-31 17:32:22.000000000 +0100
+++ 12.2/lib/swiotlb-xen.c	2011-01-31 17:32:29.000000000 +0100
@@ -751,7 +751,6 @@ swiotlb_dma_supported (struct device *hw
 	return (mask >= ((1UL << dma_bits) - 1));
 }
 
-EXPORT_SYMBOL(swiotlb_init);
 EXPORT_SYMBOL(swiotlb_map_single);
 EXPORT_SYMBOL(swiotlb_unmap_single);
 EXPORT_SYMBOL(swiotlb_map_sg);
--- 12.2.orig/mm/vmalloc.c	2012-06-20 12:12:06.000000000 +0200
+++ 12.2/mm/vmalloc.c	2012-06-20 12:13:31.000000000 +0200
@@ -1576,6 +1576,13 @@ static void *__vmalloc_area_node(struct 
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+#ifdef CONFIG_XEN
+	gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+
+	BUILD_BUG_ON((__GFP_DMA | __GFP_DMA32) != (__GFP_DMA + __GFP_DMA32));
+	if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+		gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+#endif
 
 	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
@@ -1612,6 +1619,16 @@ static void *__vmalloc_area_node(struct 
 			goto fail;
 		}
 		area->pages[i] = page;
+#ifdef CONFIG_XEN
+		if (dma_mask) {
+			if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+				area->nr_pages = i + 1;
+				goto fail;
+			}
+			if (gfp_mask & __GFP_ZERO)
+				clear_highpage(page);
+		}
+#endif
 	}
 
 	if (map_vm_area(area, prot, &pages))
@@ -1837,6 +1854,8 @@ void *vmalloc_exec(unsigned long size)
 #define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
 #define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
+#elif defined(CONFIG_XEN)
+#define GFP_VMALLOC32 GFP_DMA | GFP_DMA32 | GFP_KERNEL
 #else
 #define GFP_VMALLOC32 GFP_KERNEL
 #endif
--- 12.2.orig/scripts/Makefile.xen.awk	2007-08-06 15:10:49.000000000 +0200
+++ 12.2/scripts/Makefile.xen.awk	2011-01-31 17:32:29.000000000 +0100
@@ -13,7 +13,7 @@ BEGIN {
 	next
 }
 
-/:[[:space:]]*%\.[cS][[:space:]]/ {
+/:[[:space:]]*\$\(src\)\/%\.[cS][[:space:]]/ {
 	line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0)
 	line = gensub(/(single-used-m)/, "xen-\\1", "g", line)
 	print line