Automatically created from "patch-2.6.39" by xen-port-patches.py

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.39
Patch-mainline: 2.6.39

 This patch contains the differences between 2.6.38 and 2.6.39.

Acked-by: jbeulich@novell.com

--- head.orig/arch/x86/Kconfig	2012-04-10 17:07:07.000000000 +0200
+++ head/arch/x86/Kconfig	2012-04-10 17:12:02.000000000 +0200
@@ -1195,7 +1195,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 	def_bool X86_64 || X86_PAE
 
 config ARCH_DMA_ADDR_T_64BIT
-	def_bool X86_64 || HIGHMEM64G
+	def_bool X86_64 || XEN || HIGHMEM64G
 
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -2052,7 +2052,7 @@ source "drivers/pci/Kconfig"
 
 # x86_64 have no ISA slots, but can have ISA-style DMA.
 config ISA_DMA_API
-	bool "ISA-style DMA support" if (X86_64 && EXPERT)
+	bool "ISA-style DMA support" if ((X86_64 || XEN) && EXPERT) || XEN_UNPRIVILEGED_GUEST
 	default y
 	help
 	  Enables ISA-style DMA support for devices requiring such controllers.
@@ -2122,7 +2122,7 @@ config SCx200HR_TIMER
 
 config OLPC
 	bool "One Laptop Per Child support"
-	depends on !X86_PAE
+	depends on !X86_PAE && !XEN
 	select GPIOLIB
 	select OF
 	select OF_PROMTREE
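
Note: the ARCH_DMA_ADDR_T_64BIT change is needed because a Xen guest can
be handed machine (bus) addresses above 4 GiB even when its own
pseudo-physical memory is small, so dma_addr_t must be 64 bits wide
independent of HIGHMEM64G. For reference, the option only controls the
typedef selection (as in include/linux/types.h of this kernel
generation):

	#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
	typedef u64 dma_addr_t;
	#else
	typedef u32 dma_addr_t;
	#endif
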
--- head.orig/arch/x86/ia32/ia32entry-xen.S	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/ia32/ia32entry-xen.S	2011-04-12 16:00:27.000000000 +0200
@@ -20,6 +20,8 @@
 #define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
 #define __AUDIT_ARCH_LE	   0x40000000
 
+	.section .entry.text, "ax"
+
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
 
 	.macro IA32_ARG_FIXUP noebp=0
@@ -122,8 +124,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET	rcx,0
 	movq	8(%rsp),%r11
 	CFI_RESTORE	r11
-	popq	%rcx
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rcx
 	CFI_RESTORE	rcx
  	movl	%ebp,%ebp		/* zero extension */
 	movl	%eax,%eax
@@ -293,8 +294,7 @@ ENTRY(ia32_syscall)
 	CFI_REL_OFFSET	rcx,0
 	movq 8(%rsp),%r11
 	CFI_RESTORE	r11
-	popq %rcx
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rcx
 	CFI_RESTORE	rcx
 	movl %eax,%eax
 	movq %rax,(%rsp)
@@ -732,4 +732,8 @@ ia32_sys_call_table:
 	.quad sys_fanotify_init
 	.quad sys32_fanotify_mark
 	.quad sys_prlimit64		/* 340 */
+	.quad sys_name_to_handle_at
+	.quad compat_sys_open_by_handle_at
+	.quad compat_sys_clock_adjtime
+	.quad sys_syncfs
 ia32_syscall_end:
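
Note: the four appended entries track the system calls new in 2.6.39
(i386 numbers 341-344, following sys_prlimit64 at 340), using compat
wrappers where the 32-bit ABI differs. Since each slot is one .quad,
the table-derived count updates itself:

	/*
	 * IA32_NR_syscalls = (ia32_syscall_end - ia32_sys_call_table)/8,
	 * so appending 341 name_to_handle_at, 342 open_by_handle_at,
	 * 343 clock_adjtime and 344 syncfs grows the limit from 341 to
	 * 345 entries with no separate constant to edit.
	 */
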
--- head.orig/arch/x86/include/asm/acpi.h	2011-08-23 14:04:36.000000000 +0200
+++ head/arch/x86/include/asm/acpi.h	2011-08-23 14:06:28.000000000 +0200
@@ -119,7 +119,11 @@ static inline void acpi_disable_pci(void
 }
 
 /* Low-level suspend routine. */
+#ifdef CONFIG_ACPI_PV_SLEEP
+#define acpi_suspend_lowlevel() acpi_enter_sleep_state(ACPI_STATE_S3)
+#else
 extern int acpi_suspend_lowlevel(void);
+#endif
 
 extern const unsigned char acpi_wakeup_code[];
 #define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
--- head.orig/arch/x86/include/asm/trampoline.h	2012-06-06 13:23:56.000000000 +0200
+++ head/arch/x86/include/asm/trampoline.h	2011-04-14 14:59:31.000000000 +0200
@@ -1,4 +1,4 @@
-#ifndef _ASM_X86_TRAMPOLINE_H
+#if !defined(_ASM_X86_TRAMPOLINE_H) && !defined(CONFIG_XEN)
 #define _ASM_X86_TRAMPOLINE_H
 
 #ifndef __ASSEMBLY__
--- head.orig/arch/x86/include/mach-xen/asm/io.h	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/io.h	2011-04-15 11:26:41.000000000 +0200
@@ -352,6 +352,8 @@ extern void __iomem *early_ioremap(resou
 				   unsigned long size);
 extern void __iomem *early_memremap(resource_size_t phys_addr,
 				    unsigned long size);
+extern void __iomem *early_memremap_ro(resource_size_t phys_addr,
+				       unsigned long size);
 extern void early_iounmap(void __iomem *addr, unsigned long size);
 extern void fixup_early_ioremap(void);
 extern bool is_early_ioremap_ptep(pte_t *ptep);
--- head.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-03-23 10:10:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-04-12 15:59:10.000000000 +0200
@@ -63,8 +63,6 @@ static inline void xen_pmd_clear(pmd_t *
 
 static inline void pud_clear(pud_t *pudp)
 {
-	pgdval_t pgd;
-
 	set_pud(pudp, __pud(0));
 
 	/*
@@ -73,13 +71,10 @@ static inline void pud_clear(pud_t *pudp
 	 * section 8.1: in PAE mode we explicitly have to flush the
 	 * TLB via cr3 if the top-level pgd is changed...
 	 *
-	 * Make sure the pud entry we're updating is within the
-	 * current pgd to avoid unnecessary TLB flushes.
+	 * Currently all places where pud_clear() is called either have
+	 * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or
+	 * pud_clear_bad()), so we don't need TLB flush here.
 	 */
-	pgd = read_cr3();
-	if (__pa(pudp) >= pgd && __pa(pudp) <
-	    (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
-		xen_tlb_flush();
 }
 
 #ifdef CONFIG_SMP
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 10:10:05.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-04-12 15:59:10.000000000 +0200
@@ -165,7 +165,6 @@ static inline int pgd_large(pgd_t pgd) {
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
 extern int kern_addr_valid(unsigned long addr);
-extern void cleanup_highmap(void);
 
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-05-23 11:27:06.000000000 +0200
@@ -358,6 +358,8 @@ int phys_mem_access_prot_allowed(struct 
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void xen_pagetable_reserve(u64 start, u64 end);
+
 struct seq_file;
 extern void arch_report_meminfo(struct seq_file *m);
 
--- head.orig/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:59.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/processor.h	2011-04-12 15:59:10.000000000 +0200
@@ -103,10 +103,6 @@ struct cpuinfo_x86 {
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
 #ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
-	/* cpus sharing the last level cache: */
-	cpumask_var_t		llc_shared_map;
-#endif
 	/* cpuid returned max cores value: */
 	u16			 x86_max_cores;
 	u16			apicid;
--- head.orig/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:12:54.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/smp.h	2011-04-13 17:01:31.000000000 +0200
@@ -17,12 +17,24 @@
 #endif
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
+#include <asm/cpufeature.h>
 
 extern unsigned int num_processors;
 
 #ifndef CONFIG_XEN
+static inline bool cpu_has_ht_siblings(void)
+{
+	bool has_siblings = false;
+#ifdef CONFIG_SMP
+	has_siblings = cpu_has_ht && smp_num_siblings > 1;
+#endif
+	return has_siblings;
+}
+
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
 DECLARE_PER_CPU(int, cpu_number);
 #endif
@@ -38,8 +50,16 @@ static inline const struct cpumask *cpu_
 }
 
 #ifndef CONFIG_XEN
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+	return per_cpu(cpu_llc_shared_map, cpu);
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
 #endif
 
 #ifdef CONFIG_SMP
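
Note: cpu_llc_shared_mask() replaces the llc_shared_map member removed
from struct cpuinfo_x86 (see the processor.h hunk above) with a plain
per-CPU accessor. A minimal usage sketch for native (non-Xen) builds,
helper name hypothetical:

	#include <linux/cpumask.h>

	/* Count the CPUs recorded as sharing the last-level cache with
	 * @cpu; the mask includes @cpu itself. */
	static unsigned int llc_peer_count(int cpu)
	{
		unsigned int n = 0, sibling;

		for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
			n++;
		return n;
	}
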
--- head.orig/arch/x86/kernel/Makefile	2012-04-10 17:06:48.000000000 +0200
+++ head/arch/x86/kernel/Makefile	2012-04-10 17:11:59.000000000 +0200
@@ -117,5 +117,6 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8237.o i8253.o \
-	i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o vsmp_64.o
+	i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o trampoline%.o \
+	tsc.o tsc_sync.o vsmp_64.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head.orig/arch/x86/kernel/acpi/Makefile	2011-01-31 18:07:35.000000000 +0100
+++ head/arch/x86/kernel/acpi/Makefile	2011-04-12 16:29:53.000000000 +0200
@@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o:    $(obj)/realmode/w
 $(obj)/realmode/wakeup.bin: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/realmode
 
-disabled-obj-$(CONFIG_XEN)	:= cstate.o wakeup_%.o
+disabled-obj-$(CONFIG_XEN)	:= cstate.o sleep.o wakeup_%.o
--- head.orig/arch/x86/kernel/amd_nb.c	2011-04-13 13:47:56.000000000 +0200
+++ head/arch/x86/kernel/amd_nb.c	2012-02-08 12:53:26.000000000 +0100
@@ -154,6 +154,7 @@ struct resource *amd_get_mmconfig_range(
 	return res;
 }
 
+#ifndef CONFIG_XEN
 int amd_get_subcaches(int cpu)
 {
 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
@@ -208,6 +209,7 @@ int amd_set_subcaches(int cpu, int mask)
 
 	return 0;
 }
+#endif
 
 static int amd_cache_gart(void)
 {
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-17 10:30:00.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c	2011-05-09 11:46:50.000000000 +0200
@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
@@ -120,7 +120,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BU
 
 int skip_ioapic_setup;
 
-static void __init _arch_disable_smp_support(void)
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+static void __init _disable_ioapic_support(void)
 {
 #ifdef CONFIG_PCI
 	noioapicquirk = 1;
@@ -132,11 +135,14 @@ static void __init _arch_disable_smp_sup
 static int __init parse_noapic(char *str)
 {
 	/* disable IO-APIC */
-	_arch_disable_smp_support();
+	_disable_ioapic_support();
 	return 0;
 }
 early_param("noapic", parse_noapic);
 
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);
+
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
@@ -194,7 +200,7 @@ int __init arch_early_irq_init(void)
 	irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
 
 	for (i = 0; i < count; i++) {
-		set_irq_chip_data(i, &cfg[i]);
+		irq_set_chip_data(i, &cfg[i]);
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
 		/*
@@ -213,7 +219,7 @@ int __init arch_early_irq_init(void)
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	return get_irq_chip_data(irq);
+	return irq_get_chip_data(irq);
 }
 
 static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -239,7 +245,7 @@ static void free_irq_cfg(unsigned int at
 {
 	if (!cfg)
 		return;
-	set_irq_chip_data(at, NULL);
+	irq_set_chip_data(at, NULL);
 	free_cpumask_var(cfg->domain);
 	free_cpumask_var(cfg->old_domain);
 	kfree(cfg);
@@ -269,14 +275,14 @@ static struct irq_cfg *alloc_irq_and_cfg
 	if (res < 0) {
 		if (res != -EEXIST)
 			return NULL;
-		cfg = get_irq_chip_data(at);
+		cfg = irq_get_chip_data(at);
 		if (cfg)
 			return cfg;
 	}
 
 	cfg = alloc_irq_cfg(at, node);
 	if (cfg)
-		set_irq_chip_data(at, cfg);
+		irq_set_chip_data(at, cfg);
 	else
 		irq_free_desc(at);
 	return cfg;
@@ -868,7 +874,7 @@ static int EISA_ELCR(unsigned int irq)
 #define default_MCA_trigger(idx)	(1)
 #define default_MCA_polarity(idx)	default_ISA_polarity(idx)
 
-static int MPBIOS_polarity(int idx)
+static int irq_polarity(int idx)
 {
 	int bus = mp_irqs[idx].srcbus;
 	int polarity;
@@ -910,7 +916,7 @@ static int MPBIOS_polarity(int idx)
 	return polarity;
 }
 
-static int MPBIOS_trigger(int idx)
+static int irq_trigger(int idx)
 {
 	int bus = mp_irqs[idx].srcbus;
 	int trigger;
@@ -982,16 +988,6 @@ static int MPBIOS_trigger(int idx)
 	return trigger;
 }
 
-static inline int irq_polarity(int idx)
-{
-	return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-	return MPBIOS_trigger(idx);
-}
-
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq;
@@ -1244,7 +1240,7 @@ void __setup_vector_irq(int cpu)
 	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
 	for_each_active_irq(irq) {
-		cfg = get_irq_chip_data(irq);
+		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
 		/*
@@ -1275,10 +1271,6 @@ void __setup_vector_irq(int cpu)
 static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 
-#define IOAPIC_AUTO     -1
-#define IOAPIC_EDGE     0
-#define IOAPIC_LEVEL    1
-
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
@@ -1303,39 +1295,35 @@ static inline int IO_APIC_irq_trigger(in
 }
 #endif
 
-static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+				 unsigned long trigger)
 {
+	struct irq_chip *chip = &ioapic_chip;
+	irq_flow_handler_t hdl;
+	bool fasteoi;
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
+	    trigger == IOAPIC_LEVEL) {
 		irq_set_status_flags(irq, IRQ_LEVEL);
-	else
+		fasteoi = true;
+	} else {
 		irq_clear_status_flags(irq, IRQ_LEVEL);
+		fasteoi = false;
+	}
 
-	if (irq_remapped(get_irq_chip_data(irq))) {
+	if (irq_remapped(cfg)) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		if (trigger)
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_fasteoi_irq,
-						     "fasteoi");
-		else
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_edge_irq, "edge");
-		return;
+		chip = &ir_ioapic_chip;
+		fasteoi = trigger != 0;
 	}
 
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq,
-					      "fasteoi");
-	else
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_edge_irq, "edge");
+	hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+	irq_set_chip_and_handler_name(irq, chip, hdl,
+				      fasteoi ? "fasteoi" : "edge");
 }
 #else /* !CONFIG_XEN */
 #define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
+#define ioapic_register_intr(irq, cfg, trigger) evtchn_register_pirq(irq)
 #endif
 
 static int setup_ioapic_entry(int apic_id, int irq,
@@ -1442,7 +1430,7 @@ static void setup_ioapic_irq(int apic_id
 		return;
 	}
 
-	ioapic_register_intr(irq, trigger);
+	ioapic_register_intr(irq, cfg, trigger);
 #ifndef CONFIG_XEN
 	if (irq < legacy_pic->nr_legacy_irqs)
 		legacy_pic->mask(irq);
@@ -1455,33 +1443,26 @@ static struct {
 	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
-static void __init setup_IO_APIC_irqs(void)
+static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
 {
-	int apic_id, pin, idx, irq, notcon = 0;
-	int node = cpu_to_node(0);
-	struct irq_cfg *cfg;
+	if (idx != -1)
+		return false;
 
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+		    mp_ioapics[apic_id].apicid, pin);
+	return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int apic_id)
+{
+	int idx, node = cpu_to_node(0);
+	struct io_apic_irq_attr attr;
+	unsigned int pin, irq;
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
 	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
 		idx = find_irq_entry(apic_id, pin, mp_INT);
-		if (idx == -1) {
-			if (!notcon) {
-				notcon = 1;
-				apic_printk(APIC_VERBOSE,
-					KERN_DEBUG " %d-%d",
-					mp_ioapics[apic_id].apicid, pin);
-			} else
-				apic_printk(APIC_VERBOSE, " %d-%d",
-					mp_ioapics[apic_id].apicid, pin);
+		if (io_apic_pin_not_connected(idx, apic_id, pin))
 			continue;
-		}
-		if (notcon) {
-			apic_printk(APIC_VERBOSE,
-				" (apicid-pin) not connected\n");
-			notcon = 0;
-		}
 
 		irq = pin_2_irq(idx, apic_id, pin);
 
@@ -1497,26 +1478,25 @@ static void __init setup_IO_APIC_irqs(vo
 		 * installed and if it returns 1:
 		 */
 		if (apic->multi_timer_check &&
-				apic->multi_timer_check(apic_id, irq))
+		    apic->multi_timer_check(apic_id, irq))
 			continue;
 #endif
 
-		cfg = alloc_irq_and_cfg_at(irq, node);
-		if (!cfg)
-			continue;
+		set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+				     irq_polarity(idx));
 
-		add_pin_to_irq_node(cfg, node, apic_id, pin);
-		/*
-		 * don't mark it in pin_programmed, so later acpi could
-		 * set it correctly when irq < 16
-		 */
-		setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
-				  irq_polarity(idx));
+		io_apic_setup_irq_pin(irq, node, &attr);
 	}
+}
 
-	if (notcon)
-		apic_printk(APIC_VERBOSE,
-			" (apicid-pin) not connected\n");
+static void __init setup_IO_APIC_irqs(void)
+{
+	unsigned int apic_id;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+		__io_apic_setup_irqs(apic_id);
 }
 
 /*
@@ -1527,7 +1507,7 @@ static void __init setup_IO_APIC_irqs(vo
 void setup_IO_APIC_irq_extra(u32 gsi)
 {
 	int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
-	struct irq_cfg *cfg;
+	struct io_apic_irq_attr attr;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
@@ -1551,21 +1531,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
 		return;
 
-	cfg = alloc_irq_and_cfg_at(irq, node);
-	if (!cfg)
-		return;
-
-	add_pin_to_irq_node(cfg, node, apic_id, pin);
+	set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+			     irq_polarity(idx));
 
-	if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapics[apic_id].apicid, pin);
-		return;
-	}
-	set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
-
-	setup_ioapic_irq(apic_id, pin, irq, cfg,
-			irq_trigger(idx), irq_polarity(idx));
+	io_apic_setup_irq_pin_once(irq, node, &attr);
 }
 
 #ifndef CONFIG_XEN
@@ -1598,7 +1567,8 @@ static void __init setup_timer_IRQ0_pin(
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we may have a 8259A-master in AEOI mode ...
 	 */
-	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+				      "edge");
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1705,7 +1675,7 @@ __apicdebuginit(void) print_IO_APIC(void
 	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 
-		cfg = get_irq_chip_data(irq);
+		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
 		entry = cfg->irq_2_pin;
@@ -1996,7 +1966,7 @@ void disable_IO_APIC(void)
 	 *
 	 * With interrupt-remapping, for now we will use virtual wire A mode,
 	 * as virtual wire B is little complex (need to configure both
-	 * IOAPIC RTE aswell as interrupt-remapping table entry).
+	 * IOAPIC RTE as well as interrupt-remapping table entry).
 	 * As this gets called during crash dump, keep this simple for now.
 	 */
 	if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
@@ -2471,7 +2441,7 @@ static void irq_complete_move(struct irq
 
 void irq_force_complete_move(int irq)
 {
-	struct irq_cfg *cfg = get_irq_chip_data(irq);
+	struct irq_cfg *cfg = irq_get_chip_data(irq);
 
 	if (!cfg)
 		return;
@@ -2485,7 +2455,7 @@ static inline void irq_complete_move(str
 static void ack_apic_edge(struct irq_data *data)
 {
 	irq_complete_move(data->chip_data);
-	move_native_irq(data->irq);
+	irq_move_irq(data);
 	ack_APIC_irq();
 }
 
@@ -2542,7 +2512,7 @@ static void ack_apic_level(struct irq_da
 	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
 		do_unmask_irq = 1;
 		mask_ioapic(cfg);
 	}
@@ -2631,7 +2601,7 @@ static void ack_apic_level(struct irq_da
 		 * and you can go talk to the chipset vendor about it.
 		 */
 		if (!io_apic_level_ack_pending(cfg))
-			move_masked_irq(irq);
+			irq_move_masked_irq(data);
 		unmask_ioapic(cfg);
 	}
 }
@@ -2699,7 +2669,7 @@ static inline void init_IO_APIC_traps(vo
 		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
 			continue;
 #endif
-		cfg = get_irq_chip_data(irq);
+		cfg = irq_get_chip_data(irq);
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2710,7 +2680,7 @@ static inline void init_IO_APIC_traps(vo
 				legacy_pic->make_irq(irq);
 			else
 				/* Strange. Oh, well.. */
-				set_irq_chip(irq, &no_irq_chip);
+				irq_set_chip(irq, &no_irq_chip);
 		}
 	}
 }
@@ -2751,7 +2721,7 @@ static struct irq_chip lapic_chip __read
 static void lapic_register_intr(int irq)
 {
 	irq_clear_status_flags(irq, IRQ_LEVEL);
-	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 }
 
@@ -2835,7 +2805,7 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = get_irq_chip_data(0);
+	struct irq_cfg *cfg = irq_get_chip_data(0);
 	int node = cpu_to_node(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
@@ -3026,7 +2996,7 @@ void __init setup_IO_APIC(void)
 }
 
 /*
- *      Called after all the initialization is done. If we didnt find any
+ *      Called after all the initialization is done. If we didn't find any
  *      APIC bugs then we can allow the modify fast path
  */
 
@@ -3048,89 +3018,84 @@ static int __init io_apic_bug_finalize(v
 late_initcall(io_apic_bug_finalize);
 
 #ifndef CONFIG_XEN
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS];
 
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
+	if (!saved_data)
+		return;
+
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		saved_data[i] = ioapic_read_entry(ioapic_id, i);
+}
+
+static int ioapic_suspend(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++)
+		suspend_ioapic(ioapic_id);
 
 	return 0;
 }
 
-static int ioapic_resume(struct sys_device *dev)
+static void resume_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
+	if (!saved_data)
+		return;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_id, 0);
+	if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) {
+		reg_00.bits.ID = mp_ioapics[ioapic_id].apicid;
+		io_apic_write(ioapic_id, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		ioapic_write_entry(ioapic_id, i, saved_data[i]);
+}
 
-	return 0;
+static void ioapic_resume(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
+		resume_ioapic(ioapic_id);
 }
 
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
+static struct syscore_ops ioapic_syscore_ops = {
 	.suspend = ioapic_suspend,
 	.resume = ioapic_resume,
 };
 
-static int __init ioapic_init_sysfs(void)
+static int __init ioapic_init_ops(void)
 {
-	struct sys_device * dev;
-	int i, size, error;
+	int i;
 
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
+	for (i = 0; i < nr_ioapics; i++) {
+		unsigned int size;
 
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+		size = nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
+		ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL);
+		if (!ioapic_saved_data[i])
+			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
 	}
 
+	register_syscore_ops(&ioapic_syscore_ops);
+
 	return 0;
 }
 
-device_initcall(ioapic_init_sysfs);
+device_initcall(ioapic_init_ops);
 
 /*
  * Dynamic irq allocate and deallocation
@@ -3160,7 +3125,7 @@ unsigned int create_irq_nr(unsigned int 
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
 	if (ret) {
-		set_irq_chip_data(irq, cfg);
+		irq_set_chip_data(irq, cfg);
 		irq_clear_status_flags(irq, IRQ_NOREQUEST);
 	} else {
 		free_irq_at(irq, cfg);
@@ -3185,7 +3150,7 @@ int create_irq(void)
 
 void destroy_irq(unsigned int irq)
 {
-	struct irq_cfg *cfg = get_irq_chip_data(irq);
+	struct irq_cfg *cfg = irq_get_chip_data(irq);
 	unsigned long flags;
 
 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3220,7 +3185,7 @@ static int msi_compose_msg(struct pci_de
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
 
-	if (irq_remapped(get_irq_chip_data(irq))) {
+	if (irq_remapped(cfg)) {
 		struct irte irte;
 		int ir_index;
 		u16 sub_handle;
@@ -3392,6 +3357,7 @@ static int msi_alloc_irte(struct pci_dev
 
 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
+	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
 	int ret;
 
@@ -3399,14 +3365,15 @@ static int setup_msi_irq(struct pci_dev 
 	if (ret < 0)
 		return ret;
 
-	set_irq_msi(irq, msidesc);
+	irq_set_msi_desc(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
-	if (irq_remapped(get_irq_chip_data(irq))) {
+	if (irq_remapped(irq_get_chip_data(irq))) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
-	} else
-		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+		chip = &msi_ir_chip;
+	}
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 
 	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
 
@@ -3524,8 +3491,8 @@ int arch_setup_dmar_msi(unsigned int irq
 	if (ret < 0)
 		return ret;
 	dmar_msi_write(irq, &msg);
-	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-		"edge");
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
 	return 0;
 }
 #endif
@@ -3583,6 +3550,7 @@ static struct irq_chip hpet_msi_type = {
 
 int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
+	struct irq_chip *chip = &hpet_msi_type;
 	struct msi_msg msg;
 	int ret;
 
@@ -3602,15 +3570,12 @@ int arch_setup_hpet_msi(unsigned int irq
 	if (ret < 0)
 		return ret;
 
-	hpet_msi_write(get_irq_data(irq), &msg);
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	if (irq_remapped(get_irq_chip_data(irq)))
-		set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
-					      handle_edge_irq, "edge");
-	else
-		set_irq_chip_and_handler_name(irq, &hpet_msi_type,
-					      handle_edge_irq, "edge");
+	if (irq_remapped(irq_get_chip_data(irq)))
+		chip = &ir_hpet_msi_type;
 
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 	return 0;
 }
 #endif
@@ -3697,7 +3662,7 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 		write_ht_irq_msg(irq, &msg);
 
-		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+		irq_set_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
 
 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3706,7 +3671,40 @@ int arch_setup_ht_irq(unsigned int irq, 
 }
 #endif /* CONFIG_HT_IRQ */
 
-int __init io_apic_get_redir_entries (int ioapic)
+static int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+	int ret;
+
+	if (!cfg)
+		return -EINVAL;
+	ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+	if (!ret)
+		setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
+				 attr->trigger, attr->polarity);
+	return ret;
+}
+
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
+{
+	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+	int ret;
+
+	/* Avoid redundant programming */
+	if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[id].apicid, pin);
+		return 0;
+	}
+	ret = io_apic_setup_irq_pin(irq, node, attr);
+	if (!ret)
+		set_bit(pin, mp_ioapic_routing[id].pin_programmed);
+	return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3762,105 +3760,32 @@ int __init arch_probe_nr_irqs(void)
 #endif
 #endif /* CONFIG_XEN */
 
-static int __io_apic_set_pci_routing(struct device *dev, int irq,
-				struct io_apic_irq_attr *irq_attr)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+			    struct io_apic_irq_attr *irq_attr)
 {
-	struct irq_cfg *cfg;
 	int node;
-	int ioapic, pin;
-	int trigger, polarity;
 
-	ioapic = irq_attr->ioapic;
 #ifdef CONFIG_XEN
 	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
-			    ioapic, irq);
+			    irq_attr->ioapic, irq);
 		return -EINVAL;
 	}
 #endif
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
+			    irq_attr->ioapic);
 		return -EINVAL;
 	}
 
-	if (dev)
-		node = dev_to_node(dev);
-	else
-		node = cpu_to_node(0);
-
-	cfg = alloc_irq_and_cfg_at(irq, node);
-	if (!cfg)
-		return 0;
-
-	pin = irq_attr->ioapic_pin;
-	trigger = irq_attr->trigger;
-	polarity = irq_attr->polarity;
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= legacy_pic->nr_legacy_irqs) {
-		if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
-			printk(KERN_INFO "can not add pin %d for irq %d\n",
-				pin, irq);
-			return 0;
-		}
-	}
-
-	setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
+	node = dev ? dev_to_node(dev) : cpu_to_node(0);
 
-	return 0;
-}
-
-int io_apic_set_pci_routing(struct device *dev, int irq,
-				struct io_apic_irq_attr *irq_attr)
-{
-	int ioapic, pin;
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
-	ioapic = irq_attr->ioapic;
-	pin = irq_attr->ioapic_pin;
-	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapics[ioapic].apicid, pin);
-		return 0;
-	}
-	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
-	return __io_apic_set_pci_routing(dev, irq, irq_attr);
+	return io_apic_setup_irq_pin_once(irq, node, irq_attr);
 }
 
-u8 __init io_apic_unique_id(u8 id)
-{
 #ifdef CONFIG_X86_32
 #ifndef CONFIG_XEN
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return io_apic_get_unique_id(nr_ioapics, id);
-	else
-#endif
-		return id;
-#else
-	int i;
-	DECLARE_BITMAP(used, 256);
-
-	bitmap_zero(used, 256);
-	for (i = 0; i < nr_ioapics; i++) {
-		struct mpc_ioapic *ia = &mp_ioapics[i];
-		__set_bit(ia->apicid, used);
-	}
-	if (!test_bit(id, used))
-		return id;
-	return find_first_zero_bit(used, 256);
-#endif
-}
-
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3935,7 +3860,34 @@ int __init io_apic_get_unique_id(int ioa
 }
 #endif
 
-int __init io_apic_get_version(int ioapic)
+static u8 __init io_apic_unique_id(u8 id)
+{
+#ifndef CONFIG_XEN
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return io_apic_get_unique_id(nr_ioapics, id);
+	else
+#endif
+		return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+	int i;
+	DECLARE_BITMAP(used, 256);
+
+	bitmap_zero(used, 256);
+	for (i = 0; i < nr_ioapics; i++) {
+		struct mpc_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->apicid, used);
+	}
+	if (!test_bit(id, used))
+		return id;
+	return find_first_zero_bit(used, 256);
+}
+#endif
+
+static int __init io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3981,8 +3933,8 @@ int acpi_get_override_irq(u32 gsi, int *
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
-	struct irq_desc *desc;
 	const struct cpumask *mask;
+	struct irq_data *idata;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3997,21 +3949,20 @@ void __init setup_ioapic_dest(void)
 		if ((ioapic > 0) && (irq > 16))
 			continue;
 
-		desc = irq_to_desc(irq);
+		idata = irq_get_irq_data(irq);
 
 		/*
 		 * Honour affinities which have been set in early boot
 		 */
-		if (desc->status &
-		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-			mask = desc->irq_data.affinity;
+		if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+			mask = idata->affinity;
 		else
 			mask = apic->target_cpus();
 
 		if (intr_remapping_enabled)
-			ir_ioapic_set_affinity(&desc->irq_data, mask, false);
+			ir_ioapic_set_affinity(idata, mask, false);
 		else
-			ioapic_set_affinity(&desc->irq_data, mask, false);
+			ioapic_set_affinity(idata, mask, false);
 	}
 
 }
@@ -4140,10 +4091,10 @@ int mp_find_ioapic_pin(int ioapic, u32 g
 	return gsi - mp_gsi_routing[ioapic].gsi_base;
 }
 
-static int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(unsigned long address)
 {
 	if (nr_ioapics >= MAX_IO_APICS) {
-		printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
 		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
 		return 1;
 	}
@@ -4203,21 +4154,17 @@ void __init mp_register_ioapic(int id, u
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {
-	struct irq_cfg *cfg;
+	struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
 
 	printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
 	physid_set_mask_of_physid(boot_cpu_physical_apicid,
 					 &phys_cpu_present_map);
 #endif
-	/* Make sure the irq descriptor is set up */
-	cfg = alloc_irq_and_cfg_at(0, 0);
-
 	setup_local_APIC();
 
-	add_pin_to_irq_node(cfg, 0, 0, 0);
-	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
-	setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
+	io_apic_setup_irq_pin(0, 0, &attr);
+	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+				      "edge");
 }
 #endif
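
Note: the biggest block above converts IO-APIC suspend/resume from
struct sys_device to the syscore_ops interface new in 2.6.39. Syscore
callbacks run on one CPU with interrupts disabled, so the per-device
sysfs objects, class registration and error plumbing all become
unnecessary. The bare pattern, sketched with hypothetical foo_* names:

	#include <linux/syscore_ops.h>

	static int foo_suspend(void)	/* returns int: may veto suspend */
	{
		/* save hardware state; IRQs are off, single CPU */
		return 0;
	}

	static void foo_resume(void)	/* void: resume cannot fail */
	{
		/* restore hardware state */
	}

	static struct syscore_ops foo_syscore_ops = {
		.suspend = foo_suspend,
		.resume  = foo_resume,
	};

	static int __init foo_init(void)
	{
		register_syscore_ops(&foo_syscore_ops);
		return 0;
	}
	device_initcall(foo_init);
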
--- head.orig/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:17.000000000 +0200
+++ head/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:53.000000000 +0200
@@ -671,6 +671,7 @@ static void __cpuinit init_amd(struct cp
 	if (c->x86 > 0x11)
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
+#ifndef CONFIG_XEN
 	/*
 	 * Disable GART TLB Walk Errors on Fam10h. We do this here
 	 * because this is always needed when GART is enabled, even in a
@@ -694,6 +695,7 @@ static void __cpuinit init_amd(struct cp
 	}
 
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+#endif
 }
 
 #ifdef CONFIG_X86_32
--- head.orig/arch/x86/kernel/cpu/common-xen.c	2011-05-18 10:47:16.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c	2011-05-18 10:47:21.000000000 +0200
@@ -910,7 +910,7 @@ static void __cpuinit identify_cpu(struc
 
 	select_idle_routine(c);
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
 }
--- head.orig/arch/x86/kernel/e820-xen.c	2011-04-26 09:19:42.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c	2011-09-23 16:02:14.000000000 +0200
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/crash_dump.h>
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
@@ -722,21 +723,15 @@ __init void e820_setup_gap(void)
  * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
  * linked list of struct setup_data, which is parsed here.
  */
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
 {
-	u32 map_len;
 	int entries;
 	struct e820entry *extmap;
 
 	entries = sdata->len / sizeof(struct e820entry);
-	map_len = sdata->len + sizeof(struct setup_data);
-	if (map_len > PAGE_SIZE)
-		sdata = early_ioremap(pa_data, map_len);
 	extmap = (struct e820entry *)(sdata->data);
 	__append_e820_map(extmap, entries);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-	if (map_len > PAGE_SIZE)
-		early_iounmap(sdata, map_len);
 	printk(KERN_INFO "extended physical RAM map:\n");
 	_e820_print_map(&e820, "extended");
 }
@@ -946,15 +941,23 @@ static int __init parse_memopt(char *p)
 	if (!p)
 		return -EINVAL;
 
-#ifdef CONFIG_X86_32
+#ifndef CONFIG_XEN
 	if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
 		setup_clear_cpu_cap(X86_FEATURE_PSE);
 		return 0;
+#else
+		printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+		return -EINVAL;
+#endif
 	}
 #endif
 
 	userdef = 1;
 	mem_size = memparse(p, &p);
+	/* don't remove all of memory when handling "mem={invalid}" param */
+	if (mem_size == 0)
+		return -EINVAL;
 	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
 	i = e820.nr_map - 1;
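
Note: the mem_size == 0 guard closes a nasty corner case: memparse()
returns 0 for an unparsable string, and the subsequent
e820_remove_range(0, ULLONG_MAX, E820_RAM, 1) would have removed every
byte of RAM. Worked example:

	/*
	 * "mem=512M" -> mem_size = 0x20000000; RAM above 512 MiB is
	 *               trimmed by e820_remove_range(0x20000000,
	 *               ULLONG_MAX - 0x20000000, E820_RAM, 1).
	 * "mem=junk" -> memparse() yields 0; now rejected with -EINVAL
	 *               instead of dropping all E820_RAM ranges.
	 */
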
--- head.orig/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:18:49.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:19:12.000000000 +0100
@@ -66,6 +66,8 @@
 #define sysexit_audit	syscall_exit_work
 #endif
 
+	.section .entry.text, "ax"
+
 /*
  * We use macros for low-level operations which need to be overridden
  * for paravirtualization.  The following will never clobber any registers:
@@ -399,7 +401,7 @@ sysenter_past_esp:
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
-	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
 	CFI_REL_OFFSET eip, 0
 
 	pushl_cfi %eax
@@ -858,7 +860,7 @@ ENDPROC(ptregs_clone)
  */
 .section .init.rodata,"a"
 ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
 	.p2align 5
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 ENTRY(irq_entries_start)
@@ -877,7 +879,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .endif
       .previous
 	.long 1b
-      .text
+      .section .entry.text, "ax"
 vector=vector+1
     .endif
   .endr
@@ -1685,11 +1687,10 @@ END(general_protection)
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
 	RING0_EC_FRAME
-	pushl $do_async_page_fault
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_async_page_fault
 	jmp error_code
 	CFI_ENDPROC
-END(apf_page_fault)
+END(async_page_fault)
 #endif
 
 /*
--- head.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:41:33.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:41:39.000000000 +0200
@@ -21,7 +21,7 @@
  * A note on terminology:
  * - top of stack: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers upto R11.
+ * - partial stack frame: partially saved registers up to R11.
  * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
@@ -66,6 +66,8 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 	.code64
+	.section .entry.text, "ax"
+
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -457,7 +459,7 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * System call entry. Upto 6 arguments in registers are supported.
+ * System call entry. Up to 6 arguments in registers are supported.
  *
  * SYSCALL does not save anything on the stack and does not change the
  * stack pointer.
@@ -858,9 +860,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
 apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
 	invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
 .endr
 #endif
 
--- head.orig/arch/x86/kernel/head32-xen.c	2011-05-09 11:42:46.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c	2011-05-09 11:43:03.000000000 +0200
@@ -51,15 +51,6 @@ void __init i386_start_kernel(void)
 
 	memblock_init();
 
-#ifdef CONFIG_X86_TRAMPOLINE
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
-
 	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifndef CONFIG_XEN
--- head.orig/arch/x86/kernel/head64-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/head64-xen.c	2011-04-12 15:59:10.000000000 +0200
@@ -95,9 +95,6 @@ void __init x86_64_start_kernel(char * r
 	/* Make NULL pointers segfault */
 	zap_identity_mappings();
 
-	/* Cleanup the over mapped high alias */
-	cleanup_highmap();
-
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
 #ifdef CONFIG_EARLY_PRINTK
 		set_intr_gate(i, &early_idt_handlers[i]);
--- head.orig/arch/x86/kernel/head_32-xen.S	2011-08-09 11:07:43.000000000 +0200
+++ head/arch/x86/kernel/head_32-xen.S	2011-08-09 11:10:21.000000000 +0200
@@ -92,7 +92,7 @@ ENTRY(startup_32)
 	movl $__HYPERVISOR_update_va_mapping, %eax
 	int $0x82
 
-	movl $(PAGE_SIZE_asm / 8), %ecx
+	movl $(PAGE_SIZE / 8), %ecx
 	movl %esp, %ebx
 	movl $__HYPERVISOR_set_gdt, %eax
 	int $0x82
@@ -121,7 +121,7 @@ ENTRY(hypercall_page)
  * BSS section
  */
 __PAGE_ALIGNED_BSS
-	.align PAGE_SIZE_asm
+	.align PAGE_SIZE
 ENTRY(swapper_pg_fixmap)
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
--- head.orig/arch/x86/kernel/ioport-xen.c	2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/kernel/ioport-xen.c	2011-04-12 16:53:32.000000000 +0200
@@ -14,23 +14,10 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <linux/bitmap.h>
 #include <asm/syscalls.h>
 #include <xen/interface/physdev.h>
 
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
-		       unsigned int extent, int new_value)
-{
-	unsigned int i;
-
-	for (i = base; i < base + extent; i++) {
-		if (new_value)
-			__set_bit(i, bitmap);
-		else
-			__clear_bit(i, bitmap);
-	}
-}
-
 /*
  * this changes the io permissions bitmap in the current task.
  */
@@ -65,7 +52,10 @@ asmlinkage long sys_ioperm(unsigned long
 					      &set_iobitmap));
 	}
 
-	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+	if (turn_on)
+		bitmap_clear(t->io_bitmap_ptr, from, num);
+	else
+		bitmap_set(t->io_bitmap_ptr, from, num);
 
 	return 0;
 }
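
Note: the hand-rolled set_bitmap() loop is replaced by the
linux/bitmap.h helpers. Mind the inverted sense: in the I/O permission
bitmap a clear bit grants port access, hence bitmap_clear() when
turn_on is set. The helpers' semantics, restated as the loop they
replace:

	#include <linux/bitops.h>

	/* bitmap_set(map, start, n) sets n consecutive bits starting at
	 * start; bitmap_clear() clears them. Equivalent open-coded form: */
	static void bitmap_assign(unsigned long *map, unsigned int start,
				  unsigned int n, bool set)
	{
		unsigned int i;

		for (i = start; i < start + n; i++) {
			if (set)
				__set_bit(i, map);
			else
				__clear_bit(i, map);
		}
	}
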
--- head.orig/arch/x86/kernel/irq-xen.c	2011-02-18 15:17:23.000000000 +0100
+++ head/arch/x86/kernel/irq-xen.c	2011-04-13 17:01:32.000000000 +0200
@@ -8,6 +8,7 @@
 #include <linux/seq_file.h>
 #include <linux/smp.h>
 #include <linux/ftrace.h>
+#include <linux/delay.h>
 
 #include <asm/apic.h>
 #include <asm/io_apic.h>
@@ -48,9 +49,9 @@ void ack_bad_irq(unsigned int irq)
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
 /*
- * /proc/interrupts printing:
+ * /proc/interrupts printing for arch specific interrupts
  */
-static int show_other_interrupts(struct seq_file *p, int prec)
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	int j;
 
@@ -135,59 +136,6 @@ static int show_other_interrupts(struct 
 	return 0;
 }
 
-int show_interrupts(struct seq_file *p, void *v)
-{
-	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j, prec;
-	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > nr_irqs)
-		return 0;
-
-	for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
-		j *= 10;
-
-	if (i == nr_irqs)
-		return show_other_interrupts(p, prec);
-
-	/* print header */
-	if (i == 0) {
-		seq_printf(p, "%*s", prec + 8, "");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d", j);
-		seq_putc(p, '\n');
-	}
-
-	desc = irq_to_desc(i);
-	if (!desc)
-		return 0;
-
-	raw_spin_lock_irqsave(&desc->lock, flags);
-	for_each_online_cpu(j)
-		any_count |= kstat_irqs_cpu(i, j);
-	action = desc->action;
-	if (!action && !any_count)
-		goto out;
-
-	seq_printf(p, "%*d: ", prec, i);
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-	seq_printf(p, " %8s", desc->irq_data.chip->name);
-	seq_printf(p, "-%-8s", desc->name);
-
-	if (action) {
-		seq_printf(p, "  %s", action->name);
-		while ((action = action->next) != NULL)
-			seq_printf(p, ", %s", action->name);
-	}
-
-	seq_putc(p, '\n');
-out:
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
-	return 0;
-}
-
 /*
  * /proc/stat helpers
  */
@@ -295,15 +243,6 @@ void smp_x86_platform_ipi(struct pt_regs
 }
 #endif
 
-#ifdef CONFIG_OF
-unsigned int irq_create_of_mapping(struct device_node *controller,
-		const u32 *intspec, unsigned int intsize)
-{
-	return intspec[0];
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-#endif
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <xen/evtchn.h>
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
@@ -313,6 +252,7 @@ void fixup_irqs(void)
 	static int warned;
 	struct irq_desc *desc;
 	struct irq_data *data;
+	struct irq_chip *chip;
 	static DECLARE_BITMAP(irqs_used, NR_IRQS);
 
 	for_each_irq_desc(irq, desc) {
@@ -328,7 +268,7 @@ void fixup_irqs(void)
 		/* interrupt's are disabled at this point */
 		raw_spin_lock(&desc->lock);
 
-		data = &desc->irq_data;
+		data = irq_desc_get_irq_data(desc);
 		affinity = data->affinity;
 		if (!irq_has_action(irq) ||
 		    cpumask_subset(affinity, cpu_online_mask)) {
@@ -344,16 +284,17 @@ void fixup_irqs(void)
 			affinity = cpu_all_mask;
 		}
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
-			data->chip->irq_mask(data);
+		chip = irq_data_get_irq_chip(data);
+		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
+			chip->irq_mask(data);
 
-		if (data->chip->irq_set_affinity)
-			data->chip->irq_set_affinity(data, affinity, true);
+		if (chip->irq_set_affinity)
+			chip->irq_set_affinity(data, affinity, true);
 		else if (data->chip != &no_irq_chip && !(warned++))
 			set_affinity = 0;
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
-			data->chip->irq_unmask(data);
+		if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+			chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
 
@@ -380,10 +321,11 @@ void fixup_irqs(void)
 
 		if (xen_test_irq_pending(irq)) {
 			desc = irq_to_desc(irq);
-			data = &desc->irq_data;
+			data = irq_desc_get_irq_data(desc);
+			chip = irq_data_get_irq_chip(data);
 			raw_spin_lock(&desc->lock);
-			if (data->chip->irq_retrigger)
-				data->chip->irq_retrigger(data);
+			if (chip->irq_retrigger)
+				chip->irq_retrigger(data);
 			raw_spin_unlock(&desc->lock);
 		}
 	}
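
Note: the large deletion in irq-xen.c is enabled by GENERIC_IRQ_SHOW:
from 2.6.39 the core provides show_interrupts() itself and only calls
back into the architecture for the extra rows. The hook keeps roughly
the shape sketched below (seq_file context; irq_stats() is the
file-local per-CPU accessor retained above):

	int arch_show_interrupts(struct seq_file *p, int prec)
	{
		int j;

		seq_printf(p, "%*s: ", prec, "NMI");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
		seq_printf(p, "  Non-maskable interrupts\n");
		return 0;
	}
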
--- head.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 16:09:24.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c	2011-04-12 15:59:10.000000000 +0200
@@ -752,10 +752,6 @@ static void __init check_irq_src(struct 
 		*nr_m_spare += 1;
 	}
 }
-#else /* CONFIG_X86_IO_APIC */
-static
-inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
-#endif /* CONFIG_X86_IO_APIC */
 
 static int
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
@@ -769,6 +765,10 @@ check_slot(unsigned long mpc_new_phys, u
 
 	return ret;
 }
+#else /* CONFIG_X86_IO_APIC */
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+#endif /* CONFIG_X86_IO_APIC */
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
 					unsigned long mpc_new_phys,
@@ -921,7 +921,7 @@ static int __init update_mp_table(void)
 
 	if (!mpc_new_phys) {
 		unsigned char old, new;
-		/* check if we can change the postion */
+		/* check if we can change the position */
 		mpc->checksum = 0;
 		old = mpf_checksum((unsigned char *)mpc, mpc->length);
 		mpc->checksum = 0xff;
@@ -930,7 +930,7 @@ static int __init update_mp_table(void)
 			printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
 			return 0;
 		}
-		printk(KERN_INFO "use in-positon replacing\n");
+		printk(KERN_INFO "use in-position replacing\n");
 	} else {
 		maddr_t mpc_new_bus;
 
--- head.orig/arch/x86/kernel/process-xen.c	2011-03-03 16:13:18.000000000 +0100
+++ head/arch/x86/kernel/process-xen.c	2011-04-13 17:01:32.000000000 +0200
@@ -89,7 +89,7 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
 	show_registers(regs);
-	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
+	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
 }
 
 void show_regs_common(void)
@@ -112,12 +112,9 @@ void show_regs_common(void)
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	printk(KERN_CONT " ");
-	printk(KERN_CONT "%s %s", vendor, product);
-	if (board) {
-		printk(KERN_CONT "/");
-		printk(KERN_CONT "%s", board);
-	}
+	printk(KERN_CONT " %s %s", vendor, product);
+	if (board)
+		printk(KERN_CONT "/%s", board);
 	printk(KERN_CONT "\n");
 }
 
--- head.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:48:24.000000000 +0100
+++ head/arch/x86/kernel/process_64-xen.c	2011-04-12 15:59:10.000000000 +0200
@@ -569,6 +569,10 @@ void set_personality_64bit(void)
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32);
 
+	/* Ensure the corresponding mm is not marked. */
+	if (current->mm)
+		current->mm->context.ia32_compat = 0;
+
 	/* TBD: overwrites user setup. Should have two bits.
 	   But 64bit processes have always behaved this way,
 	   so it's not too bad. The main problem is just that
@@ -584,6 +588,10 @@ void set_personality_ia32(void)
 	set_thread_flag(TIF_IA32);
 	current->personality |= force_personality32;
 
+	/* Mark the associated mm as containing 32-bit tasks. */
+	if (current->mm)
+		current->mm->context.ia32_compat = 1;
+
 	/* Prepare the first "return" to user space */
 	current_thread_info()->status |= TS_COMPAT;
 }
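
Note: mm->context.ia32_compat lets later code classify an address space
as containing 32-bit tasks without holding a task_struct. A hypothetical
consumer, for illustration only:

	/* Sketch; the helper name is made up. */
	static inline bool mm_has_ia32_tasks(struct mm_struct *mm)
	{
		return mm && mm->context.ia32_compat;
	}
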
--- head.orig/arch/x86/kernel/setup-xen.c	2012-06-08 10:35:55.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c	2012-06-08 10:36:10.000000000 +0200
@@ -113,6 +113,7 @@
 #endif
 #include <asm/mce.h>
 #include <asm/alternative.h>
+#include <asm/prom.h>
 
 #ifdef CONFIG_XEN
 #include <asm/hypervisor.h>
@@ -332,6 +333,9 @@ static void __init init_gbpages(void)
 static inline void init_gbpages(void)
 {
 }
+static void __init cleanup_highmap(void)
+{
+}
 #endif
 
 static void __init reserve_brk(void)
@@ -486,16 +490,30 @@ static void __init parse_setup_data(void
 		return;
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
-		data = early_memremap(pa_data, PAGE_SIZE);
+		u32 data_len, map_len;
+
+		map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
+			      (u64)sizeof(struct setup_data));
+		data = early_memremap(pa_data, map_len);
+		data_len = data->len + sizeof(struct setup_data);
+		if (data_len > map_len) {
+			early_iounmap(data, map_len);
+			data = early_memremap(pa_data, data_len);
+			map_len = data_len;
+		}
+
 		switch (data->type) {
 		case SETUP_E820_EXT:
-			parse_e820_ext(data, pa_data);
+			parse_e820_ext(data);
+			break;
+		case SETUP_DTB:
+			add_dtb(pa_data);
 			break;
 		default:
 			break;
 		}
 		pa_data = data->next;
-		early_iounmap(data, PAGE_SIZE);
+		early_iounmap(data, map_len);
 	}
 #endif
 }
@@ -669,28 +687,6 @@ void __init reserve_standard_io_resource
 
 }
 
-/*
- * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
- * is_kdump_kernel() to determine if we are booting after a panic. Hence
- * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
- */
-
-#ifdef CONFIG_CRASH_DUMP
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
-static int __init setup_elfcorehdr(char *arg)
-{
-	char *end;
-	if (!arg)
-		return -EINVAL;
-	elfcorehdr_addr = memparse(arg, &end);
-	return end > arg ? 0 : -EINVAL;
-}
-early_param("elfcorehdr", setup_elfcorehdr);
-#endif
-
 static __init void reserve_ibft_region(void)
 {
 	unsigned long addr, size = 0;
@@ -752,15 +748,6 @@ static int __init parse_reservelow(char 
 early_param("reservelow", parse_reservelow);
 #endif
 
-static u64 __init get_max_mapped(void)
-{
-	u64 end = max_pfn_mapped;
-
-	end <<= PAGE_SHIFT;
-
-	return end;
-}
-
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -776,11 +763,8 @@ static u64 __init get_max_mapped(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-	int acpi = 0;
-	int amd = 0;
 	unsigned long flags;
 #ifdef CONFIG_XEN
-	unsigned int i;
 	unsigned long p2m_pages;
 	struct physdev_set_iopl set_iopl;
 
@@ -1047,6 +1031,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	reserve_brk();
 
+	cleanup_highmap();
+
 	memblock.current_limit = get_max_mapped();
 	memblock_x86_fill();
 
@@ -1060,15 +1046,10 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
 			max_pfn_mapped<<PAGE_SHIFT);
 
-	reserve_trampoline_memory();
-
-#ifdef CONFIG_ACPI_SLEEP
-	/*
-	 * Reserve low memory region for sleep support.
-	 * even before init_memory_mapping
-	 */
-	acpi_reserve_wakeup_memory();
+#ifndef CONFIG_XEN
+	setup_trampolines();
 #endif
+
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
@@ -1118,19 +1099,7 @@ void __init setup_arch(char **cmdline_p)
 
 	early_acpi_boot_init();
 
-#ifdef CONFIG_ACPI_NUMA
-	/*
-	 * Parse SRAT to discover nodes.
-	 */
-	acpi = acpi_numa_init();
-#endif
-
-#ifdef CONFIG_AMD_NUMA
-	if (!acpi)
-		amd = !amd_numa_init(0, max_pfn);
-#endif
-
-	initmem_init(0, max_pfn, acpi, amd);
+	initmem_init();
 	memblock_find_dma_reserve();
 	dma32_reserve_bootmem();
 
@@ -1142,6 +1111,11 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
+	if (boot_cpu_data.cpuid_level >= 0) {
+		/* A CPU has %cr4 if and only if it has CPUID */
+		mmu_cr4_features = read_cr4();
+	}
+
 #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 	/* sync back kernel address range */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
@@ -1224,10 +1198,14 @@ void __init setup_arch(char **cmdline_p)
 			virt_to_mfn(pfn_to_mfn_frame_list_list);
 	}
 
+#ifdef CONFIG_ISA_DMA_API
+# define ch p2m_pages
 	/* Mark all ISA DMA channels in-use - using them wouldn't work. */
-	for (i = 0; i < MAX_DMA_CHANNELS; ++i)
-		if (i != 4 && request_dma(i, "xen") != 0)
+	for (ch = 0; ch < MAX_DMA_CHANNELS; ++ch)
+		if (ch != 4 && request_dma(ch, "xen") != 0)
 			BUG();
+# undef ch
+#endif
 #else /* CONFIG_XEN */
 	generic_apic_probe();
 
@@ -1238,8 +1216,8 @@ void __init setup_arch(char **cmdline_p)
 	 * Read APIC and some other early information from ACPI tables.
 	 */
 	acpi_boot_init();
-
 	sfi_init();
+	x86_dtb_init();
 
 	/*
 	 * get boot-time SMP configuration:
@@ -1249,9 +1227,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 #ifndef CONFIG_XEN
 	init_apic_mappings();
@@ -1289,6 +1265,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	x86_init.oem.banner();
 
+	x86_init.timers.wallclock_init();
+
 	mcheck_init();
 
 	local_irq_save(flags);
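
Note: parse_setup_data() now sizes its early mapping instead of assuming
every setup_data record fits in one page, which is also what allowed the
remap juggling to be dropped from parse_e820_ext() in the e820-xen.c
hunk above; cleanup_highmap() likewise moves here from
x86_64_start_kernel(). Worked example of the two-step mapping (numbers
hypothetical):

	/*
	 * pa_data = 0x1ff0, PAGE_SIZE = 4096, sizeof(struct setup_data) = 16
	 * first map:  map_len = max(4096 - 0xff0, 16) = 16 bytes,
	 *             just the header, enough to read data->len safely;
	 * data->len = 4000 -> data_len = 4016 > 16, so the record is
	 *             remapped at its full length before the switch() runs.
	 */
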
--- head.orig/arch/x86/kernel/setup_percpu.c	2012-05-23 13:34:18.000000000 +0200
+++ head/arch/x86/kernel/setup_percpu.c	2012-05-23 13:38:58.000000000 +0200
@@ -231,7 +231,8 @@ void __init setup_per_cpu_areas(void)
 		 * are zeroed indicating that the static arrays are
 		 * gone.
 		 */
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
+#ifdef CONFIG_X86_LOCAL_APIC
 		per_cpu(x86_cpu_to_apicid, cpu) =
 			early_per_cpu_map(x86_cpu_to_apicid, cpu);
 		per_cpu(x86_bios_cpu_apicid, cpu) =
@@ -241,6 +242,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(x86_cpu_to_logical_apicid, cpu) =
 			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
 #endif
+#endif
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
@@ -268,13 +270,15 @@ void __init setup_per_cpu_areas(void)
 	}
 
 	/* indicate the early static arrays will soon be gone */
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
+#ifdef CONFIG_X86_LOCAL_APIC
 	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
 	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
 #endif
 #ifdef CONFIG_X86_32
 	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
 #endif
+#endif
 #ifdef CONFIG_NUMA
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
--- head.orig/arch/x86/kernel/time-xen.c	2012-02-10 13:29:07.000000000 +0100
+++ head/arch/x86/kernel/time-xen.c	2012-02-10 13:29:28.000000000 +0100
@@ -19,6 +19,9 @@
 #include <linux/cpufreq.h>
 #include <linux/clocksource.h>
 
+extern seqlock_t xtime_lock;
+extern void do_timer(unsigned long ticks);
+
 #include <asm/vsyscall.h>
 #include <asm/delay.h>
 #include <asm/time.h>
@@ -269,19 +272,14 @@ static void sync_xen_wallclock(unsigned 
 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
 static void sync_xen_wallclock(unsigned long dummy)
 {
-	struct timespec now;
-	unsigned long seq;
+	struct timespec now, ignore;
 	struct xen_platform_op op;
 
 	BUG_ON(!is_initial_xendomain());
 	if (!ntp_synced() || independent_wallclock)
 		return;
 
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		now = __current_kernel_time();
-	} while (read_seqretry(&xtime_lock, seq));
-
+	get_xtime_and_monotonic_and_sleep_offset(&now, &ignore, &ignore);
 	set_normalized_timespec(&now, now.tv_sec, now.tv_nsec);
 
 	op.cmd = XENPF_settime;
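
The sync_xen_wallclock() change replaces an open-coded xtime_lock read loop with the 2.6.39 helper get_xtime_and_monotonic_and_sleep_offset(), since the timekeeping core no longer exports xtime_lock for general use (hence the stop-gap extern declarations added at the top of the file). The retired reader pattern, shown generically as a sketch:

    #include <linux/seqlock.h>
    #include <linux/time.h>

    /* Generic seqlock reader: retry until no writer intervened, so the
     * snapshot in 'ts' is internally consistent.  Sketch only. */
    static struct timespec read_time_snapshot(seqlock_t *lock,
                                              struct timespec (*get)(void))
    {
        struct timespec ts;
        unsigned seq;

        do {
            seq = read_seqbegin(lock);      /* note writer generation */
            ts = get();                     /* may race with an update */
        } while (read_seqretry(lock, seq)); /* raced: read again */

        return ts;
    }
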
--- head.orig/arch/x86/kernel/x86_init-xen.c	2011-07-11 13:00:38.000000000 +0200
+++ head/arch/x86/kernel/x86_init-xen.c	2011-07-11 13:01:21.000000000 +0200
@@ -59,6 +59,10 @@ struct x86_init_ops x86_init __initdata 
 		.banner			= x86_init_noop,
 	},
 
+	.mapping = {
+		.pagetable_reserve		= xen_pagetable_reserve,
+	},
+
 	.paging = {
 		.pagetable_setup_start	= x86_init_pgd_noop,
 		.pagetable_setup_done	= x86_init_pgd_noop,
@@ -68,6 +72,7 @@ struct x86_init_ops x86_init __initdata 
 		.setup_percpu_clockev	= NULL,
 		.tsc_pre_init		= x86_init_noop,
 		.timer_init		= x86_init_noop,
+		.wallclock_init		= x86_init_noop,
 	},
 
 	.iommu = {
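
Two new slots appear in the Xen flavour of the x86_init ops table: mapping.pagetable_reserve (pointed at xen_pagetable_reserve, added to mm/init-xen.c below) and timers.wallclock_init (a noop here, but called unconditionally from setup_arch() as seen earlier). The table pattern in miniature, with hypothetical names:

    /* Hypothetical reduction of the x86_init hook pattern: every slot
     * has a safe default, callers never test for NULL, and platforms
     * override only the slots they care about. */
    struct timer_hooks {
        void (*wallclock_init)(void);
    };

    static void noop(void) { }              /* default: do nothing */

    static struct timer_hooks timer_hooks = {
        .wallclock_init = noop,
    };

    static void boot_timers(void)
    {
        timer_hooks.wallclock_init();       /* unconditional call */
    }
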
--- head.orig/arch/x86/mm/init-xen.c	2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/mm/init-xen.c	2011-05-23 11:32:36.000000000 +0200
@@ -19,9 +19,9 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-unsigned long __meminitdata e820_table_start;
-unsigned long __meminitdata e820_table_end;
-unsigned long __meminitdata e820_table_top;
+unsigned long __meminitdata pgt_buf_start;
+unsigned long __meminitdata pgt_buf_end;
+unsigned long __meminitdata pgt_buf_top;
 
 int after_bootmem;
 
@@ -72,21 +72,14 @@ static void __init find_early_table_spac
 #ifdef CONFIG_X86_32
 	/* for fixmap */
 	tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
 
-	/*
-	 * RED-PEN putting page tables only on node 0 could
-	 * cause a hotspot and fill up ZONE_DMA. The page tables
-	 * need roughly 0.5KB per GB.
-	 */
-#ifdef CONFIG_X86_32
-	e820_table_start = extend_init_mapping(tables);
-	e820_table_end = e820_table_start;
+	pgt_buf_start = extend_init_mapping(tables);
+	pgt_buf_end = pgt_buf_start;
 #else /* CONFIG_X86_64 */
-	if (!e820_table_top) {
-		e820_table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
+	if (!pgt_buf_top) {
+		pgt_buf_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
 			xen_start_info->nr_pt_frames;
-		e820_table_end = e820_table_start;
+		pgt_buf_end = pgt_buf_start;
 	} else {
 		/*
 		 * [table_start, table_top) gets passed to reserve_early(),
@@ -94,16 +87,21 @@ static void __init find_early_table_spac
 		 * to allocate from there. table_end possibly being below
 		 * table_start is otoh not a problem.
 		 */
-		e820_table_start = e820_table_top;
+		pgt_buf_start = pgt_buf_top;
 	}
 #endif
-	if (e820_table_start == -1UL)
+	if (pgt_buf_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
-	e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+	pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
 
 	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
-		end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
+}
+
+void __init xen_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
 }
 
 struct map_range {
@@ -303,7 +301,7 @@ unsigned long __init_refok init_memory_m
 		}
 
 		/* Blow away any spurious initial mappings. */
-		va = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
+		va = __START_KERNEL_map + (pgt_buf_start << PAGE_SHIFT);
 
 		addr = page[pgd_index(va)];
 		page = addr_to_page(addr);
@@ -329,20 +327,35 @@ unsigned long __init_refok init_memory_m
 #endif
 
 #ifdef CONFIG_X86_64
-	BUG_ON(e820_table_end > e820_table_top);
+	BUG_ON(pgt_buf_end > pgt_buf_top);
 	if (!start)
 		xen_finish_init_mapping();
 	else
 #endif
-	if (e820_table_end < e820_table_top)
+	if (pgt_buf_end < pgt_buf_top)
 		/* Disable the 'table_end' allocator. */
-		e820_table_top = e820_table_end;
+		pgt_buf_top = pgt_buf_end;
 
 	__flush_tlb_all();
 
-	if (!after_bootmem && e820_table_top > e820_table_start)
-		memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
-			      e820_table_top << PAGE_SHIFT, "PGTABLE");
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native this just means calling memblock_x86_reserve_range; on
+	 * Xen it also means marking RW the pagetable pages that we allocated
+	 * before but that haven't been used yet.
+	 *
+	 * In fact, on Xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
+	if (!after_bootmem && pgt_buf_top > pgt_buf_start)
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_top));
 
 	if (!after_bootmem)
 		early_memtest(start, end);
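
The comment above explains why finalizing the pagetable scratch area is now routed through x86_init.mapping.pagetable_reserve: native and Xen need different behaviour, and on this tree the whole [pgt_buf_start, pgt_buf_top) window is passed because pages beyond pgt_buf_end were already mapped read-only. For comparison, the native-side hook is believed to reduce to the plain memblock reservation (sketch mirroring upstream's native_pagetable_reserve):

    #include <linux/init.h>
    #include <linux/types.h>
    #include <linux/memblock.h>

    /* Native-side counterpart (sketch): no RO/RW fixups are needed, so
     * the hook is nothing more than reserving the given range. */
    void __init native_pagetable_reserve(u64 start, u64 end)
    {
        memblock_x86_reserve_range(start, end, "PGTABLE");
    }
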
--- head.orig/arch/x86/mm/init_32-xen.c	2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c	2011-04-13 17:01:32.000000000 +0200
@@ -65,10 +65,10 @@ bool __read_mostly __vmalloc_start_set =
 
 static __init void *alloc_low_page(void)
 {
-	unsigned long pfn = e820_table_end++;
+	unsigned long pfn = pgt_buf_end++;
 	void *adr;
 
-	if (pfn >= e820_table_top)
+	if (pfn >= pgt_buf_top)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = __va(pfn * PAGE_SIZE);
@@ -173,8 +173,8 @@ static pte_t *__init page_table_kmap_che
 	if (pmd_idx_kmap_begin != pmd_idx_kmap_end
 	    && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
 	    && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
-	    && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
-		|| (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+	    && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
+		|| (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
 		pte_t *newpte;
 		int i;
 
@@ -646,8 +646,7 @@ void __init find_low_pfn_range(void)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
-				int acpi, int k8)
+void __init initmem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
@@ -986,7 +985,7 @@ static void mark_nxdata_nx(void)
 {
 	/*
 	 * When this called, init has already been executed and released,
-	 * so everything past _etext sould be NX.
+	 * so everything past _etext should be NX.
 	 */
 	unsigned long start = PFN_ALIGN(_etext);
 	/*
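
The alloc_low_page() hunk at the top of this file's diff is a plain bump allocator over the [pgt_buf_start, pgt_buf_top) frame window sized by find_early_table_space(); the rename away from the e820_table_* triple is mechanical. The allocation pattern in isolation (sketch):

    #include <linux/kernel.h>   /* panic() */
    #include <asm/page.h>       /* __va(), PAGE_SIZE */

    extern unsigned long pgt_buf_end, pgt_buf_top;

    /* Bump allocator: hand out the next page frame of a pre-sized
     * window.  There is no free(); the used part of the window is
     * reserved wholesale later via the pagetable_reserve hook. */
    static void *bump_alloc_page(void)
    {
        unsigned long pfn = pgt_buf_end++;

        if (pfn >= pgt_buf_top)
            panic("bump_alloc_page: ran out of memory");

        return __va(pfn * PAGE_SIZE);
    }
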
--- head.orig/arch/x86/mm/init_64-xen.c	2011-03-17 14:22:21.000000000 +0100
+++ head/arch/x86/mm/init_64-xen.c	2011-11-03 12:51:20.000000000 +0100
@@ -210,9 +210,9 @@ static __ref void *spp_getpage(void)
 
 	if (after_bootmem)
 		ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
-	else if (e820_table_end < e820_table_top) {
-		ptr = __va(e820_table_end << PAGE_SHIFT);
-		e820_table_end++;
+	else if (pgt_buf_end < pgt_buf_top) {
+		ptr = __va(pgt_buf_end << PAGE_SHIFT);
+		pgt_buf_end++;
 		clear_page(ptr);
 	} else
 		ptr = alloc_bootmem_pages(PAGE_SIZE);
@@ -369,18 +369,18 @@ void __init init_extra_mapping_uc(unsign
  * to the compile time generated pmds. This results in invalid pmds up
  * to the point where we hit the physaddr 0 mapping.
  *
- * We limit the mappings to the region from _text to _end.  _end is
- * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * We limit the mappings to the region from _text to _brk_end.  _brk_end
+ * is rounded up to the 2MB boundary. This catches the invalid pmds as
  * well, as they are located before _text:
  */
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
+	unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+	unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
-	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
 
-	for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+	for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
 		if (pmd_none(*pmd))
 			continue;
 		if (vaddr < (unsigned long) _text || vaddr > end)
@@ -401,9 +401,9 @@ static __ref void *alloc_low_page(unsign
 		return adr;
 	}
 
-	BUG_ON(!e820_table_end);
-	pfn = e820_table_end++;
-	if (pfn >= e820_table_top)
+	BUG_ON(!pgt_buf_end);
+	pfn = pgt_buf_end++;
+	if (pfn >= pgt_buf_top)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -412,12 +412,28 @@ static __ref void *alloc_low_page(unsign
 	return adr;
 }
 
+static __ref void *map_low_page(void *virt)
+{
+	void *adr;
+	unsigned long phys, left;
+
+	if (after_bootmem)
+		return virt;
+
+	phys = __pa(virt);
+	left = phys & (PAGE_SIZE - 1);
+	adr = early_memremap_ro(phys & PAGE_MASK, PAGE_SIZE);
+	adr = (void *)(((unsigned long)adr) | left);
+
+	return adr;
+}
+
 static __ref void unmap_low_page(void *adr)
 {
 	if (after_bootmem)
 		return;
 
-	early_iounmap(adr, PAGE_SIZE);
+	early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
 }
 
 static inline int __meminit make_readonly(unsigned long paddr)
@@ -428,13 +444,13 @@ static inline int __meminit make_readonl
 	/* Make new page tables read-only on the first pass. */
 	if (!xen_feature(XENFEAT_writable_page_tables)
 	    && !max_pfn_mapped
-	    && (paddr >= (e820_table_start << PAGE_SHIFT))
-	    && (paddr < (e820_table_top << PAGE_SHIFT)))
+	    && (paddr >= (pgt_buf_start << PAGE_SHIFT))
+	    && (paddr < (pgt_buf_top << PAGE_SHIFT)))
 		readonly = 1;
 	/* Make old page tables read-only. */
 	if (!xen_feature(XENFEAT_writable_page_tables)
 	    && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
-	    && (paddr < (e820_table_end << PAGE_SHIFT)))
+	    && (paddr < (pgt_buf_end << PAGE_SHIFT)))
 		readonly = 1;
 
 	/*
@@ -503,16 +519,6 @@ phys_pte_init(pte_t *pte_page, unsigned 
 }
 
 static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
-		pgprot_t prot)
-{
-	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
-	BUG_ON(!max_pfn_mapped);
-	return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 	      unsigned long page_size_mask, pgprot_t prot)
 {
@@ -533,8 +539,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
 		if (__pmd_val(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
-				last_map_addr = phys_pte_update(pmd, address,
+				pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+				last_map_addr = phys_pte_init(pte, address,
 								end, prot);
+				unmap_low_page(pte);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -576,9 +584,15 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
 			if (max_pfn_mapped)
 				make_page_readonly(__va(pte_phys),
 						   XENFEAT_writable_page_tables);
-			if (page_size_mask & (1 << PG_LEVEL_NUM))
-				xen_l2_entry_update(pmd, __pmd(pte_phys | _PAGE_TABLE));
-			else
+			if (page_size_mask & (1 << PG_LEVEL_NUM)) {
+				mmu_update_t u;
+
+				u.ptr = arbitrary_virt_to_machine(pmd);
+				u.val = phys_to_machine(pte_phys) | _PAGE_TABLE;
+				if (HYPERVISOR_mmu_update(&u, 1, NULL,
+							  DOMID_SELF) < 0)
+					BUG();
+			} else
 				*pmd = __pmd(pte_phys | _PAGE_TABLE);
 		} else {
 			make_page_readonly(pte, XENFEAT_writable_page_tables);
@@ -592,21 +606,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
 }
 
 static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-		unsigned long page_size_mask, pgprot_t prot)
-{
-	pmd_t *pmd = pmd_offset(pud, 0);
-	unsigned long last_map_addr;
-
-	BUG_ON(!max_pfn_mapped);
-	last_map_addr = phys_pmd_init(pmd, address, end,
-				      page_size_mask | (1 << PG_LEVEL_NUM),
-				      prot);
-	__flush_tlb_all();
-	return last_map_addr;
-}
-
-static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			 unsigned long page_size_mask)
 {
@@ -625,8 +624,12 @@ phys_pud_init(pud_t *pud_page, unsigned 
 
 		if (__pud_val(*pud)) {
 			if (!pud_large(*pud)) {
-				last_map_addr = phys_pmd_update(pud, addr, end,
-							 page_size_mask, prot);
+				pmd = map_low_page(pmd_offset(pud, 0));
+				last_map_addr = phys_pmd_init(pmd, addr, end,
+					page_size_mask | (1 << PG_LEVEL_NUM),
+					prot);
+				unmap_low_page(pmd);
+				__flush_tlb_all();
 				continue;
 			}
 			/*
@@ -668,9 +671,15 @@ phys_pud_init(pud_t *pud_page, unsigned 
 			if (max_pfn_mapped)
 				make_page_readonly(__va(pmd_phys),
 						   XENFEAT_writable_page_tables);
-			if (page_size_mask & (1 << PG_LEVEL_NUM))
-				xen_l3_entry_update(pud, __pud(pmd_phys | _PAGE_TABLE));
-			else
+			if (page_size_mask & (1 << PG_LEVEL_NUM)) {
+				mmu_update_t u;
+
+				u.ptr = arbitrary_virt_to_machine(pud);
+				u.val = phys_to_machine(pmd_phys) | _PAGE_TABLE;
+				if (HYPERVISOR_mmu_update(&u, 1, NULL,
+							  DOMID_SELF) < 0)
+					BUG();
+			} else
 				*pud = __pud(pmd_phys | _PAGE_TABLE);
 		} else {
 			make_page_readonly(pmd, XENFEAT_writable_page_tables);
@@ -686,17 +695,6 @@ phys_pud_init(pud_t *pud_page, unsigned 
 	return last_map_addr;
 }
 
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
-		 unsigned long page_size_mask)
-{
-	pud_t *pud;
-
-	pud = (pud_t *)pgd_page_vaddr(*pgd);
-
-	return phys_pud_init(pud, addr, end, page_size_mask | (1 << PG_LEVEL_NUM));
-}
-
 void __init xen_init_pt(void)
 {
 	unsigned long addr, *page;
@@ -799,15 +797,15 @@ void __init xen_finish_init_mapping(void
 
 	/* Destroy the Xen-created mappings beyond the kernel image. */
 	start = PAGE_ALIGN(_brk_end);
-	end   = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
+	end   = __START_KERNEL_map + (pgt_buf_start << PAGE_SHIFT);
 	for (; start < end; start += PAGE_SIZE)
 		if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
 			BUG();
 
-	WARN(e820_table_end != e820_table_top, "start=%lx cur=%lx top=%lx\n",
-	     e820_table_start, e820_table_end, e820_table_top);
-	if (e820_table_end > e820_table_top)
-		e820_table_top = e820_table_end;
+	WARN(pgt_buf_end != pgt_buf_top, "start=%lx cur=%lx top=%lx\n",
+	     pgt_buf_start, pgt_buf_end, pgt_buf_top);
+	if (pgt_buf_end > pgt_buf_top)
+		pgt_buf_top = pgt_buf_end;
 }
 
 unsigned long __meminit
@@ -833,8 +831,10 @@ kernel_physical_mapping_init(unsigned lo
 			next = end;
 
 		if (__pgd_val(*pgd)) {
-			last_map_addr = phys_pud_update(pgd, __pa(start),
-						 __pa(end), page_size_mask);
+			pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+			last_map_addr = phys_pud_init(pud, __pa(start),
+				__pa(end), page_size_mask | (1 << PG_LEVEL_NUM));
+			unmap_low_page(pud);
 			continue;
 		}
 
@@ -864,14 +864,13 @@ kernel_physical_mapping_init(unsigned lo
 }
 
 #ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
-				int acpi, int k8)
+void __init initmem_init(void)
 {
-	memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+	memblock_x86_register_active_regions(0, 0, max_pfn);
 #ifdef CONFIG_XEN
-	if (end_pfn > xen_start_info->nr_pages)
+	if (max_pfn > xen_start_info->nr_pages)
 		memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
-					   end_pfn << PAGE_SHIFT, "BALLOON");
+					   max_pfn << PAGE_SHIFT, "BALLOON");
 #endif
 }
 #endif
@@ -1149,18 +1148,18 @@ static struct vm_area_struct gate_vma = 
 	.vm_flags	= VM_READ | VM_EXEC
 };
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(tsk, TIF_IA32))
+	if (!mm || mm->context.ia32_compat)
 		return NULL;
 #endif
 	return &gate_vma;
 }
 
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	struct vm_area_struct *vma = get_gate_vma(task);
+	struct vm_area_struct *vma = get_gate_vma(mm);
 
 	if (!vma)
 		return 0;
@@ -1169,11 +1168,11 @@ int in_gate_area(struct task_struct *tas
 }
 
 /*
- * Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives:
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
  */
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
@@ -1187,6 +1186,19 @@ const char *arch_vma_name(struct vm_area
 	return NULL;
 }
 
+#ifdef CONFIG_X86_UV
+#define MIN_MEMORY_BLOCK_SIZE   (1 << SECTION_SIZE_BITS)
+
+unsigned long memory_block_size_bytes(void)
+{
+	if (is_uv_system()) {
+		printk(KERN_INFO "UV: memory block size 2GB\n");
+		return 2UL * 1024 * 1024 * 1024;
+	}
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
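
The new map_low_page()/unmap_low_page() pair lets phys_{pte,pmd,pud}_init() walk lower-level tables through a temporary read-only mapping (via early_memremap_ro(), added to ioremap-xen.c below) rather than the direct mapping, which may not cover them yet. Since only whole pages can be remapped, the sub-page offset travels in the low bits of the returned pointer and is masked off again on unmap. The trick in isolation (sketch; __iomem annotations elided):

    #include <asm/page.h>

    /* Map an arbitrary kernel pointer through a temporary RO mapping,
     * preserving its offset within the page. */
    static void *map_page_ro(void *virt)
    {
        unsigned long phys = __pa(virt);
        unsigned long off = phys & ~PAGE_MASK;
        void *adr = (void *)early_memremap_ro(phys & PAGE_MASK, PAGE_SIZE);

        return (void *)((unsigned long)adr | off);
    }

    static void unmap_page_ro(void *adr)
    {
        /* Strip the carried offset before tearing the mapping down. */
        early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
    }
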
--- head.orig/arch/x86/mm/ioremap-xen.c	2011-05-09 11:42:49.000000000 +0200
+++ head/arch/x86/mm/ioremap-xen.c	2011-05-09 11:42:57.000000000 +0200
@@ -758,6 +758,12 @@ early_memremap(resource_size_t phys_addr
 	return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL);
 }
 
+void __init __iomem *
+early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+{
+	return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL_RO);
+}
+
 void __init early_iounmap(void __iomem *addr, unsigned long size)
 {
 	unsigned long virt_addr;
--- head.orig/arch/x86/mm/pageattr-xen.c	2011-03-23 10:10:15.000000000 +0100
+++ head/arch/x86/mm/pageattr-xen.c	2011-04-13 17:01:32.000000000 +0200
@@ -310,7 +310,7 @@ static inline pgprot_t static_protection
 		 * these shared mappings are made of small page mappings.
 		 * Thus, not enforcing the !RW mapping for small page kernel
 		 * text will help Linux Xen paravirt guests boot
-		 * aswell.
+		 * as well.
 		 */
 		if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
 			pgprot_val(forbidden) |= _PAGE_RW;
--- head.orig/arch/x86/mm/pgtable-xen.c	2011-03-17 14:26:03.000000000 +0100
+++ head/arch/x86/mm/pgtable-xen.c	2011-04-12 15:59:10.000000000 +0200
@@ -528,8 +528,7 @@ void pud_populate(struct mm_struct *mm, 
 	 * section 8.1: in PAE mode we explicitly have to flush the
 	 * TLB via cr3 if the top-level pgd is changed...
 	 */
-	if (mm == current->active_mm)
-		xen_tlb_flush();
+	flush_tlb_mm(mm);
 }
 #else  /* !CONFIG_X86_PAE */
 
--- head.orig/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:18:20.000000000 +0100
+++ head/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:19:15.000000000 +0100
@@ -465,24 +465,25 @@ const char *arch_vma_name(struct vm_area
 	return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	struct mm_struct *mm = tsk->mm;
-
-	/* Check to see if this task was created in compat vdso mode */
+	/*
+	 * Check to see if the corresponding task was created in compat vdso
+	 * mode.
+	 */
 	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
 		return &gate_vma;
 	return NULL;
 }
 
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	const struct vm_area_struct *vma = get_gate_vma(task);
+	const struct vm_area_struct *vma = get_gate_vma(mm);
 
 	return vma && addr >= vma->vm_start && addr < vma->vm_end;
 }
 
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
--- head.orig/drivers/net/Kconfig	2012-02-08 12:16:22.000000000 +0100
+++ head/drivers/net/Kconfig	2012-02-08 12:54:02.000000000 +0100
@@ -306,15 +306,15 @@ config PARAVIRT_XEN_NETDEV_FRONTEND
 	  domain 0).
 
 	  The corresponding Linux backend driver is enabled by the
-	  CONFIG_XEN_NETDEV_BACKEND option.
+	  PARAVIRT_XEN_NETDEV_BACKEND option.
 
 	  If you are compiling a kernel for use as Xen guest, you
 	  should say Y here. To compile this driver as a module, choose
 	  M here: the module will be called xen-netfront.
 
-config XEN_NETDEV_BACKEND
+config PARAVIRT_XEN_NETDEV_BACKEND
 	tristate "Xen backend network device"
-	depends on XEN_BACKEND
+	depends on PARAVIRT_XEN_BACKEND
 	help
 	  This driver allows the kernel to act as a Xen network driver
 	  domain which exports paravirtual network devices to other
@@ -322,7 +322,7 @@ config XEN_NETDEV_BACKEND
 	  system that implements a compatible front end.
 
 	  The corresponding Linux frontend driver is enabled by the
-	  CONFIG_XEN_NETDEV_FRONTEND configuration option.
+	  PARAVIRT_XEN_NETDEV_FRONTEND configuration option.
 
 	  The backend driver presents a standard network device
 	  endpoint for each paravirtual network device to the driver
--- head.orig/drivers/net/Makefile	2012-02-08 11:35:14.000000000 +0100
+++ head/drivers/net/Makefile	2012-02-08 12:54:12.000000000 +0100
@@ -57,7 +57,7 @@ obj-$(CONFIG_WIMAX) += wimax/
 
 obj-$(CONFIG_VMXNET3) += vmxnet3/
 obj-$(CONFIG_PARAVIRT_XEN_NETDEV_FRONTEND) += xen-netfront.o
-obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
+obj-$(CONFIG_PARAVIRT_XEN_NETDEV_BACKEND) += xen-netback/
 
 obj-$(CONFIG_USB_CATC)          += usb/
 obj-$(CONFIG_USB_KAWETH)        += usb/
--- head.orig/drivers/net/xen-netback/Makefile	2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/net/xen-netback/Makefile	2011-04-13 14:42:19.000000000 +0200
@@ -1,3 +1,3 @@
-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+obj-$(CONFIG_PARAVIRT_XEN_NETDEV_BACKEND) := xen-netback.o
 
 xen-netback-y := netback.o xenbus.o interface.o
--- head.orig/drivers/watchdog/Kconfig	2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/watchdog/Kconfig	2012-04-10 17:11:45.000000000 +0200
@@ -1184,7 +1184,7 @@ config WATCHDOG_RIO
 
 config XEN_WDT
 	tristate "Xen Watchdog support"
-	depends on XEN
+	depends on XEN || PARAVIRT_XEN
 	help
 	  Say Y here to support the hypervisor watchdog capability provided
 	  by Xen 4.0 and newer.  The watchdog timeout period is normally one
--- head.orig/drivers/watchdog/xen_wdt.c	2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/watchdog/xen_wdt.c	2012-04-10 17:11:39.000000000 +0200
@@ -1,7 +1,8 @@
 /*
  *	Xen Watchdog Driver
  *
- *	(c) Copyright 2010 Novell, Inc.
+ *	(c) Copyright 2010,2011 Novell, Inc.
+ *	(c) Copyright 2011,2012 SuSE
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -28,8 +29,10 @@
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
+#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/xen.h>
 #include <asm/xen/hypercall.h>
+#endif
 #include <xen/interface/sched.h>
 
 static struct platform_device *platform_device;
@@ -329,17 +332,19 @@ static int __init xen_wdt_init_module(vo
 {
 	int err;
 
+#ifdef CONFIG_PARAVIRT_XEN
 	if (!xen_domain())
 		return -ENODEV;
+#endif
 
-	pr_info("Xen WatchDog Timer Driver v%s\n", DRV_VERSION);
+	printk(KERN_INFO "Xen WatchDog Timer Driver v%s\n", DRV_VERSION);
 
 	err = platform_driver_register(&xen_wdt_driver);
 	if (err)
 		return err;
 
 	platform_device = platform_device_register_simple(DRV_NAME,
-								  -1, NULL, 0);
+							  -1, NULL, 0);
 	if (IS_ERR(platform_device)) {
 		err = PTR_ERR(platform_device);
 		platform_driver_unregister(&xen_wdt_driver);
--- head.orig/drivers/xen/Kconfig	2012-04-03 13:15:48.000000000 +0200
+++ head/drivers/xen/Kconfig	2012-04-03 13:15:53.000000000 +0200
@@ -503,7 +503,7 @@ config XEN_GNTDEV
 
 config XEN_GRANT_DEV_ALLOC
 	tristate "User-space grant reference allocator driver"
-	depends on XEN
+	depends on PARAVIRT_XEN
 	default m
 	help
 	  Allows userspace processes to create pages with access granted
--- head.orig/drivers/xen/Makefile	2011-08-18 11:16:13.000000000 +0200
+++ head/drivers/xen/Makefile	2011-08-18 11:16:19.000000000 +0200
@@ -1,10 +1,10 @@
-obj-$(CONFIG_PARAVIRT_XEN)	+= grant-table.o features.o events.o manage.o
+obj-$(CONFIG_PARAVIRT_XEN)	+= grant-table.o features.o events.o manage.o balloon.o
 xen-biomerge-$(CONFIG_PARAVIRT_XEN) := biomerge.o
 xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
-xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+xen-balloon_$(CONFIG_PARAVIRT_XEN) := xen-balloon.o
 xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
 
-xen-balloon-$(CONFIG_XEN)	:= balloon/
+xen-balloon_$(CONFIG_XEN)	:= balloon/
 obj-$(CONFIG_XEN)		+= core/
 obj-$(CONFIG_XEN)		+= console/
 obj-y				+= xenbus/
@@ -25,10 +25,11 @@ obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= $(
 obj-$(CONFIG_BLOCK)			+= $(xen-biomerge-y)
 obj-$(CONFIG_HOTPLUG_CPU)		+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
-obj-$(CONFIG_XEN_BALLOON)		+= $(xen-balloon-y)
+obj-$(CONFIG_XEN_BALLOON)		+= $(xen-balloon_y)
 obj-$(CONFIG_XEN_DEV_EVTCHN)		+= $(xen-evtchn-name-y).o
 obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XENFS)			+= xenfs/
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
@@ -36,6 +37,7 @@ obj-$(CONFIG_XEN_DOM0)			+= pci.o
 
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
 
 xen-platform-pci-y			:= platform-pci.o
 
--- head.orig/drivers/xen/blkback/blkback.c	2012-04-04 10:26:03.000000000 +0200
+++ head/drivers/xen/blkback/blkback.c	2012-03-26 12:28:48.000000000 +0200
@@ -149,8 +149,6 @@ static void unplug_queue(blkif_t *blkif)
 {
 	if (blkif->plug == NULL)
 		return;
-	if (blkif->plug->unplug_fn)
-		blkif->plug->unplug_fn(blkif->plug);
 	kobject_put(&blkif->plug->kobj);
 	blkif->plug = NULL;
 }
--- head.orig/drivers/xen/blktap2-new/device.c	2012-02-16 16:38:28.000000000 +0100
+++ head/drivers/xen/blktap2-new/device.c	2012-02-16 16:38:39.000000000 +0100
@@ -527,10 +527,9 @@ blktap_device_debug(struct blktap *tap, 
 		      queue_logical_block_size(q));
 
 	s += snprintf(s, end - s,
-		      "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
+		      "queue flags:%#lx stopped:%d\n",
 		      q->queue_flags,
-		      blk_queue_plugged(q), blk_queue_stopped(q),
-		      elv_queue_empty(q));
+		      blk_queue_stopped(q));
 
 	bdev = bdget_disk(disk, 0);
 	if (bdev) {
--- head.orig/drivers/xen/core/evtchn.c	2012-04-03 17:02:16.000000000 +0200
+++ head/drivers/xen/core/evtchn.c	2012-06-08 10:36:16.000000000 +0200
@@ -33,7 +33,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/irq.h>
-#include <linux/irqdesc.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
@@ -47,6 +46,7 @@
 #include <xen/interface/physdev.h>
 #include <asm/hypervisor.h>
 #include <linux/mc146818rtc.h> /* RTC_IRQ */
+#include "../../../kernel/irq/internals.h" /* IRQS_AUTODETECT, IRQS_PENDING */
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -89,7 +89,7 @@ static struct irq_cfg _irq_cfg[] = {
 static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
 {
 #ifdef CONFIG_SPARSE_IRQ
-	return get_irq_chip_data(irq);
+	return irq_get_chip_data(irq);
 #else
 	return irq < NR_IRQS ? _irq_cfg + irq : NULL;
 #endif
@@ -420,18 +420,18 @@ static int find_unbound_irq(unsigned int
 
 	for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
 		struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
-		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *data = irq_get_irq_data(irq);
 
 		if (unlikely(!cfg))
 			return -ENOMEM;
-		if (desc->irq_data.chip != &no_irq_chip &&
-		    desc->irq_data.chip != chip)
+		if (data->chip != &no_irq_chip &&
+		    data->chip != chip)
 			continue;
 
 		if (!cfg->bindcount) {
 			*pcfg = cfg;
-			desc->status |= IRQ_NOPROBE;
-			set_irq_chip_and_handler_name(irq, chip,
+			irq_set_noprobe(irq);
+			irq_set_chip_and_handler_name(irq, chip,
 						      handle_fasteoi_irq,
 						      "fasteoi");
 			return irq;
@@ -600,7 +600,7 @@ static int bind_ipi_to_irq(unsigned int 
 static void unbind_from_irq(unsigned int irq)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned int cpu, evtchn = evtchn_from_irq_cfg(cfg);
+	unsigned int evtchn = evtchn_from_irq_cfg(cfg);
 
 	spin_lock(&irq_mapping_update_lock);
 
@@ -628,17 +628,7 @@ static void unbind_from_irq(unsigned int
 		evtchn_to_irq[evtchn] = -1;
 		cfg->info = IRQ_UNBOUND;
 
-		/* Zap stats across IRQ changes of use. */
-		for_each_possible_cpu(cpu) {
-#ifdef CONFIG_GENERIC_HARDIRQS
-			struct irq_desc *desc = irq_to_desc(irq);
-
-			if (desc->kstat_irqs)
-				*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
-#else
-			kstat_cpu(cpu).irqs[irq] = 0;
-#endif
-		}
+		dynamic_irq_cleanup(irq);
 	}
 
 	spin_unlock(&irq_mapping_update_lock);
@@ -785,8 +775,11 @@ static int set_affinity_irq(struct irq_d
 
 	masked = test_and_set_evtchn_mask(port);
 	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv);
-	if (rc == 0)
+	if (rc == 0) {
 		bind_evtchn_to_cpu(port, cpu);
+		rc = evtchn_to_irq[port] != -1 ? IRQ_SET_MASK_OK_NOCOPY
+					       : IRQ_SET_MASK_OK;
+	}
 	if (!masked)
 		unmask_evtchn(port);
 
@@ -840,8 +833,8 @@ static unsigned int startup_dynirq(struc
 
 static void end_dynirq(struct irq_data *data)
 {
-	if (!(irq_to_desc(data->irq)->status & IRQ_DISABLED)) {
-		move_masked_irq(data->irq);
+	if (!irqd_irq_disabled(data)) {
+		irq_move_masked_irq(data);
 		unmask_dynirq(data);
 	}
 }
@@ -936,7 +929,7 @@ static void enable_pirq(struct irq_data 
 	/* NB. We are happy to share unless we are probing. */
 	bind_pirq.flags = (pirq < nr_pirqs
 			   && test_and_clear_bit(pirq, probing_pirq))
-			  || (irq_to_desc(irq)->status & IRQ_AUTODETECT)
+			  || (irq_to_desc(irq)->istate & IRQS_AUTODETECT)
 			  ? 0 : BIND_PIRQ__WILL_SHARE;
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
 		if (bind_pirq.flags)
@@ -993,14 +986,13 @@ static void unmask_pirq(struct irq_data 
 
 static void end_pirq(struct irq_data *data)
 {
-	const struct irq_desc *desc = irq_to_desc(data->irq);
+	bool disabled = irqd_irq_disabled(data);
 
-	if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
-	    (IRQ_DISABLED|IRQ_PENDING))
+	if (disabled && (irq_to_desc(data->irq)->istate & IRQS_PENDING))
 		shutdown_pirq(data);
 	else {
-		if (!(desc->status & IRQ_DISABLED))
-			move_masked_irq(data->irq);
+		if (!disabled)
+			irq_move_masked_irq(data);
 		unmask_pirq(data);
 	}
 }
@@ -1134,10 +1126,13 @@ static void restore_cpu_ipis(unsigned in
 	int ipi, irq, evtchn;
 
 	for (ipi = 0; ipi < NR_IPIS; ipi++) {
+		struct irq_data *data;
+
 		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
 			continue;
 
-		BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0));
+		data = irq_get_irq_data(irq);
+		BUG_ON(irq_data_cfg(data)->info != mk_irq_info(IRQT_IPI, ipi, 0));
 
 		/* Get a new binding from Xen. */
 		bind_ipi.vcpu = cpu;
@@ -1148,11 +1143,11 @@ static void restore_cpu_ipis(unsigned in
 
 		/* Record the new mapping. */
 		evtchn_to_irq[evtchn] = irq;
-		irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
+		irq_data_cfg(data)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
 		bind_evtchn_to_cpu(evtchn, cpu);
 
 		/* Ready for use. */
-		if (!(irq_to_desc(irq)->status & IRQ_DISABLED))
+		if (!irqd_irq_disabled(data))
 			unmask_evtchn(evtchn);
 	}
 }
@@ -1209,7 +1204,7 @@ int __init arch_early_irq_init(void)
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
-		set_irq_chip_data(i, _irq_cfg + i);
+		irq_set_chip_data(i, _irq_cfg + i);
 
 	return 0;
 }
@@ -1222,7 +1217,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(uns
 	if (res < 0) {
 		if (res != -EEXIST)
 			return NULL;
-		cfg = get_irq_chip_data(at);
+		cfg = irq_get_chip_data(at);
 		if (cfg)
 			return cfg;
 	}
@@ -1235,7 +1230,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(uns
 
 	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
 	if (cfg)
-		set_irq_chip_data(at, cfg);
+		irq_set_chip_data(at, cfg);
 	else
 		irq_free_desc(at);
 
@@ -1318,7 +1313,7 @@ void evtchn_register_pirq(int irq)
 	if (identity_mapped_irq(irq) || type_from_irq_cfg(cfg) != IRQT_UNBOUND)
 		return;
 	cfg->info = mk_irq_info(IRQT_PIRQ, irq, 0);
-	set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
+	irq_set_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
 				      "fasteoi");
 }
 
@@ -1366,7 +1361,7 @@ int evtchn_map_pirq(int irq, int xen_pir
 		spin_unlock(&irq_alloc_lock);
 		if (irq < PIRQ_BASE)
 			return -ENOSPC;
-		set_irq_chip_and_handler_name(irq, &pirq_chip,
+		irq_set_chip_and_handler_name(irq, &pirq_chip,
 					      handle_fasteoi_irq, "fasteoi");
 #endif
 	} else if (!xen_pirq) {
@@ -1380,7 +1375,7 @@ int evtchn_map_pirq(int irq, int xen_pir
 		 * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
 		 * then causes a warning in dynamic_irq_cleanup().
 		 */
-		set_irq_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
 		cfg->info = IRQ_UNBOUND;
 #ifdef CONFIG_SPARSE_IRQ
 		cfg->bindcount--;
@@ -1431,8 +1426,8 @@ void __init xen_init_IRQ(void)
 
 #ifndef CONFIG_SPARSE_IRQ
 	for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
-		irq_to_desc(i)->status |= IRQ_NOPROBE;
-		set_irq_chip_and_handler_name(i, &dynirq_chip,
+		irq_set_noprobe(i);
+		irq_set_chip_and_handler_name(i, &dynirq_chip,
 					      handle_fasteoi_irq, "fasteoi");
 	}
 
@@ -1449,7 +1444,7 @@ void __init xen_init_IRQ(void)
 			continue;
 #endif
 
-		set_irq_chip_and_handler_name(i, &pirq_chip,
+		irq_set_chip_and_handler_name(i, &pirq_chip,
 					      handle_fasteoi_irq, "fasteoi");
 	}
 }
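
Most of this hunk is the 2.6.39 genirq migration: direct pokes at struct irq_desc (desc->status flags, get/set_irq_chip_data(), move_masked_irq()) become accessor calls operating on struct irq_data or the IRQ number, and IRQ_DISABLED/IRQ_PENDING tests become irqd_irq_disabled() and istate checks. The new-style setup in one place (sketch):

    #include <linux/irq.h>

    /* 2.6.39-style dynamic IRQ setup: accessors replace direct
     * irq_desc field manipulation. */
    static void setup_dynirq(unsigned int irq, struct irq_chip *chip,
                             void *cfg)
    {
        irq_set_chip_data(irq, cfg);    /* was set_irq_chip_data() */
        irq_set_noprobe(irq);           /* was desc->status |= IRQ_NOPROBE */
        irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq,
                                      "fasteoi");
    }
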
--- head.orig/drivers/xen/netback/netback.c	2011-04-11 15:05:22.000000000 +0200
+++ head/drivers/xen/netback/netback.c	2012-06-08 10:36:23.000000000 +0200
@@ -514,7 +514,7 @@ static int netbk_check_gop(int nr_frags,
 	multicall_entry_t *mcl;
 	gnttab_transfer_t *gop;
 	gnttab_copy_t     *copy_op;
-	int status = NETIF_RSP_OKAY;
+	int status = XEN_NETIF_RSP_OKAY;
 	int i;
 
 	for (i = 0; i <= nr_frags; i++) {
@@ -525,7 +525,7 @@ static int netbk_check_gop(int nr_frags,
 			if (unlikely(copy_op->status != GNTST_okay)) {
 				DPRINTK("Bad status %d from copy to DOM%d.\n",
 					copy_op->status, domid);
-				status = NETIF_RSP_ERROR;
+				status = XEN_NETIF_RSP_ERROR;
 			}
 		} else {
 			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -545,7 +545,7 @@ static int netbk_check_gop(int nr_frags,
 				 * a fatal error anyway.
 				 */
 				BUG_ON(gop->status == GNTST_bad_page);
-				status = NETIF_RSP_ERROR;
+				status = XEN_NETIF_RSP_ERROR;
 			}
 		}
 	}
@@ -561,7 +561,7 @@ static void netbk_add_frag_responses(net
 
 	for (i = 0; i < nr_frags; i++) {
 		int id = meta[i].id;
-		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+		int flags = (i == nr_frags - 1) ? 0 : XEN_NETRXF_more_data;
 
 		if (meta[i].copy)
 			offset = 0;
@@ -702,14 +702,15 @@ static void net_rx_action(unsigned long 
 		skb->dev->stats.tx_packets++;
 
 		id = meta[npo.meta_cons].id;
-		flags = nr_frags ? NETRXF_more_data : 0;
+		flags = nr_frags ? XEN_NETRXF_more_data : 0;
 
 		switch (skb->ip_summed) {
 		case CHECKSUM_PARTIAL: /* local packet? */
-			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+			flags |= XEN_NETRXF_csum_blank |
+				 XEN_NETRXF_data_validated;
 			break;
 		case CHECKSUM_UNNECESSARY: /* remote but checksummed? */
-			flags |= NETRXF_data_validated;
+			flags |= XEN_NETRXF_data_validated;
 			break;
 		}
 
@@ -726,7 +727,7 @@ static void net_rx_action(unsigned long 
 				RING_GET_RESPONSE(&netif->rx,
 						  netif->rx.rsp_prod_pvt++);
 
-			resp->flags |= NETRXF_extra_info;
+			resp->flags |= XEN_NETRXF_extra_info;
 
 			gso->u.gso.size = meta[npo.meta_cons].frag.size;
 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -994,7 +995,7 @@ inline static void net_tx_action_dealloc
 		netif = pending_tx_info[pending_idx].netif;
 
 		make_tx_response(netif, &pending_tx_info[pending_idx].req, 
-				 NETIF_RSP_OKAY);
+				 XEN_NETIF_RSP_OKAY);
 
 		/* Ready for next use. */
 		gnttab_reset_grant_page(mmap_pages[pending_idx]);
@@ -1012,7 +1013,7 @@ static void netbk_tx_err(netif_t *netif,
 	RING_IDX cons = netif->tx.req_cons;
 
 	do {
-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
 		if (cons >= end)
 			break;
 		txp = RING_GET_REQUEST(&netif->tx, cons++);
@@ -1028,7 +1029,7 @@ static int netbk_count_requests(netif_t 
 	RING_IDX cons = netif->tx.req_cons;
 	int frags = 0;
 
-	if (!(first->flags & NETTXF_more_data))
+	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
@@ -1057,7 +1058,7 @@ static int netbk_count_requests(netif_t 
 				txp->offset, txp->size);
 			return -frags;
 		}
-	} while ((txp++)->flags & NETTXF_more_data);
+	} while ((txp++)->flags & XEN_NETTXF_more_data);
 
 	return frags;
 }
@@ -1106,7 +1107,7 @@ static int netbk_tx_check_mop(struct sk_
 	err = mop->status;
 	if (unlikely(err != GNTST_okay)) {
 		txp = &pending_tx_info[pending_idx].req;
-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 		netif_put(netif);
 	} else {
@@ -1137,7 +1138,7 @@ static int netbk_tx_check_mop(struct sk_
 
 		/* Error on this fragment: respond to client with an error. */
 		txp = &pending_tx_info[pending_idx].req;
-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 		netif_put(netif);
 
@@ -1310,7 +1311,7 @@ static void net_tx_action(unsigned long 
 		netif->tx.req_cons = ++i;
 
 		memset(extras, 0, sizeof(extras));
-		if (txreq.flags & NETTXF_extra_info) {
+		if (txreq.flags & XEN_NETTXF_extra_info) {
 			work_to_do = netbk_get_extras(netif, extras,
 						      work_to_do);
 			i = netif->tx.req_cons;
@@ -1449,9 +1450,9 @@ static void net_tx_action(unsigned long 
 			netif_idx_release(pending_idx);
 		}
 
-		if (txp->flags & NETTXF_csum_blank)
+		if (txp->flags & XEN_NETTXF_csum_blank)
 			skb->ip_summed = CHECKSUM_PARTIAL;
-		else if (txp->flags & NETTXF_data_validated)
+		else if (txp->flags & XEN_NETTXF_data_validated)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else
 			skb->ip_summed = CHECKSUM_NONE;
@@ -1549,8 +1550,8 @@ static void make_tx_response(netif_t *ne
 	resp->id     = txp->id;
 	resp->status = st;
 
-	if (txp->flags & NETTXF_extra_info)
-		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+	if (txp->flags & XEN_NETTXF_extra_info)
+		RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
 
 	netif->tx.rsp_prod_pvt = ++i;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
--- head.orig/drivers/xen/netfront/netfront.c	2012-03-12 13:53:17.000000000 +0100
+++ head/drivers/xen/netfront/netfront.c	2012-06-08 10:36:26.000000000 +0200
@@ -651,7 +651,7 @@ static void network_tx_buf_gc(struct net
 			struct netif_tx_response *txrsp;
 
 			txrsp = RING_GET_RESPONSE(&np->tx, cons);
-			if (txrsp->status == NETIF_RSP_NULL)
+			if (txrsp->status == XEN_NETIF_RSP_NULL)
 				continue;
 
 			id  = txrsp->id;
@@ -875,7 +875,7 @@ static void xennet_make_frags(struct sk_
 
 	while (len > PAGE_SIZE - offset) {
 		tx->size = PAGE_SIZE - offset;
-		tx->flags |= NETTXF_more_data;
+		tx->flags |= XEN_NETTXF_more_data;
 		len -= tx->size;
 		data += tx->size;
 		offset = 0;
@@ -900,7 +900,7 @@ static void xennet_make_frags(struct sk_
 	for (i = 0; i < frags; i++) {
 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 
-		tx->flags |= NETTXF_more_data;
+		tx->flags |= XEN_NETTXF_more_data;
 
 		id = get_id_from_freelist(np->tx_skbs);
 		np->tx_skbs[id] = skb_get(skb);
@@ -981,9 +981,9 @@ static int network_start_xmit(struct sk_
 	extra = NULL;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
-		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-		tx->flags |= NETTXF_data_validated;
+		tx->flags |= XEN_NETTXF_data_validated;
 
 #if HAVE_TSO
 	if (skb_shinfo(skb)->gso_size) {
@@ -993,7 +993,7 @@ static int network_start_xmit(struct sk_
 		if (extra)
 			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
 		else
-			tx->flags |= NETTXF_extra_info;
+			tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
 		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -1131,7 +1131,7 @@ static int xennet_get_responses(struct n
 	int err = 0;
 	unsigned long ret;
 
-	if (rx->flags & NETRXF_extra_info) {
+	if (rx->flags & XEN_NETRXF_extra_info) {
 		err = xennet_get_extras(np, extras, rp);
 		cons = np->rx.rsp_cons;
 	}
@@ -1206,7 +1206,7 @@ static int xennet_get_responses(struct n
 		__skb_queue_tail(list, skb);
 
 next:
-		if (!(rx->flags & NETRXF_more_data))
+		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
 
 		if (cons + frags == rp) {
@@ -1407,9 +1407,9 @@ err:	
 		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
 		skb->len += skb->data_len;
 
-		if (rx->flags & NETRXF_csum_blank)
+		if (rx->flags & XEN_NETRXF_csum_blank)
 			skb->ip_summed = CHECKSUM_PARTIAL;
-		else if (rx->flags & NETRXF_data_validated)
+		else if (rx->flags & XEN_NETRXF_data_validated)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else
 			skb->ip_summed = CHECKSUM_NONE;
--- head.orig/drivers/xen/usbfront/usbfront-hub.c	2009-10-15 11:45:41.000000000 +0200
+++ head/drivers/xen/usbfront/usbfront-hub.c	2011-04-14 17:50:18.000000000 +0200
@@ -271,8 +271,8 @@ static void xenhcd_hub_descriptor(struct
 	desc->bDescLength = 7 + 2 * temp;
 
 	/* bitmaps for DeviceRemovable and PortPwrCtrlMask */
-	memset(&desc->bitmap[0], 0, temp);
-	memset(&desc->bitmap[temp], 0xff, temp);
+	memset(&desc->u.hs.DeviceRemovable[0], 0, temp);
+	memset(&desc->u.hs.DeviceRemovable[temp], 0xff, temp);
 
 	/* per-port over current reporting and no power switching */
 	temp = 0x000a;
--- head.orig/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:19:07.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:22:22.000000000 +0100
@@ -809,7 +809,7 @@ static struct xenbus_watch fe_watch = {
 
 static int __maybe_unused suspend_dev(struct device *dev, void *data)
 #else
-int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+int xenbus_dev_suspend(struct device *dev)
 #endif
 {
 	int err = 0;
@@ -823,11 +823,7 @@ int xenbus_dev_suspend(struct device *de
 		return 0;
 	drv = to_xenbus_driver(dev->driver);
 	if (drv->suspend)
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-		err = drv->suspend(xdev, state);
-#else
 		err = drv->suspend(xdev);
-#endif
 	if (err)
 		pr_warning("xenbus: suspend %s failed: %i\n",
 			   dev_name(dev), err);
@@ -900,7 +896,15 @@ int xenbus_dev_resume(struct device *dev
 }
 PARAVIRT_EXPORT_SYMBOL(xenbus_dev_resume);
 
-#if (defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)) || defined(MODULE)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+int xenbus_dev_cancel(struct device *dev)
+{
+	/* Do nothing */
+	DPRINTK("cancel");
+	return 0;
+}
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_cancel);
+#elif defined(CONFIG_PM_SLEEP) || defined(MODULE)
 void xenbus_suspend(void)
 {
 	DPRINTK("");
--- head.orig/drivers/xen/xenbus/xenbus_probe.h	2011-12-21 11:24:56.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.h	2011-12-21 11:28:53.000000000 +0100
@@ -91,8 +91,9 @@ extern void xenbus_dev_changed(const cha
 
 extern void xenbus_dev_shutdown(struct device *_dev);
 
-extern int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+extern int xenbus_dev_suspend(struct device *dev);
 extern int xenbus_dev_resume(struct device *dev);
+extern int xenbus_dev_cancel(struct device *dev);
 
 extern void xenbus_otherend_changed(struct xenbus_watch *watch,
 				    const char **vec, unsigned int len,
--- head.orig/drivers/xen/xenoprof/xenoprofile.c	2012-02-16 12:36:22.000000000 +0100
+++ head/drivers/xen/xenoprof/xenoprofile.c	2011-04-15 12:46:57.000000000 +0200
@@ -19,7 +19,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/oprofile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/vmalloc.h>
@@ -58,9 +58,9 @@ static int ovf_irq[NR_CPUS];
 /* cpu model type string - copied from Xen on XENOPROF_init command */
 static char cpu_type[XENOPROF_CPU_TYPE_SIZE];
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 
-static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+static int xenoprof_suspend(void)
 {
 	if (xenoprof_enabled == 1)
 		xenoprof_stop();
@@ -68,46 +68,35 @@ static int xenoprof_suspend(struct sys_d
 }
 
 
-static int xenoprof_resume(struct sys_device * dev)
+static void xenoprof_resume(void)
 {
 	if (xenoprof_enabled == 1)
 		xenoprof_start();
-	return 0;
 }
 
 
-static struct sysdev_class oprofile_sysclass = {
-	.name		= "oprofile",
+static struct syscore_ops oprofile_syscore_ops = {
 	.resume		= xenoprof_resume,
 	.suspend	= xenoprof_suspend
 };
 
 
-static struct sys_device device_oprofile = {
-	.id	= 0,
-	.cls	= &oprofile_sysclass,
-};
-
-
 static int __init init_driverfs(void)
 {
-	int error;
-	if (!(error = sysdev_class_register(&oprofile_sysclass)))
-		error = sysdev_register(&device_oprofile);
-	return error;
+	register_syscore_ops(&oprofile_syscore_ops);
+	return 0;
 }
 
 
 static void exit_driverfs(void)
 {
-	sysdev_unregister(&device_oprofile);
-	sysdev_class_unregister(&oprofile_sysclass);
+	unregister_syscore_ops(&oprofile_syscore_ops);
 }
 
 #else
 #define init_driverfs() do { } while (0)
 #define exit_driverfs() do { } while (0)
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static unsigned long long oprofile_samples;
 static unsigned long long p_oprofile_samples;
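
The conversion above follows the 2.6.39 move away from sysdev-based PM for this kind of code: syscore_ops callbacks take no device argument, suspend runs late with interrupts disabled, and resume returns void. The registration pattern in general form (sketch with placeholder names):

    #include <linux/init.h>
    #include <linux/syscore_ops.h>

    static int example_suspend(void)
    {
        /* quiesce: runs late, with interrupts disabled */
        return 0;
    }

    static void example_resume(void)
    {
        /* re-arm: note there is no error return */
    }

    static struct syscore_ops example_syscore_ops = {
        .suspend = example_suspend,
        .resume  = example_resume,
    };

    static int __init example_init(void)
    {
        register_syscore_ops(&example_syscore_ops);
        return 0;
    }
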
--- head.orig/include/xen/balloon.h	2011-01-31 18:07:35.000000000 +0100
+++ head/include/xen/balloon.h	2011-04-13 17:01:31.000000000 +0200
@@ -56,6 +56,31 @@ void balloon_release_driver_page(struct 
 extern spinlock_t balloon_lock;
 #define balloon_lock(__flags)   spin_lock_irqsave(&balloon_lock, __flags)
 #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
-#endif
+
+#else /* CONFIG_PARAVIRT_XEN */
+
+#define RETRY_UNLIMITED	0
+
+struct balloon_stats {
+	/* We aim for 'current allocation' == 'target allocation'. */
+	unsigned long current_pages;
+	unsigned long target_pages;
+	/* Number of pages in high- and low-memory balloons. */
+	unsigned long balloon_low;
+	unsigned long balloon_high;
+	unsigned long schedule_delay;
+	unsigned long max_schedule_delay;
+	unsigned long retry_count;
+	unsigned long max_retry_count;
+};
+
+extern struct balloon_stats balloon_stats;
+
+void balloon_set_new_target(unsigned long target);
+
+int alloc_xenballooned_pages(int nr_pages, struct page** pages);
+void free_xenballooned_pages(int nr_pages, struct page** pages);
+
+#endif /* CONFIG_PARAVIRT_XEN */
 
 #endif /* __XEN_BALLOON_H__ */
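
The new CONFIG_PARAVIRT_XEN branch exports the pvops balloon's statistics plus the alloc/free_xenballooned_pages() pair that gntdev and gntalloc use to obtain backing frames. A hypothetical caller (sketch; error handling trimmed to the essential):

    #include <linux/mm.h>
    #include <xen/balloon.h>

    #define NR_FRAMES 16

    static struct page *frames[NR_FRAMES];

    /* Borrow page frames from the balloon, use them, hand them back. */
    static int borrow_balloon_frames(void)
    {
        int rc = alloc_xenballooned_pages(NR_FRAMES, frames);

        if (rc)
            return rc;      /* balloon could not supply the pages */

        /* ... map grants into the frames, use them ... */

        free_xenballooned_pages(NR_FRAMES, frames);
        return 0;
    }
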
--- head.orig/include/xen/interface/io/blkif.h	2012-04-04 08:57:09.000000000 +0200
+++ head/include/xen/interface/io/blkif.h	2012-04-04 10:26:18.000000000 +0200
@@ -431,8 +431,17 @@ struct blkif_request {
     uint8_t        nr_segments;  /* number of segments                   */
     blkif_vdev_t   handle;       /* only for read/write requests         */
     uint64_t       id;           /* private guest value, echoed in resp  */
+#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
     blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
     struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#else
+    union {
+        struct blkif_request_rw {
+            blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+            struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        } rw;
+    } u;
+#endif
 };
 typedef struct blkif_request blkif_request_t;
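
The #if keeps the classic flat request layout for the XenLinux tree (and for PV-on-HVM builds using the compat header) while pvops kernels see the 2.6.39 union-based layout; shared code can paper over the difference with a small accessor, for example (hypothetical):

    /* Hypothetical compat accessor mirroring the #if condition above. */
    #if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
    # define blkif_req_sector(req)  ((req)->sector_number)
    #else
    # define blkif_req_sector(req)  ((req)->u.rw.sector_number)
    #endif
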
 
--- head.orig/include/xen/interface/io/netif.h	2011-01-31 17:49:31.000000000 +0100
+++ head/include/xen/interface/io/netif.h	2011-04-13 15:41:23.000000000 +0200
@@ -50,20 +50,20 @@
  */
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETTXF_csum_blank     (0)
-#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
+#define _XEN_NETTXF_csum_blank		(0)
+#define  XEN_NETTXF_csum_blank		(1U<<_XEN_NETTXF_csum_blank)
 
 /* Packet data has been validated against protocol checksum. */
-#define _NETTXF_data_validated (1)
-#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+#define _XEN_NETTXF_data_validated	(1)
+#define  XEN_NETTXF_data_validated	(1U<<_XEN_NETTXF_data_validated)
 
 /* Packet continues in the next request descriptor. */
-#define _NETTXF_more_data      (2)
-#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
+#define _XEN_NETTXF_more_data		(2)
+#define  XEN_NETTXF_more_data		(1U<<_XEN_NETTXF_more_data)
 
 /* Packet to be followed by extra descriptor(s). */
-#define _NETTXF_extra_info     (3)
-#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
+#define _XEN_NETTXF_extra_info		(3)
+#define  XEN_NETTXF_extra_info		(1U<<_XEN_NETTXF_extra_info)
 
 struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
@@ -160,20 +160,24 @@ struct netif_rx_request {
 typedef struct netif_rx_request netif_rx_request_t;
 
 /* Packet data has been validated against protocol checksum. */
-#define _NETRXF_data_validated (0)
-#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
+#define _XEN_NETRXF_data_validated	(0)
+#define  XEN_NETRXF_data_validated	(1U<<_XEN_NETRXF_data_validated)
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETRXF_csum_blank     (1)
-#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
+#define _XEN_NETRXF_csum_blank		(1)
+#define  XEN_NETRXF_csum_blank		(1U<<_XEN_NETRXF_csum_blank)
 
 /* Packet continues in the next request descriptor. */
-#define _NETRXF_more_data      (2)
-#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+#define _XEN_NETRXF_more_data		(2)
+#define  XEN_NETRXF_more_data		(1U<<_XEN_NETRXF_more_data)
 
 /* Packet to be followed by extra descriptor(s). */
-#define _NETRXF_extra_info     (3)
-#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+#define _XEN_NETRXF_extra_info		(3)
+#define  XEN_NETRXF_extra_info		(1U<<_XEN_NETRXF_extra_info)
+
+/* GSO Prefix descriptor. */
+#define _XEN_NETRXF_gso_prefix		(4)
+#define  XEN_NETRXF_gso_prefix		(1U<<_XEN_NETRXF_gso_prefix)
 
 struct netif_rx_response {
     uint16_t id;
@@ -204,10 +208,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
 #define xen_netif_extra_info netif_extra_info
 #endif
 
-#define NETIF_RSP_DROPPED         -2
-#define NETIF_RSP_ERROR           -1
-#define NETIF_RSP_OKAY             0
+#define XEN_NETIF_RSP_DROPPED	-2
+#define XEN_NETIF_RSP_ERROR	-1
+#define XEN_NETIF_RSP_OKAY	 0
 /* No response: used for auxiliary requests (e.g., netif_tx_extra). */
-#define NETIF_RSP_NULL             1
+#define XEN_NETIF_RSP_NULL	 1
 
 #endif
--- head.orig/include/xen/public/gntdev.h	2011-02-03 13:52:28.000000000 +0100
+++ head/include/xen/public/gntdev.h	2011-04-13 15:21:38.000000000 +0200
@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
 	uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+	/* IN parameters */
+	/* Offset in the file descriptor for a byte within the page (same as
+	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+	 * be cleared. Otherwise, it can be any byte in the page whose
+	 * notification we are adjusting.
+	 */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
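
The comment block spells out the contract; as a usage illustration, a user-space caller might arm both notification actions on byte 0 of a mapped grant page like this (hypothetical sketch, assuming 4 KiB pages and the ioctl numbering above):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <xen/public/gntdev.h>  /* this header */

    /* Arrange for byte 0 of the page mapped 'map_index' pages into the
     * gntdev fd to be zeroed, and an event sent, when it is unmapped. */
    static int arm_unmap_notify(int gntdev_fd, uint64_t map_index,
                                uint32_t evtchn_port)
    {
        struct ioctl_gntdev_unmap_notify notify = {
            .index = map_index << 12,   /* byte 0; 4 KiB pages assumed */
            .action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
            .event_channel_port = evtchn_port,
        };

        return ioctl(gntdev_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
    }
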
--- head.orig/include/xen/xenbus.h	2011-12-21 11:17:37.000000000 +0100
+++ head/include/xen/xenbus.h	2011-04-13 15:43:04.000000000 +0200
@@ -105,10 +105,8 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
-	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
-#else
 	int (*suspend)(struct xenbus_device *dev);
+#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
 	int (*suspend_cancel)(struct xenbus_device *dev);
 #endif
 	int (*resume)(struct xenbus_device *dev);