From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.37
Patch-mainline: 2.6.37

 This patch contains the differences between Linux 2.6.36 and Linux 2.6.37.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.37" by xen-port-patches.py

--- head.orig/arch/x86/Kconfig	2014-04-30 10:47:53.000000000 +0200
+++ head/arch/x86/Kconfig	2014-06-26 15:01:39.000000000 +0200
@@ -1947,7 +1947,6 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
 	depends on X86_64 && HUGETLB_PAGE && MIGRATION
 
 menu "Power management and ACPI options"
-	depends on !XEN_UNPRIVILEGED_GUEST
 
 config ARCH_HIBERNATION_HEADER
 	def_bool y
@@ -1955,6 +1954,8 @@ config ARCH_HIBERNATION_HEADER
 
 source "kernel/power/Kconfig"
 
+if !XEN_UNPRIVILEGED_GUEST
+
 source "drivers/acpi/Kconfig"
 
 source "drivers/sfi/Kconfig"
@@ -2091,6 +2092,8 @@ source "drivers/cpuidle/Kconfig"
 
 source "drivers/idle/Kconfig"
 
+endif # !XEN_UNPRIVILEGED_GUEST
+
 endmenu
 
 
@@ -2169,7 +2172,7 @@ config PCI_OLPC
 
 config PCI_XEN
 	def_bool y
-	depends on PCI && XEN
+	depends on PCI && PARAVIRT_XEN
 	select SWIOTLB_XEN
 
 config PCI_DOMAINS
@@ -2193,21 +2196,6 @@ config PCI_CNB20LE_QUIRK
 
 	  You should say N unless you know you need this.
 
-config XEN_PCIDEV_FRONTEND
-	def_bool y
-	prompt "Xen PCI Frontend" if X86_64
-	depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
-	select HOTPLUG
-	help
-	  The PCI device frontend driver allows the kernel to import arbitrary
-	  PCI devices from a PCI backend to support PCI driver domains.
-
-config XEN_PCIDEV_FE_DEBUG
-	bool "Xen PCI Frontend Debugging"
-	depends on XEN_PCIDEV_FRONTEND
-	help
-	  Enables some debug statements within the PCI Frontend.
-
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
--- head.orig/arch/x86/include/asm/hw_irq.h	2013-08-09 14:59:34.000000000 +0200
+++ head/arch/x86/include/asm/hw_irq.h	2013-09-26 13:00:29.000000000 +0200
@@ -194,6 +194,7 @@ extern irqreturn_t smp_reschedule_interr
 extern irqreturn_t smp_call_function_interrupt(int, void *);
 extern irqreturn_t smp_call_function_single_interrupt(int, void *);
 extern irqreturn_t smp_reboot_interrupt(int, void *);
+extern irqreturn_t smp_irq_work_interrupt(int, void *);
 #endif
 #endif
 
--- head.orig/arch/x86/include/asm/io.h	2014-06-08 20:19:54.000000000 +0200
+++ head/arch/x86/include/asm/io.h	2014-04-30 10:50:21.000000000 +0200
@@ -319,7 +319,7 @@ extern void __iomem *ioremap_wc(resource
 
 extern bool is_early_ioremap_ptep(pte_t *ptep);
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/xen.h>
 struct bio_vec;
 
@@ -329,7 +329,7 @@ extern bool xen_biovec_phys_mergeable(co
 #define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
 	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
 	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-#endif	/* CONFIG_XEN */
+#endif	/* CONFIG_PARAVIRT_XEN */
 
 #define IO_SPACE_LIMIT 0xffff
 
--- head.orig/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 15:09:47.000000000 +0100
@@ -217,5 +217,20 @@ static inline unsigned long virt_to_fix(
 	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
 	return __virt_to_fix(vaddr);
 }
+
+/* Return an pointer with offset calculated */
+static __always_inline unsigned long
+__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+{
+	__set_fixmap(idx, phys, flags);
+	return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
+}
+
+#define set_fixmap_offset(idx, phys)			\
+	__set_fixmap_offset(idx, phys, PAGE_KERNEL)
+
+#define set_fixmap_offset_nocache(idx, phys)			\
+	__set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_FIXMAP_H */
--- head.orig/arch/x86/include/mach-xen/asm/highmem.h	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/highmem.h	2011-02-01 15:09:47.000000000 +0100
@@ -58,15 +58,16 @@ extern void kunmap_high(struct page *pag
 
 void *kmap(struct page *page);
 void kunmap(struct page *page);
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
-void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void *__kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
 
-#define kmap_atomic_pte(page, type) \
-	kmap_atomic_prot(page, type, \
+#define kmap_atomic_pte(page) \
+	kmap_atomic_prot(page, \
 	                 PagePinned(page) ? PAGE_KERNEL_RO : kmap_prot)
 
 #define flush_cache_kmaps()	do { } while (0)
--- head.orig/arch/x86/include/mach-xen/asm/io.h	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/io.h	2011-02-01 15:09:47.000000000 +0100
@@ -212,6 +212,7 @@ static inline void __iomem *ioremap(reso
 
 extern void iounmap(volatile void __iomem *addr);
 
+extern void set_iounmap_nonlazy(void);
 
 #ifdef __KERNEL__
 
@@ -353,6 +354,7 @@ extern void __iomem *early_memremap(reso
 				    unsigned long size);
 extern void early_iounmap(void __iomem *addr, unsigned long size);
 extern void fixup_early_ioremap(void);
+extern bool is_early_ioremap_ptep(pte_t *ptep);
 
 #define IO_SPACE_LIMIT 0xffff
 
--- head.orig/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-02-15 17:49:16.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-02-15 17:50:13.000000000 +0100
@@ -13,7 +13,12 @@
 #define NMI_VECTOR			0x02
 #define CALL_FUNC_SINGLE_VECTOR		3
 #define REBOOT_VECTOR			4
+#ifdef CONFIG_IRQ_WORK
+#define IRQ_WORK_VECTOR			5
+#define NR_IPIS				6
+#else
 #define NR_IPIS				5
+#endif
 
 /*
  * The maximum number of vectors supported by i386 processors
--- head.orig/arch/x86/include/mach-xen/asm/irqflags.h	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/irqflags.h	2012-05-24 09:12:20.000000000 +0200
@@ -36,19 +36,19 @@ void xen_safe_halt(void);
 
 void xen_halt(void);
 
-#define __raw_local_save_flags() xen_save_fl()
+#define arch_local_save_flags() xen_save_fl()
 
-#define raw_local_irq_restore(flags) xen_restore_fl(flags)
+#define arch_local_irq_restore(flags) xen_restore_fl(flags)
 
-#define raw_local_irq_disable() xen_irq_disable()
+#define arch_local_irq_disable() xen_irq_disable()
 
-#define raw_local_irq_enable() xen_irq_enable()
+#define arch_local_irq_enable() xen_irq_enable()
 
 /*
  * Used in the idle loop; sti takes one instruction cycle
  * to complete:
  */
-static inline void raw_safe_halt(void)
+static inline void arch_safe_halt(void)
 {
 	xen_safe_halt();
 }
@@ -65,11 +65,11 @@ static inline void halt(void)
 /*
  * For spinlocks, etc:
  */
-#define __raw_local_irq_save()						\
+#define arch_local_irq_save()						\
 ({									\
-	unsigned long flags = __raw_local_save_flags();			\
+	unsigned long flags = arch_local_save_flags();			\
 									\
-	raw_local_irq_disable();					\
+	arch_local_irq_disable();					\
 									\
 	flags;								\
 })
@@ -129,22 +129,16 @@ sysexit_ecrit:	/**** END OF SYSEXIT CRIT
 #endif /* __ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags)				\
-	do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags)				\
-	do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
+static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
 	return (flags != 0);
 }
 
-#define raw_irqs_disabled()						\
+#define arch_irqs_disabled()						\
 ({									\
-	unsigned long flags = __raw_local_save_flags();			\
+	unsigned long flags = arch_local_save_flags();			\
 									\
-	raw_irqs_disabled_flags(flags);					\
+	arch_irqs_disabled_flags(flags);				\
 })
 
 #else
--- head.orig/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:09:47.000000000 +0100
@@ -7,6 +7,7 @@
 #include <linux/string.h>
 #include <asm/scatterlist.h>
 #include <asm/io.h>
+#include <asm/x86_init.h>
 
 #ifdef __KERNEL__
 
@@ -100,9 +101,36 @@ static inline void early_quirks(void) { 
 
 extern void pci_iommu_alloc(void);
 
-/* MSI arch hooks */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
-#define arch_teardown_msi_irqs arch_teardown_msi_irqs
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
+/* MSI arch specific hooks */
+static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
+{
+	x86_msi.teardown_msi_irqs(dev);
+}
+
+static inline void x86_teardown_msi_irq(unsigned int irq)
+{
+	x86_msi.teardown_msi_irq(irq);
+}
+#define arch_setup_msi_irqs x86_setup_msi_irqs
+#define arch_teardown_msi_irqs x86_teardown_msi_irqs
+#define arch_teardown_msi_irq x86_teardown_msi_irq
+/* implemented in arch/x86/kernel/apic/io_apic. */
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void native_teardown_msi_irq(unsigned int irq);
+/* default to the implementation in drivers/lib/msi.c */
+#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev);
+#else
+#define native_setup_msi_irqs		NULL
+#define native_teardown_msi_irq		NULL
+#define default_teardown_msi_irqs	NULL
+#endif
 
 #define PCI_DMA_BUS_IS_PHYS 0
 
--- head.orig/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:00:38.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:02:30.000000000 +0100
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAG
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
 
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
 #define set_pte(ptep, pte)		xen_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)	xen_set_pte_at(mm, addr, ptep, pte)
 
@@ -634,6 +636,8 @@ static inline void ptep_set_wrprotect(st
 		set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 15:09:47.000000000 +0100
@@ -25,7 +25,7 @@
 struct vm_area_struct;
 
 extern pgd_t *swapper_pg_dir;
-extern pgd_t trampoline_pg_dir[1024];
+extern pgd_t initial_page_table[1024];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -48,24 +48,14 @@ extern void set_pmd_pfn(unsigned long, u
 #endif
 
 #if defined(CONFIG_HIGHPTE)
-#define __KM_PTE			\
-	(in_nmi() ? KM_NMI_PTE : 	\
-	 in_irq() ? KM_IRQ_PTE :	\
-	 KM_PTE0)
 #define pte_offset_map(dir, address)					\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +		\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir))) +		\
 	 pte_index((address)))
-#define pte_offset_map_nested(dir, address)				\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +		\
-	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
-#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
+#define pte_unmap(pte) kunmap_atomic((pte))
 #else
 #define pte_offset_map(dir, address)					\
 	((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
 #define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
 #endif
 
 /* Clear a kernel PTE and flush it from the TLB */
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 10:02:08.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 10:02:27.000000000 +0100
@@ -104,6 +104,8 @@ static inline void xen_pgd_clear(pgd_t *
 
 #define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
 
+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -127,9 +129,7 @@ static inline int pgd_large(pgd_t pgd) {
 
 /* x86-64 always has all page tables mapped. */
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
 #define pte_unmap(pte) ((void)(pte))/* NOP */
-#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
 
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
 
--- head.orig/arch/x86/include/mach-xen/asm/processor.h	2012-05-23 13:36:57.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:48.000000000 +0100
@@ -120,6 +120,8 @@ struct cpuinfo_x86 {
 	u16			phys_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
+	/* Compute unit id */
+	u8			compute_unit_id;
 #endif
 #ifdef CONFIG_SMP
 	/* Index into per_cpu list: */
@@ -556,7 +558,7 @@ extern unsigned long		mmu_cr4_features;
 
 static inline void set_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features |= mask;
 	cr4 = read_cr4();
@@ -566,7 +568,7 @@ static inline void set_in_cr4(unsigned l
 
 static inline void clear_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features &= ~mask;
 	cr4 = read_cr4();
@@ -718,31 +720,6 @@ extern unsigned long		idle_halt;
 extern unsigned long		idle_nomwait;
 extern bool			c1e_detected;
 
-#ifndef CONFIG_XEN
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt.  Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions.  wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
-	mb();
-	/* check for clflush to determine if wbinvd is legal */
-	if (cpu_has_clflush)
-		asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
-	else
-		while (1)
-			halt();
-}
-#endif
-
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
--- head.orig/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:10:16.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:12:15.000000000 +0100
@@ -57,7 +57,7 @@ struct smp_ops {
 	void (*smp_prepare_cpus)(unsigned max_cpus);
 	void (*smp_cpus_done)(unsigned max_cpus);
 
-	void (*smp_send_stop)(void);
+	void (*stop_other_cpus)(int wait);
 	void (*smp_send_reschedule)(int cpu);
 
 	int (*cpu_up)(unsigned cpu);
@@ -76,7 +76,12 @@ extern struct smp_ops smp_ops;
 
 static inline void smp_send_stop(void)
 {
-	smp_ops.smp_send_stop();
+	smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+	smp_ops.stop_other_cpus(1);
 }
 
 static inline void smp_prepare_boot_cpu(void)
@@ -148,12 +153,16 @@ void smp_store_cpu_info(int id);
 
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
-void xen_smp_send_stop(void);
+void xen_stop_other_cpus(int wait);
 void xen_smp_send_reschedule(int cpu);
 void xen_send_call_func_ipi(const struct cpumask *mask);
 void xen_send_call_func_single_ipi(int cpu);
 
-#define smp_send_stop		xen_smp_send_stop
+static inline void smp_send_stop(void)
+{
+	xen_stop_other_cpus(0);
+}
+
 #define smp_send_reschedule	xen_smp_send_reschedule
 #define arch_send_call_function_single_ipi	xen_send_call_func_single_ipi
 #define arch_send_call_function_ipi_mask	xen_send_call_func_ipi
--- head.orig/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:28:39.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/spinlock.h	2012-01-31 18:08:35.000000000 +0100
@@ -196,16 +196,16 @@ static __always_inline int __ticket_spin
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int token, count;
-	unsigned int flags = __raw_local_irq_save();
+	unsigned int flags = arch_local_irq_save();
 	bool free;
 
 	__ticket_spin_lock_preamble;
 	if (likely(free)) {
-		raw_local_irq_restore(flags);
+		arch_local_irq_restore(flags);
 		return;
 	}
 	token = xen_spin_adjust(lock, token);
-	raw_local_irq_restore(flags);
+	arch_local_irq_restore(flags);
 	do {
 		count = 1 << 12;
 		__ticket_spin_lock_body;
--- head.orig/arch/x86/include/mach-xen/asm/swiotlb.h	2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/swiotlb.h	2011-02-01 15:09:47.000000000 +0100
@@ -1,6 +1,4 @@
 #include_next <asm/swiotlb.h>
 
-#define pci_swiotlb_detect() 1
-
 dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
 				   int dir);
--- head.orig/arch/x86/include/mach-xen/asm/tlbflush.h	2013-09-26 14:51:44.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/tlbflush.h	2013-09-26 14:52:05.000000000 +0200
@@ -119,6 +119,4 @@ static inline void flush_tlb_kernel_rang
 	flush_tlb_all();
 }
 
-extern void zap_low_mappings(bool early);
-
 #endif /* _ASM_X86_TLBFLUSH_H */
--- head.orig/arch/x86/kernel/Makefile	2013-12-02 17:51:11.000000000 +0100
+++ head/arch/x86/kernel/Makefile	2013-12-02 17:57:49.000000000 +0100
@@ -121,7 +121,6 @@ ifeq ($(CONFIG_X86_64),y)
 	obj-y				+= vsmp_64.o
 endif
 
-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o i8237.o i8253.o \
-	i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o \
-	uv_%.o vsmp_64.o
+disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o i8237.o i8253.o i8259.o \
+	irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o vsmp_64.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -144,13 +144,9 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int node)
+static struct irq_pin_list *alloc_irq_pin_list(int node)
 {
-	struct irq_pin_list *pin;
-
-	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-
-	return pin;
+	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -163,10 +159,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
-	int count;
-	int node;
-	int i;
+	int count, node, i;
 
 	if (!legacy_pic->nr_legacy_irqs) {
 		nr_irqs_gsi = 0;
@@ -175,13 +168,15 @@ int __init arch_early_irq_init(void)
 
 	cfg = irq_cfgx;
 	count = ARRAY_SIZE(irq_cfgx);
-	node= cpu_to_node(boot_cpu_id);
+	node = cpu_to_node(0);
+
+	/* Make sure the legacy interrupts are marked in the bitmap */
+	irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
 
 	for (i = 0; i < count; i++) {
-		desc = irq_to_desc(i);
-		desc->chip_data = &cfg[i];
-		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
-		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+		set_irq_chip_data(i, &cfg[i]);
+		zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
+		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
 		/*
 		 * For legacy IRQ's, start with assigning irq0 to irq15 to
 		 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -196,170 +191,88 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	struct irq_cfg *cfg = NULL;
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	if (desc)
-		cfg = desc->chip_data;
-
-	return cfg;
+	return get_irq_chip_data(irq);
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int node)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
 {
 	struct irq_cfg *cfg;
 
-	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
-	if (cfg) {
-		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
-			kfree(cfg);
-			cfg = NULL;
-		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
-							  GFP_ATOMIC, node)) {
-			free_cpumask_var(cfg->domain);
-			kfree(cfg);
-			cfg = NULL;
-		}
-	}
-
+	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+	if (!cfg)
+		return NULL;
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+		goto out_cfg;
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+		goto out_domain;
 	return cfg;
+out_domain:
+	free_cpumask_var(cfg->domain);
+out_cfg:
+	kfree(cfg);
+	return NULL;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int node)
-{
-	struct irq_cfg *cfg;
-
-	cfg = desc->chip_data;
-	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(node);
-		if (!desc->chip_data) {
-			printk(KERN_ERR "can not alloc irq_cfg\n");
-			BUG_ON(1);
-		}
-	}
-
-	return 0;
-}
-
-/* for move_irq_desc */
-static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
 {
-	struct irq_pin_list *old_entry, *head, *tail, *entry;
-
-	cfg->irq_2_pin = NULL;
-	old_entry = old_cfg->irq_2_pin;
-	if (!old_entry)
-		return;
-
-	entry = get_one_free_irq_2_pin(node);
-	if (!entry)
+	if (!cfg)
 		return;
+	set_irq_chip_data(at, NULL);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
+}
 
-	entry->apic	= old_entry->apic;
-	entry->pin	= old_entry->pin;
-	head		= entry;
-	tail		= entry;
-	old_entry	= old_entry->next;
-	while (old_entry) {
-		entry = get_one_free_irq_2_pin(node);
-		if (!entry) {
-			entry = head;
-			while (entry) {
-				head = entry->next;
-				kfree(entry);
-				entry = head;
-			}
-			/* still use the old one */
-			return;
-		}
-		entry->apic	= old_entry->apic;
-		entry->pin	= old_entry->pin;
-		tail->next	= entry;
-		tail		= entry;
-		old_entry	= old_entry->next;
-	}
+#else
 
-	tail->next = NULL;
-	cfg->irq_2_pin = head;
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
 
-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
 {
-	struct irq_pin_list *entry, *next;
-
-	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
-		return;
+	return irq_cfgx + irq;
+}
 
-	entry = old_cfg->irq_2_pin;
+static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
 
-	while (entry) {
-		next = entry->next;
-		kfree(entry);
-		entry = next;
-	}
-	old_cfg->irq_2_pin = NULL;
-}
+#endif
 
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int node)
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 {
+	int res = irq_alloc_desc_at(at, node);
 	struct irq_cfg *cfg;
-	struct irq_cfg *old_cfg;
-
-	cfg = get_one_free_irq_cfg(node);
-
-	if (!cfg)
-		return;
-
-	desc->chip_data = cfg;
 
-	old_cfg = old_desc->chip_data;
-
-	cfg->vector = old_cfg->vector;
-	cfg->move_in_progress = old_cfg->move_in_progress;
-	cpumask_copy(cfg->domain, old_cfg->domain);
-	cpumask_copy(cfg->old_domain, old_cfg->old_domain);
-
-	init_copy_irq_2_pin(old_cfg, cfg, node);
-}
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = get_irq_chip_data(at);
+		if (cfg)
+			return cfg;
+	}
 
-static void free_irq_cfg(struct irq_cfg *cfg)
-{
-	free_cpumask_var(cfg->domain);
-	free_cpumask_var(cfg->old_domain);
-	kfree(cfg);
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		set_irq_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+	return cfg;
 }
 
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+static int alloc_irq_from(unsigned int from, int node)
 {
-	struct irq_cfg *old_cfg, *cfg;
-
-	old_cfg = old_desc->chip_data;
-	cfg = desc->chip_data;
-
-	if (old_cfg == cfg)
-		return;
-
-	if (old_cfg) {
-		free_irq_2_pin(old_cfg, cfg);
-		free_irq_cfg(old_cfg);
-		old_desc->chip_data = NULL;
-	}
+	return irq_alloc_desc_from(from, node);
 }
-/* end for move_irq_desc */
 
-#else
-struct irq_cfg *irq_cfg(unsigned int irq)
+static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
+	free_irq_cfg(at, cfg);
+	irq_free_desc(at);
 }
 
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -492,7 +405,7 @@ __ioapic_write_entry(int apic, int pin, 
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 }
 
-void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
 	unsigned long flags;
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -523,7 +436,7 @@ static void ioapic_mask_entry(int apic, 
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
 static int
-add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
+__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list **last, *entry;
 
@@ -535,7 +448,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
 		last = &entry->next;
 	}
 
-	entry = get_one_free_irq_2_pin(node);
+	entry = alloc_irq_pin_list(node);
 	if (!entry) {
 		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
 				node, apic, pin);
@@ -550,7 +463,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
 
 static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+	if (__add_pin_to_irq_node(cfg, node, apic, pin))
 		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 }
 
@@ -613,11 +526,6 @@ static void __unmask_and_level_IO_APIC_i
 			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
-}
-
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -629,44 +537,37 @@ static void io_apic_sync(struct irq_pin_
 	readl(&io_apic->data);
 }
 
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+static void mask_ioapic(struct irq_cfg *cfg)
 {
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void mask_ioapic_irq(struct irq_data *data)
 {
-	struct irq_cfg *cfg = desc->chip_data;
-	unsigned long flags;
-
-	BUG_ON(!cfg);
+	mask_ioapic(data->chip_data);
+}
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(cfg);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+static void __unmask_ioapic(struct irq_cfg *cfg)
+{
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void unmask_ioapic(struct irq_cfg *cfg)
 {
-	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(cfg);
+	__unmask_ioapic(cfg);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void mask_IO_APIC_irq(unsigned int irq)
+static void unmask_ioapic_irq(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	mask_IO_APIC_irq_desc(desc);
-}
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	unmask_IO_APIC_irq_desc(desc);
+	unmask_ioapic(data->chip_data);
 }
 
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -693,7 +594,7 @@ static void clear_IO_APIC (void)
 }
 #else
 #define add_pin_to_irq_node(cfg, node, apic, pin)
-#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
+#define __add_pin_to_irq_node(cfg, node, apic, pin) 0
 #endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_X86_32
@@ -741,14 +642,14 @@ struct IO_APIC_route_entry **alloc_ioapi
 	struct IO_APIC_route_entry **ioapic_entries;
 
 	ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
-				GFP_ATOMIC);
+				GFP_KERNEL);
 	if (!ioapic_entries)
 		return 0;
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
 		ioapic_entries[apic] =
 			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				nr_ioapic_registers[apic], GFP_ATOMIC);
+				nr_ioapic_registers[apic], GFP_KERNEL);
 		if (!ioapic_entries[apic])
 			goto nomem;
 	}
@@ -1310,7 +1211,6 @@ void __setup_vector_irq(int cpu)
 	/* Initialize vector_irq on a new cpu */
 	int irq, vector;
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 
 	/*
 	 * vector_lock will make sure that we don't run into irq vector
@@ -1319,9 +1219,10 @@ void __setup_vector_irq(int cpu)
 	 */
 	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
-	for_each_irq_desc(irq, desc) {
-		cfg = desc->chip_data;
-
+	for_each_active_irq(irq) {
+		cfg = get_irq_chip_data(irq);
+		if (!cfg)
+			continue;
 		/*
 		 * If it is a legacy IRQ handled by the legacy PIC, this cpu
 		 * will be part of the irq_cfg's domain.
@@ -1378,17 +1279,17 @@ static inline int IO_APIC_irq_trigger(in
 }
 #endif
 
-static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
 {
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
-		desc->status |= IRQ_LEVEL;
+		irq_set_status_flags(irq, IRQ_LEVEL);
 	else
-		desc->status &= ~IRQ_LEVEL;
+		irq_clear_status_flags(irq, IRQ_LEVEL);
 
-	if (irq_remapped(irq)) {
-		desc->status |= IRQ_MOVE_PCNTXT;
+	if (irq_remapped(get_irq_chip_data(irq))) {
+		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 		if (trigger)
 			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 						      handle_fasteoi_irq,
@@ -1410,13 +1311,13 @@ static void ioapic_register_intr(int irq
 }
 #else /* !CONFIG_XEN */
 #define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
+#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
 #endif
 
-int setup_ioapic_entry(int apic_id, int irq,
-		       struct IO_APIC_route_entry *entry,
-		       unsigned int destination, int trigger,
-		       int polarity, int vector, int pin)
+static int setup_ioapic_entry(int apic_id, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector, int pin)
 {
 	/*
 	 * add it to the IO-APIC irq-routing table:
@@ -1438,21 +1339,7 @@ int setup_ioapic_entry(int apic_id, int 
 		if (index < 0)
 			panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
 
-		memset(&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = apic->irq_dest_mode;
-		/*
-		 * Trigger mode in the IRTE will always be edge, and the
-		 * actual level or edge trigger will be setup in the IO-APIC
-		 * RTE. This will help simplify level triggered irq migration.
-		 * For more details, see the comments above explainig IO-APIC
-		 * irq migration in the presence of interrupt-remapping.
-		 */
-		irte.trigger_mode = 0;
-		irte.dlvry_mode = apic->irq_delivery_mode;
-		irte.vector = vector;
-		irte.dest_id = IRTE_DEST(destination);
+		prepare_irte(&irte, vector, destination);
 
 		/* Set source-id of interrupt request */
 		set_ioapic_sid(&irte, apic_id);
@@ -1489,18 +1376,14 @@ int setup_ioapic_entry(int apic_id, int 
 	return 0;
 }
 
-static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
-			      int trigger, int polarity)
+static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
+			     struct irq_cfg *cfg, int trigger, int polarity)
 {
-	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
-
-	cfg = desc->chip_data;
-
 #ifndef CONFIG_XEN
 	/*
 	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
@@ -1535,10 +1418,10 @@ static void setup_IO_APIC_irq(int apic_i
 		return;
 	}
 
-	ioapic_register_intr(irq, desc, trigger);
+	ioapic_register_intr(irq, trigger);
 #ifndef CONFIG_XEN
 	if (irq < legacy_pic->nr_legacy_irqs)
-		legacy_pic->chip->mask(irq);
+		legacy_pic->mask(irq);
 #endif
 
 	ioapic_write_entry(apic_id, pin, entry);
@@ -1550,11 +1433,9 @@ static struct {
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
-	int notcon = 0;
-	struct irq_desc *desc;
+	int apic_id, pin, idx, irq, notcon = 0;
+	int node = cpu_to_node(0);
 	struct irq_cfg *cfg;
-	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1596,19 +1477,17 @@ static void __init setup_IO_APIC_irqs(vo
 			continue;
 #endif
 
-		desc = irq_to_desc_alloc_node(irq, node);
-		if (!desc) {
-			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+		cfg = alloc_irq_and_cfg_at(irq, node);
+		if (!cfg)
 			continue;
-		}
-		cfg = desc->chip_data;
+
 		add_pin_to_irq_node(cfg, node, apic_id, pin);
 		/*
 		 * don't mark it in pin_programmed, so later acpi could
 		 * set it correctly when irq < 16
 		 */
-		setup_IO_APIC_irq(apic_id, pin, irq, desc,
-				irq_trigger(idx), irq_polarity(idx));
+		setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
+				  irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -1623,9 +1502,7 @@ static void __init setup_IO_APIC_irqs(vo
  */
 void setup_IO_APIC_irq_extra(u32 gsi)
 {
-	int apic_id = 0, pin, idx, irq;
-	int node = cpu_to_node(boot_cpu_id);
-	struct irq_desc *desc;
+	int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
 	struct irq_cfg *cfg;
 
 	/*
@@ -1645,18 +1522,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
 		return;
 #endif
-#ifdef CONFIG_SPARSE_IRQ
-	desc = irq_to_desc(irq);
-	if (desc)
+
+	/* Only handle the non legacy irqs on secondary ioapics */
+	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
 		return;
-#endif
-	desc = irq_to_desc_alloc_node(irq, node);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+
+	cfg = alloc_irq_and_cfg_at(irq, node);
+	if (!cfg)
 		return;
-	}
 
-	cfg = desc->chip_data;
 	add_pin_to_irq_node(cfg, node, apic_id, pin);
 
 	if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
@@ -1666,7 +1540,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 	}
 	set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
 
-	setup_IO_APIC_irq(apic_id, pin, irq, desc,
+	setup_ioapic_irq(apic_id, pin, irq, cfg,
 			irq_trigger(idx), irq_polarity(idx));
 }
 
@@ -1718,7 +1592,6 @@ __apicdebuginit(void) print_IO_APIC(void
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	unsigned int irq;
 
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
@@ -1805,10 +1678,10 @@ __apicdebuginit(void) print_IO_APIC(void
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_desc(irq, desc) {
+	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 
-		cfg = desc->chip_data;
+		cfg = get_irq_chip_data(irq);
 		if (!cfg)
 			continue;
 		entry = cfg->irq_2_pin;
@@ -2315,29 +2188,26 @@ static int __init timer_irq_works(void)
  * an edge even if it isn't on the 8259A...
  */
 
-static unsigned int startup_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(struct irq_data *data)
 {
-	int was_pending = 0;
+	int was_pending = 0, irq = data->irq;
 	unsigned long flags;
-	struct irq_cfg *cfg;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	if (irq < legacy_pic->nr_legacy_irqs) {
-		legacy_pic->chip->mask(irq);
+		legacy_pic->mask(irq);
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
-	cfg = irq_cfg(irq);
-	__unmask_IO_APIC_irq(cfg);
+	__unmask_ioapic(data->chip_data);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
 }
 
-static int ioapic_retrigger_irq(unsigned int irq)
+static int ioapic_retrigger_irq(struct irq_data *data)
 {
-
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_cfg *cfg = data->chip_data;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2388,7 +2258,7 @@ static void __target_IO_APIC_irq(unsigne
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
 		 */
-		if (!irq_remapped(irq))
+		if (!irq_remapped(cfg))
 			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2398,65 +2268,46 @@ static void __target_IO_APIC_irq(unsigne
 }
 
 /*
- * Either sets desc->affinity to a valid value, and returns
+ * Either sets data->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves desc->affinity untouched.
+ * leaves data->affinity untouched.
  */
-unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
-		  unsigned int *dest_id)
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			  unsigned int *dest_id)
 {
-	struct irq_cfg *cfg;
-	unsigned int irq;
+	struct irq_cfg *cfg = data->chip_data;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -1;
 
-	irq = desc->irq;
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+	if (assign_irq_vector(data->irq, data->chip_data, mask))
 		return -1;
 
-	cpumask_copy(desc->affinity, mask);
+	cpumask_copy(data->affinity, mask);
 
-	*dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
 	return 0;
 }
 
 static int
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
 {
-	struct irq_cfg *cfg;
+	unsigned int dest, irq = data->irq;
 	unsigned long flags;
-	unsigned int dest;
-	unsigned int irq;
-	int ret = -1;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
+	int ret;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = set_desc_affinity(desc, mask, &dest);
+	ret = __ioapic_set_affinity(data, mask, &dest);
 	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, cfg);
+		__target_IO_APIC_irq(irq, dest, data->chip_data);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
 	return ret;
 }
 
-static int
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	return set_ioapic_affinity_irq_desc(desc, mask);
-}
-
 #ifdef CONFIG_INTR_REMAP
 
 /*
@@ -2471,24 +2322,21 @@ set_ioapic_affinity_irq(unsigned int irq
  * the interrupt-remapping table entry.
  */
 static int
-migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		       bool force)
 {
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = data->chip_data;
+	unsigned int dest, irq = data->irq;
 	struct irte irte;
-	unsigned int dest;
-	unsigned int irq;
-	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return ret;
+		return -EINVAL;
 
-	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return ret;
+		return -EBUSY;
 
-	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return ret;
+		return -EBUSY;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2503,29 +2351,14 @@ migrate_ioapic_irq_desc(struct irq_desc 
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	cpumask_copy(desc->affinity, mask);
-
+	cpumask_copy(data->affinity, mask);
 	return 0;
 }
 
-/*
- * Migrates the IRQ destination in the process context.
- */
-static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
-					    const struct cpumask *mask)
-{
-	return migrate_ioapic_irq_desc(desc, mask);
-}
-static int set_ir_ioapic_affinity_irq(unsigned int irq,
-				       const struct cpumask *mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	return set_ir_ioapic_affinity_irq_desc(desc, mask);
-}
 #else
-static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
-						   const struct cpumask *mask)
+static inline int
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		       bool force)
 {
 	return 0;
 }
@@ -2587,10 +2420,8 @@ unlock:
 	irq_exit();
 }
 
-static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
 {
-	struct irq_desc *desc = *descp;
-	struct irq_cfg *cfg = desc->chip_data;
 	unsigned me;
 
 	if (likely(!cfg->move_in_progress))
@@ -2602,31 +2433,28 @@ static void __irq_complete_move(struct i
 		send_cleanup_vector(cfg);
 }
 
-static void irq_complete_move(struct irq_desc **descp)
+static void irq_complete_move(struct irq_cfg *cfg)
 {
-	__irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
 void irq_force_complete_move(int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
+	struct irq_cfg *cfg = get_irq_chip_data(irq);
 
 	if (!cfg)
 		return;
 
-	__irq_complete_move(&desc, cfg->vector);
+	__irq_complete_move(cfg, cfg->vector);
 }
 #else
-static inline void irq_complete_move(struct irq_desc **descp) {}
+static inline void irq_complete_move(struct irq_cfg *cfg) { }
 #endif
 
-static void ack_apic_edge(unsigned int irq)
+static void ack_apic_edge(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	irq_complete_move(&desc);
-	move_native_irq(irq);
+	irq_complete_move(data->chip_data);
+	move_native_irq(data->irq);
 	ack_APIC_irq();
 }
 
@@ -2648,10 +2476,12 @@ atomic_t irq_mis_count;
  * Otherwise, we simulate the EOI message manually by changing the trigger
  * mode to edge and then back to level, with RTE being masked during this.
 */
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
+	unsigned long flags;
 
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (mp_ioapics[entry->apic].apicver >= 0x20) {
 			/*
@@ -2660,7 +2490,7 @@ static void __eoi_ioapic_irq(unsigned in
 			 * intr-remapping table entry. Hence for the io-apic
 			 * EOI we use the pin number.
 			 */
-			if (irq_remapped(irq))
+			if (irq_remapped(cfg))
 				io_apic_eoi(entry->apic, entry->pin);
 			else
 				io_apic_eoi(entry->apic, cfg->vector);
@@ -2669,36 +2499,21 @@ static void __eoi_ioapic_irq(unsigned in
 			__unmask_and_level_IO_APIC_irq(entry);
 		}
 	}
-}
-
-static void eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void ack_apic_level(unsigned int irq)
+static void ack_apic_level(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = data->chip_data;
+	int i, do_unmask_irq = 0, irq = data->irq;
 	unsigned long v;
-	int i;
-	struct irq_cfg *cfg;
-	int do_unmask_irq = 0;
 
-	irq_complete_move(&desc);
+	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_IO_APIC_irq_desc(desc);
+		mask_ioapic(cfg);
 	}
 #endif
 
@@ -2734,7 +2549,6 @@ static void ack_apic_level(unsigned int 
 	 * we use the above logic (mask+edge followed by unmask+level) from
 	 * Manfred Spraul to clear the remote IRR.
 	 */
-	cfg = desc->chip_data;
 	i = cfg->vector;
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
@@ -2754,7 +2568,7 @@ static void ack_apic_level(unsigned int 
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 
-		eoi_ioapic_irq(desc);
+		eoi_ioapic_irq(irq, cfg);
 	}
 
 	/* Now we can move and renable the irq */
@@ -2785,62 +2599,58 @@ static void ack_apic_level(unsigned int 
 		 * accurate and is causing problems then it is a hardware bug
 		 * and you can go talk to the chipset vendor about it.
 		 */
-		cfg = desc->chip_data;
 		if (!io_apic_level_ack_pending(cfg))
 			move_masked_irq(irq);
-		unmask_IO_APIC_irq_desc(desc);
+		unmask_ioapic(cfg);
 	}
 }
 
 #ifdef CONFIG_INTR_REMAP
-static void ir_ack_apic_edge(unsigned int irq)
+static void ir_ack_apic_edge(struct irq_data *data)
 {
 	ack_APIC_irq();
 }
 
-static void ir_ack_apic_level(unsigned int irq)
+static void ir_ack_apic_level(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 	ack_APIC_irq();
-	eoi_ioapic_irq(desc);
+	eoi_ioapic_irq(data->irq, data->chip_data);
 }
 #endif /* CONFIG_INTR_REMAP */
 
 static struct irq_chip ioapic_chip __read_mostly = {
-	.name		= "IO-APIC",
-	.startup	= startup_ioapic_irq,
-	.mask		= mask_IO_APIC_irq,
-	.unmask		= unmask_IO_APIC_irq,
-	.ack		= ack_apic_edge,
-	.eoi		= ack_apic_level,
+	.name			= "IO-APIC",
+	.irq_startup		= startup_ioapic_irq,
+	.irq_mask		= mask_ioapic_irq,
+	.irq_unmask		= unmask_ioapic_irq,
+	.irq_ack		= ack_apic_edge,
+	.irq_eoi		= ack_apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity	= set_ioapic_affinity_irq,
+	.irq_set_affinity	= ioapic_set_affinity,
 #endif
-	.retrigger	= ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name		= "IR-IO-APIC",
-	.startup	= startup_ioapic_irq,
-	.mask		= mask_IO_APIC_irq,
-	.unmask		= unmask_IO_APIC_irq,
+	.name			= "IR-IO-APIC",
+	.irq_startup		= startup_ioapic_irq,
+	.irq_mask		= mask_ioapic_irq,
+	.irq_unmask		= unmask_ioapic_irq,
 #ifdef CONFIG_INTR_REMAP
-	.ack		= ir_ack_apic_edge,
-	.eoi		= ir_ack_apic_level,
+	.irq_ack		= ir_ack_apic_edge,
+	.irq_eoi		= ir_ack_apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity	= set_ir_ioapic_affinity_irq,
+	.irq_set_affinity	= ir_ioapic_set_affinity,
 #endif
 #endif
-	.retrigger	= ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 #endif /* !CONFIG_XEN */
 
 static inline void init_IO_APIC_traps(void)
 {
-	int irq;
-	struct irq_desc *desc;
 	struct irq_cfg *cfg;
+	unsigned int irq;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -2853,12 +2663,12 @@ static inline void init_IO_APIC_traps(vo
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_desc(irq, desc) {
+	for_each_active_irq(irq) {
 #ifdef CONFIG_XEN
 		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
 			continue;
 #endif
-		cfg = desc->chip_data;
+		cfg = get_irq_chip_data(irq);
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2869,7 +2679,7 @@ static inline void init_IO_APIC_traps(vo
 				legacy_pic->make_irq(irq);
 			else
 				/* Strange. Oh, well.. */
-				desc->chip = &no_irq_chip;
+				set_irq_chip(irq, &no_irq_chip);
 		}
 	}
 }
@@ -2879,7 +2689,7 @@ static inline void init_IO_APIC_traps(vo
  * The local APIC irq-chip implementation:
  */
 
-static void mask_lapic_irq(unsigned int irq)
+static void mask_lapic_irq(struct irq_data *data)
 {
 	unsigned long v;
 
@@ -2887,7 +2697,7 @@ static void mask_lapic_irq(unsigned int 
 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(struct irq_data *data)
 {
 	unsigned long v;
 
@@ -2895,21 +2705,21 @@ static void unmask_lapic_irq(unsigned in
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq(unsigned int irq)
+static void ack_lapic_irq(struct irq_data *data)
 {
 	ack_APIC_irq();
 }
 
 static struct irq_chip lapic_chip __read_mostly = {
 	.name		= "local-APIC",
-	.mask		= mask_lapic_irq,
-	.unmask		= unmask_lapic_irq,
-	.ack		= ack_lapic_irq,
+	.irq_mask	= mask_lapic_irq,
+	.irq_unmask	= unmask_lapic_irq,
+	.irq_ack	= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq, struct irq_desc *desc)
+static void lapic_register_intr(int irq)
 {
-	desc->status &= ~IRQ_LEVEL;
+	irq_clear_status_flags(irq, IRQ_LEVEL);
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 }
@@ -3012,9 +2822,8 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_desc *desc = irq_to_desc(0);
-	struct irq_cfg *cfg = desc->chip_data;
-	int node = cpu_to_node(boot_cpu_id);
+	struct irq_cfg *cfg = get_irq_chip_data(0);
+	int node = cpu_to_node(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -3024,7 +2833,7 @@ static inline void __init check_timer(vo
 	/*
 	 * get/set the timer IRQ vector:
 	 */
-	legacy_pic->chip->mask(0);
+	legacy_pic->mask(0);
 	assign_irq_vector(0, cfg, apic->target_cpus());
 
 	/*
@@ -3083,7 +2892,7 @@ static inline void __init check_timer(vo
 			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
-			/* for edge trigger, setup_IO_APIC_irq already
+			/* for edge trigger, setup_ioapic_irq already
 			 * leave it unmasked.
 			 * so only need to unmask if it is level-trigger
 			 * do we really have level trigger timer?
@@ -3091,12 +2900,12 @@ static inline void __init check_timer(vo
 			int idx;
 			idx = find_irq_entry(apic1, pin1, mp_INT);
 			if (idx != -1 && irq_trigger(idx))
-				unmask_IO_APIC_irq_desc(desc);
+				unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
-				legacy_pic->chip->unmask(0);
+				legacy_pic->unmask(0);
 			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
@@ -3119,14 +2928,14 @@ static inline void __init check_timer(vo
 		 */
 		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		legacy_pic->chip->unmask(0);
+		legacy_pic->unmask(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
 			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->chip->mask(0);
+				legacy_pic->mask(0);
 				setup_nmi();
-				legacy_pic->chip->unmask(0);
+				legacy_pic->unmask(0);
 			}
 			goto out;
 		}
@@ -3134,7 +2943,7 @@ static inline void __init check_timer(vo
 		 * Cleanup, just in case ...
 		 */
 		local_irq_disable();
-		legacy_pic->chip->mask(0);
+		legacy_pic->mask(0);
 		clear_IO_APIC_pin(apic2, pin2);
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
@@ -3151,16 +2960,16 @@ static inline void __init check_timer(vo
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0, desc);
+	lapic_register_intr(0);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
-	legacy_pic->chip->unmask(0);
+	legacy_pic->unmask(0);
 
 	if (timer_irq_works()) {
 		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
 		goto out;
 	}
 	local_irq_disable();
-	legacy_pic->chip->mask(0);
+	legacy_pic->mask(0);
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
 
@@ -3340,49 +3149,42 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want, int node)
+unsigned int create_irq_nr(unsigned int from, int node)
 {
-	/* Allocate an unused irq */
-	unsigned int irq;
-	unsigned int new;
+	struct irq_cfg *cfg;
 	unsigned long flags;
-	struct irq_cfg *cfg_new = NULL;
-	struct irq_desc *desc_new = NULL;
-
-	irq = 0;
-	if (irq_want < nr_irqs_gsi)
-		irq_want = nr_irqs_gsi;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_node(new, node);
-		if (!desc_new) {
-			printk(KERN_INFO "can not get irq_desc for %d\n", new);
-			continue;
-		}
-		cfg_new = desc_new->chip_data;
-
-		if (cfg_new->vector != 0)
-			continue;
+	unsigned int ret = 0;
+	int irq;
 
-		desc_new = move_irq_desc(desc_new, node);
-		cfg_new = desc_new->chip_data;
+	if (from < nr_irqs_gsi)
+		from = nr_irqs_gsi;
 
-		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
-			irq = new;
-		break;
+	irq = alloc_irq_from(from, node);
+	if (irq < 0)
+		return 0;
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg) {
+		free_irq_at(irq, NULL);
+		return 0;
 	}
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq > 0)
-		dynamic_irq_init_keep_chip_data(irq);
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
+		ret = irq;
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
-	return irq;
+	if (ret) {
+		set_irq_chip_data(irq, cfg);
+		irq_clear_status_flags(irq, IRQ_NOREQUEST);
+	} else {
+		free_irq_at(irq, cfg);
+	}
+	return ret;
 }
 
 int create_irq(void)
 {
-	int node = cpu_to_node(boot_cpu_id);
+	int node = cpu_to_node(0);
 	unsigned int irq_want;
 	int irq;
 
@@ -3397,14 +3199,17 @@ int create_irq(void)
 
 void destroy_irq(unsigned int irq)
 {
+	struct irq_cfg *cfg = get_irq_chip_data(irq);
 	unsigned long flags;
 
-	dynamic_irq_cleanup_keep_chip_data(irq);
+	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-	free_irte(irq);
+	if (irq_remapped(cfg))
+		free_irte(irq);
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, get_irq_chip_data(irq));
+	__clear_irq_vector(irq, cfg);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	free_irq_at(irq, cfg);
 }
 #endif /* !CONFIG_XEN */
 
@@ -3429,7 +3234,7 @@ static int msi_compose_msg(struct pci_de
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
 
-	if (irq_remapped(irq)) {
+	if (irq_remapped(get_irq_chip_data(irq))) {
 		struct irte irte;
 		int ir_index;
 		u16 sub_handle;
@@ -3437,14 +3242,7 @@ static int msi_compose_msg(struct pci_de
 		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 		BUG_ON(ir_index == -1);
 
-		memset (&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = apic->irq_dest_mode;
-		irte.trigger_mode = 0; /* edge */
-		irte.dlvry_mode = apic->irq_delivery_mode;
-		irte.vector = cfg->vector;
-		irte.dest_id = IRTE_DEST(dest);
+		prepare_irte(&irte, cfg->vector, dest);
 
 		/* Set source-id of interrupt request */
 		if (pdev)
@@ -3489,26 +3287,24 @@ static int msi_compose_msg(struct pci_de
 }
 
 #ifdef CONFIG_SMP
-static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
-	cfg = desc->chip_data;
-
-	get_cached_msi_msg_desc(desc, &msg);
+	__get_cached_msi_msg(data->msi_desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-	write_msi_msg_desc(desc, &msg);
+	__write_msi_msg(data->msi_desc, &msg);
 
 	return 0;
 }
@@ -3518,17 +3314,17 @@ static int set_msi_irq_affinity(unsigned
  * done in the process context using interrupt-remapping hardware.
  */
 static int
-ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
-	unsigned int dest;
+	struct irq_cfg *cfg = data->chip_data;
+	unsigned int dest, irq = data->irq;
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
 		return -1;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
 	irte.vector = cfg->vector;
@@ -3558,27 +3354,27 @@ ir_set_msi_irq_affinity(unsigned int irq
  * which implement the MSI or MSI-X Capability Structure.
  */
 static struct irq_chip msi_chip = {
-	.name		= "PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_apic_edge,
+	.name			= "PCI-MSI",
+	.irq_unmask		= unmask_msi_irq,
+	.irq_mask		= mask_msi_irq,
+	.irq_ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity	= set_msi_irq_affinity,
+	.irq_set_affinity	= msi_set_affinity,
 #endif
-	.retrigger	= ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 static struct irq_chip msi_ir_chip = {
-	.name		= "IR-PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
+	.name			= "IR-PCI-MSI",
+	.irq_unmask		= unmask_msi_irq,
+	.irq_mask		= mask_msi_irq,
 #ifdef CONFIG_INTR_REMAP
-	.ack		= ir_ack_apic_edge,
+	.irq_ack		= ir_ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity	= ir_set_msi_irq_affinity,
+	.irq_set_affinity	= ir_msi_set_affinity,
 #endif
 #endif
-	.retrigger	= ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 /*
@@ -3610,8 +3406,8 @@ static int msi_alloc_irte(struct pci_dev
 
 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
-	int ret;
 	struct msi_msg msg;
+	int ret;
 
 	ret = msi_compose_msg(dev, irq, &msg, -1);
 	if (ret < 0)
@@ -3620,12 +3416,8 @@ static int setup_msi_irq(struct pci_dev 
 	set_irq_msi(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
-	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		/*
-		 * irq migration in process context
-		 */
-		desc->status |= IRQ_MOVE_PCNTXT;
+	if (irq_remapped(get_irq_chip_data(irq))) {
+		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
 	} else
 		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
@@ -3635,15 +3427,12 @@ static int setup_msi_irq(struct pci_dev 
 	return 0;
 }
 
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	unsigned int irq;
-	int ret, sub_handle;
+	int node, ret, sub_handle, index = 0;
+	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
-	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
-	int index = 0;
-	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3696,31 +3485,31 @@ error:
 	return ret;
 }
 
-void arch_teardown_msi_irq(unsigned int irq)
+void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
 }
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = data->chip_data;
+	unsigned int dest, irq = data->irq;
 	struct msi_msg msg;
-	unsigned int dest;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
-	cfg = desc->chip_data;
-
 	dmar_msi_read(irq, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
 
@@ -3730,14 +3519,14 @@ static int dmar_msi_set_affinity(unsigne
 #endif /* CONFIG_SMP */
 
 static struct irq_chip dmar_msi_type = {
-	.name = "DMAR_MSI",
-	.unmask = dmar_msi_unmask,
-	.mask = dmar_msi_mask,
-	.ack = ack_apic_edge,
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity = dmar_msi_set_affinity,
+	.irq_set_affinity	= dmar_msi_set_affinity,
 #endif
-	.retrigger = ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 int arch_setup_dmar_msi(unsigned int irq)
@@ -3758,26 +3547,24 @@ int arch_setup_dmar_msi(unsigned int irq
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
-	cfg = desc->chip_data;
-
-	hpet_msi_read(irq, &msg);
+	hpet_msi_read(data->handler_data, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-	hpet_msi_write(irq, &msg);
+	hpet_msi_write(data->handler_data, &msg);
 
 	return 0;
 }
@@ -3785,34 +3572,33 @@ static int hpet_msi_set_affinity(unsigne
 #endif /* CONFIG_SMP */
 
 static struct irq_chip ir_hpet_msi_type = {
-	.name = "IR-HPET_MSI",
-	.unmask = hpet_msi_unmask,
-	.mask = hpet_msi_mask,
+	.name			= "IR-HPET_MSI",
+	.irq_unmask		= hpet_msi_unmask,
+	.irq_mask		= hpet_msi_mask,
 #ifdef CONFIG_INTR_REMAP
-	.ack = ir_ack_apic_edge,
+	.irq_ack		= ir_ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity = ir_set_msi_irq_affinity,
+	.irq_set_affinity	= ir_msi_set_affinity,
 #endif
 #endif
-	.retrigger = ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
-	.unmask = hpet_msi_unmask,
-	.mask = hpet_msi_mask,
-	.ack = ack_apic_edge,
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity = hpet_msi_set_affinity,
+	.irq_set_affinity = hpet_msi_set_affinity,
 #endif
-	.retrigger = ioapic_retrigger_irq,
+	.irq_retrigger = ioapic_retrigger_irq,
 };
 
 int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
-	int ret;
 	struct msi_msg msg;
-	struct irq_desc *desc = irq_to_desc(irq);
+	int ret;
 
 	if (intr_remapping_enabled) {
 		struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3830,9 +3616,9 @@ int arch_setup_hpet_msi(unsigned int irq
 	if (ret < 0)
 		return ret;
 
-	hpet_msi_write(irq, &msg);
-	desc->status |= IRQ_MOVE_PCNTXT;
-	if (irq_remapped(irq))
+	hpet_msi_write(get_irq_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	if (irq_remapped(get_irq_chip_data(irq)))
 		set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
 					      handle_edge_irq, "edge");
 	else
@@ -3865,33 +3651,30 @@ static void target_ht_irq(unsigned int i
 	write_ht_irq_msg(irq, &msg);
 }
 
-static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
-	cfg = desc->chip_data;
-
-	target_ht_irq(irq, dest, cfg->vector);
-
+	target_ht_irq(data->irq, dest, cfg->vector);
 	return 0;
 }
 
 #endif
 
 static struct irq_chip ht_irq_chip = {
-	.name		= "PCI-HT",
-	.mask		= mask_ht_irq,
-	.unmask		= unmask_ht_irq,
-	.ack		= ack_apic_edge,
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
-	.set_affinity	= set_ht_irq_affinity,
+	.irq_set_affinity	= ht_set_affinity,
 #endif
-	.retrigger	= ioapic_retrigger_irq,
+	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3965,6 +3748,11 @@ void __init probe_nr_irqs_gsi(void)
 	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
 }
 
+int get_nr_irqs_gsi(void)
+{
+	return nr_irqs_gsi;
+}
+
 #ifdef CONFIG_SPARSE_IRQ
 int __init arch_probe_nr_irqs(void)
 {
@@ -3983,7 +3771,7 @@ int __init arch_probe_nr_irqs(void)
 	if (nr < nr_irqs)
 		nr_irqs = nr;
 
-	return 0;
+	return NR_IRQS_LEGACY;
 }
 #endif
 #endif /* CONFIG_XEN */
@@ -3991,7 +3779,6 @@ int __init arch_probe_nr_irqs(void)
 static int __io_apic_set_pci_routing(struct device *dev, int irq,
 				struct io_apic_irq_attr *irq_attr)
 {
-	struct irq_desc *desc;
 	struct irq_cfg *cfg;
 	int node;
 	int ioapic, pin;
@@ -4014,13 +3801,11 @@ static int __io_apic_set_pci_routing(str
 	if (dev)
 		node = dev_to_node(dev);
 	else
-		node = cpu_to_node(boot_cpu_id);
+		node = cpu_to_node(0);
 
-	desc = irq_to_desc_alloc_node(irq, node);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+	cfg = alloc_irq_and_cfg_at(irq, node);
+	if (!cfg)
 		return 0;
-	}
 
 	pin = irq_attr->ioapic_pin;
 	trigger = irq_attr->trigger;
@@ -4030,15 +3815,14 @@ static int __io_apic_set_pci_routing(str
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
 	if (irq >= legacy_pic->nr_legacy_irqs) {
-		cfg = desc->chip_data;
-		if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+		if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
 			printk(KERN_INFO "can not add pin %d for irq %d\n",
 				pin, irq);
 			return 0;
 		}
 	}
 
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+	setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
 
 	return 0;
 }
@@ -4234,14 +4018,14 @@ void __init setup_ioapic_dest(void)
 		 */
 		if (desc->status &
 		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-			mask = desc->affinity;
+			mask = desc->irq_data.affinity;
 		else
 			mask = apic->target_cpus();
 
 		if (intr_remapping_enabled)
-			set_ir_ioapic_affinity_irq_desc(desc, mask);
+			ir_ioapic_set_affinity(&desc->irq_data, mask, false);
 		else
-			set_ioapic_affinity_irq_desc(desc, mask);
+			ioapic_set_affinity(&desc->irq_data, mask, false);
 	}
 
 }
@@ -4429,20 +4213,19 @@ void __init mp_register_ioapic(int id, u
 void __init pre_init_apic_IRQ0(void)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 
 	printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
 	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
 #endif
-	desc = irq_to_desc_alloc_node(0, 0);
+	/* Make sure the irq descriptor is set up */
+	cfg = alloc_irq_and_cfg_at(0, 0);
 
 	setup_local_APIC();
 
-	cfg = irq_cfg(0);
 	add_pin_to_irq_node(cfg, 0, 0, 0);
 	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
 
-	setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
+	setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
 }
 #endif
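
[Editor's note, not part of the patch: the io_apic hunks above track the 2.6.37 genirq conversion. irq_chip callbacks now take a struct irq_data pointer instead of a bare irq number (.mask/.unmask/.ack/.set_affinity become .irq_mask/.irq_unmask/.irq_ack/.irq_set_affinity), and per-interrupt state is reached through irq_data (chip_data, msi_desc, affinity) rather than irq_desc. A minimal illustrative sketch of the new callback shape follows; the my_* names are purely hypothetical.]

#include <linux/irq.h>

static void my_mask(struct irq_data *data)
{
	/* silence the interrupt identified by data->irq at the controller */
}

static void my_unmask(struct irq_data *data)
{
	/* re-enable the interrupt */
}

static void my_ack(struct irq_data *data)
{
	/* acknowledge the interrupt */
}

static int my_set_affinity(struct irq_data *data,
			   const struct cpumask *mask, bool force)
{
	/* per-IRQ chip state now comes from data->chip_data, not irq_cfg(irq) */
	return 0;
}

static struct irq_chip my_chip = {
	.name			= "MY-CHIP",
	.irq_mask		= my_mask,
	.irq_unmask		= my_unmask,
	.irq_ack		= my_ack,
	.irq_set_affinity	= my_set_affinity,
};
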
--- head.orig/arch/x86/kernel/cpu/common-xen.c	2012-08-01 12:03:22.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c	2012-08-01 12:03:41.000000000 +0200
@@ -714,7 +714,7 @@ static void __init early_identify_cpu(st
 		this_cpu->c_early_init(c);
 
 #ifdef CONFIG_SMP
-	c->cpu_index = boot_cpu_id;
+	c->cpu_index = 0;
 #endif
 	filter_cpuid_features(c, false);
 }
@@ -753,16 +753,21 @@ void __init early_cpu_init(void)
 }
 
 /*
- * The NOPL instruction is supposed to exist on all CPUs with
- * family >= 6; unfortunately, that's not true in practice because
- * of early VIA chips and (more importantly) broken virtualizers that
- * are not easy to detect.  In the latter case it doesn't even *fail*
- * reliably, so probing for it doesn't even work.  Disable it completely
+ * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
+ * unfortunately, that's not true in practice because of early VIA
+ * chips and (more importantly) broken virtualizers that are not easy
+ * to detect. In the latter case it doesn't even *fail* reliably, so
+ * probing for it doesn't even work. Disable it completely on 32-bit
  * unless we can find a reliable way to detect all the broken cases.
+ * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
  */
 static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
 {
+#ifdef CONFIG_X86_32
 	clear_cpu_cap(c, X86_FEATURE_NOPL);
+#else
+	set_cpu_cap(c, X86_FEATURE_NOPL);
+#endif
 }
 
 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1376,13 +1381,6 @@ void __cpuinit cpu_init(void)
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
 
-	/*
-	 * Force FPU initialization:
-	 */
-	current_thread_info()->status = 0;
-	clear_used_math();
-	mxcsr_feature_mask_init();
-
 	fpu_init();
 	xsave_init();
 }
--- head.orig/arch/x86/kernel/e820-xen.c	2011-09-23 15:56:09.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c	2011-09-23 15:58:51.000000000 +0200
@@ -15,6 +15,7 @@
 #include <linux/pfn.h>
 #include <linux/suspend.h>
 #include <linux/firmware-map.h>
+#include <linux/memblock.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -39,7 +40,7 @@ struct e820map e820;
 #if !defined(CONFIG_XEN)
 struct e820map e820_saved;
 #elif defined(CONFIG_XEN_PRIVILEGED_GUEST)
-static struct e820map machine_e820;
+struct e820map machine_e820;
 # define e820_saved machine_e820
 #else
 # define machine_e820 e820
@@ -793,75 +794,9 @@ core_initcall(e820_mark_nvs_memory);
 #endif
 #endif
 
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		u64 addr;
-		u64 ei_start, ei_last;
-
-		if (ei->type != E820_RAM)
-			continue;
-
-		ei_last = ei->addr + ei->size;
-		ei_start = ei->addr;
-		addr = find_early_area(ei_start, ei_last, start, end,
-					 size, align);
-
-		if (addr != -1ULL)
-			return addr;
-	}
-	return -1ULL;
-}
-
-u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
-{
-	return find_e820_area(start, end, size, align);
-}
-
-u64 __init get_max_mapped(void)
-{
-	u64 end = max_pfn_mapped;
-
-	end <<= PAGE_SHIFT;
-
-	return end;
-}
-/*
- * Find next free range after *start
- */
-u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		u64 addr;
-		u64 ei_start, ei_last;
-
-		if (ei->type != E820_RAM)
-			continue;
-
-		ei_last = ei->addr + ei->size;
-		ei_start = ei->addr;
-		addr = find_early_area_size(ei_start, ei_last, start,
-					 sizep, align);
-
-		if (addr != -1ULL)
-			return addr;
-	}
-
-	return -1ULL;
-}
-
 #ifndef CONFIG_XEN_UNPRIVILEGED_GUEST
 /*
- * pre allocated 4k and reserved it in e820
+ * pre-allocate 4k and reserve it in memblock and e820_saved
  */
 u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 {
@@ -880,8 +815,8 @@ u64 __init early_reserve_e820(u64 startt
 		align = PAGE_SIZE;
 #endif
 	for (start = startt; ; start += size) {
-		start = find_e820_area_size(start, &size, align);
-		if (!(start + 1))
+		start = memblock_x86_find_in_range_size(start, &size, align);
+		if (start == MEMBLOCK_ERROR)
 			return 0;
 		if (size >= sizet)
 			break;
@@ -927,10 +862,9 @@ u64 __init early_reserve_e820(u64 startt
 	if (rc)
 		return 0;
 #endif
-	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
+	memblock_x86_reserve_range(addr, addr + sizet, "new next");
 	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
-	printk(KERN_INFO "update e820 for early_reserve_e820\n");
-	update_e820();
+	printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
 	update_e820_saved();
 
 	return addr;
@@ -993,83 +927,6 @@ unsigned long __init e820_end_of_low_ram
 {
 	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
 }
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-int __init e820_find_active_region(const struct e820entry *ei,
-				  unsigned long start_pfn,
-				  unsigned long last_pfn,
-				  unsigned long *ei_startpfn,
-				  unsigned long *ei_endpfn)
-{
-	u64 align = PAGE_SIZE;
-
-#ifdef CONFIG_XEN
-	if (last_pfn > xen_start_info->nr_pages)
-		last_pfn = xen_start_info->nr_pages;
-#endif
-
-	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
-	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
-
-	/* Skip map entries smaller than a page */
-	if (*ei_startpfn >= *ei_endpfn)
-		return 0;
-
-	/* Skip if map is outside the node */
-	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
-				    *ei_startpfn >= last_pfn)
-		return 0;
-
-	/* Check for overlaps */
-	if (*ei_startpfn < start_pfn)
-		*ei_startpfn = start_pfn;
-	if (*ei_endpfn > last_pfn)
-		*ei_endpfn = last_pfn;
-
-	return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init e820_register_active_regions(int nid, unsigned long start_pfn,
-					 unsigned long last_pfn)
-{
-	unsigned long ei_startpfn;
-	unsigned long ei_endpfn;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++)
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			add_active_range(nid, ei_startpfn, ei_endpfn);
-#ifdef CONFIG_XEN
-	BUG_ON(nid);
-	add_active_range(nid, last_pfn, last_pfn);
-#endif
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-u64 __init e820_hole_size(u64 start, u64 end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long last_pfn = end >> PAGE_SHIFT;
-	unsigned long ei_startpfn, ei_endpfn, ram = 0;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			ram += ei_endpfn - ei_startpfn;
-	}
-	return end - start - ((u64)ram << PAGE_SHIFT);
-}
 
 static void early_panic(char *msg)
 {
@@ -1350,3 +1207,48 @@ void __init setup_memory_map(void)
 	printk(KERN_INFO "Xen-provided physical RAM map:\n");
 	_e820_print_map(&e820, who);
 }
+
+void __init memblock_x86_fill(void)
+{
+	int i;
+	u64 end;
+
+	/*
+	 * EFI may have more than 128 entries
+	 * We are safe to enable resizing, because memblock_x86_fill()
+	 * is called rather late for x86
+	 */
+	memblock_can_resize = 1;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		end = ei->addr + ei->size;
+		if (end != (resource_size_t)end)
+			continue;
+
+		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+			continue;
+
+		memblock_add(ei->addr, ei->size);
+	}
+
+	memblock_analyze();
+	memblock_dump_all();
+}
+
+void __init memblock_find_dma_reserve(void)
+{
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+	u64 free_size_pfn;
+	u64 mem_size_pfn;
+	/*
+	 * Find the used area below MAX_DMA_PFN: use memblock to get the
+	 * free size in [0, MAX_DMA_PFN] first, and assume boot_mem will
+	 * not take memory below MAX_DMA_PFN
+	 */
+	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+	set_dma_reserve(mem_size_pfn - free_size_pfn);
+#endif
+}
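
[Editor's note, not part of the patch: the e820-xen.c hunk above drops the old early-reservation helpers (find_e820_area(), find_e820_area_size(), e820_register_active_regions(), e820_hole_size()) in favor of the generic memblock allocator, and adds memblock_x86_fill() to seed memblock from the e820 map. A minimal sketch of an early reservation under the new API, using only calls that appear elsewhere in this patch (memblock_find_in_range(), MEMBLOCK_ERROR, memblock_x86_reserve_range()); example_early_reserve() is a hypothetical caller.]

#include <linux/init.h>
#include <linux/memblock.h>
#include <asm/page.h>

static void __init example_early_reserve(void)
{
	u64 addr;

	/* look for a free, page-aligned 4 KiB range below 1 MiB */
	addr = memblock_find_in_range(0, 1 << 20, PAGE_SIZE, PAGE_SIZE);
	if (addr == MEMBLOCK_ERROR)
		return;

	/* claim it so later memblock/bootmem allocations stay away from it */
	memblock_x86_reserve_range(addr, addr + PAGE_SIZE, "EXAMPLE");
}
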
--- head.orig/arch/x86/kernel/early_printk-xen.c	2011-02-01 15:03:10.000000000 +0100
+++ head/arch/x86/kernel/early_printk-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -13,6 +13,7 @@
 #include <asm/setup.h>
 #include <asm/pci-direct.h>
 #include <asm/fixmap.h>
+#include <asm/mrst.h>
 #include <asm/pgtable.h>
 #include <linux/usb/ehci_def.h>
 
@@ -271,6 +272,18 @@ static int __init setup_early_printk(cha
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
+#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+		if (!strncmp(buf, "mrst", 4)) {
+			mrst_early_console_init();
+			early_console_register(&early_mrst_console, keep);
+		}
+
+		if (!strncmp(buf, "hsu", 3)) {
+			hsu_early_console_init();
+			early_console_register(&early_hsu_console, keep);
+		}
+
+#endif
 		buf++;
 	}
 	return 0;
--- head.orig/arch/x86/kernel/entry_32-xen.S	2013-01-30 11:51:38.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S	2013-01-30 11:53:28.000000000 +0100
@@ -119,8 +119,7 @@ NMI_MASK	= 0x80000000
 
  /* unfortunately push/pop can't be no-op */
 .macro PUSH_GS
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
 .endm
 .macro POP_GS pop=0
 	addl $(4 + \pop), %esp
@@ -144,14 +143,12 @@ NMI_MASK	= 0x80000000
 #else	/* CONFIG_X86_32_LAZY_GS */
 
 .macro PUSH_GS
-	pushl %gs
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %gs
 	/*CFI_REL_OFFSET gs, 0*/
 .endm
 
 .macro POP_GS pop=0
-98:	popl %gs
-	CFI_ADJUST_CFA_OFFSET -4
+98:	popl_cfi %gs
 	/*CFI_RESTORE gs*/
   .if \pop <> 0
 	add $\pop, %esp
@@ -199,35 +196,25 @@ NMI_MASK	= 0x80000000
 .macro SAVE_ALL
 	cld
 	PUSH_GS
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %fs
 	/*CFI_REL_OFFSET fs, 0;*/
-	pushl %es
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %es
 	/*CFI_REL_OFFSET es, 0;*/
-	pushl %ds
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0;*/
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	CFI_REL_OFFSET eax, 0
-	pushl %ebp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebp
 	CFI_REL_OFFSET ebp, 0
-	pushl %edi
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %edi
 	CFI_REL_OFFSET edi, 0
-	pushl %esi
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %esi
 	CFI_REL_OFFSET esi, 0
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %edx
 	CFI_REL_OFFSET edx, 0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ecx
 	CFI_REL_OFFSET ecx, 0
-	pushl %ebx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebx
 	CFI_REL_OFFSET ebx, 0
 	movl $(__USER_DS), %edx
 	movl %edx, %ds
@@ -238,39 +225,29 @@ NMI_MASK	= 0x80000000
 .endm
 
 .macro RESTORE_INT_REGS
-	popl %ebx
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %ebx
 	CFI_RESTORE ebx
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %ecx
 	CFI_RESTORE ecx
-	popl %edx
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %edx
 	CFI_RESTORE edx
-	popl %esi
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %esi
 	CFI_RESTORE esi
-	popl %edi
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %edi
 	CFI_RESTORE edi
-	popl %ebp
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %ebp
 	CFI_RESTORE ebp
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %eax
 	CFI_RESTORE eax
 .endm
 
 .macro RESTORE_REGS pop=0
 	RESTORE_INT_REGS
-1:	popl %ds
-	CFI_ADJUST_CFA_OFFSET -4
+1:	popl_cfi %ds
 	/*CFI_RESTORE ds;*/
-2:	popl %es
-	CFI_ADJUST_CFA_OFFSET -4
+2:	popl_cfi %es
 	/*CFI_RESTORE es;*/
-3:	popl %fs
-	CFI_ADJUST_CFA_OFFSET -4
+3:	popl_cfi %fs
 	/*CFI_RESTORE fs;*/
 	POP_GS \pop
 .pushsection .fixup, "ax"
@@ -324,16 +301,12 @@ NMI_MASK	= 0x80000000
 
 ENTRY(ret_from_fork)
 	CFI_STARTPROC
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	call schedule_tail
 	GET_THREAD_INFO(%ebp)
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
-	pushl $0x0202			# Reset kernel eflags
-	CFI_ADJUST_CFA_OFFSET 4
-	popfl
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
 	jmp syscall_exit
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -413,29 +386,23 @@ sysenter_past_esp:
 	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
 	 * we immediately enable interrupts at that point anyway.
 	 */
-	pushl $(__USER_DS)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $__USER_DS
 	/*CFI_REL_OFFSET ss, 0*/
-	pushl %ebp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebp
 	CFI_REL_OFFSET esp, 0
-	pushfl
+	pushfl_cfi
 	orl $X86_EFLAGS_IF, (%esp)
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $(__USER_CS)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $__USER_CS
 	/*CFI_REL_OFFSET cs, 0*/
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
-	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
 	CFI_REL_OFFSET eip, 0
 
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	SAVE_ALL
 	ENABLE_INTERRUPTS(CLBR_NONE)
 
@@ -490,8 +457,7 @@ sysenter_audit:
 	movl %eax,%edx			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
 	call audit_syscall_entry
-	pushl %ebx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
 
@@ -535,8 +501,7 @@ ENTRY(ia32pv_sysenter_target)
 	addl $4,%esp
 	CFI_ADJUST_CFA_OFFSET -4
 	/* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */
-	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
@@ -559,8 +524,7 @@ ENDPROC(ia32pv_sysenter_target)
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
-	pushl %eax			# save orig_eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 					# system call tracing in operation / emulation
@@ -610,7 +574,6 @@ restore_nocheck:
 	jnz restore_all_enable_events	#        != 0 => enable event delivery
 #endif
 	RESTORE_REGS 4			# skip orig_eax/error_code
-	CFI_ADJUST_CFA_OFFSET -4
 irq_return:
 	INTERRUPT_RETURN
 .section .fixup,"ax"
@@ -664,10 +627,8 @@ ldt_ss:
 	shr $16, %edx
 	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
 	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
-	pushl $__ESPFIX_SS
-	CFI_ADJUST_CFA_OFFSET 4
-	push %eax			/* new kernel esp */
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $__ESPFIX_SS
+	pushl_cfi %eax			/* new kernel esp */
 	/* Disable interrupts, but do not irqtrace this section: we
 	 * will soon execute iret and the tracer was already set to
 	 * the irqstate after the iret */
@@ -736,11 +697,9 @@ work_notifysig:				# deal with pending s
 
 	ALIGN
 work_notifysig_v86:
-	pushl %ecx			# save ti_flags for do_notify_resume
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ecx			# save ti_flags for do_notify_resume
 	call save_v86_state		# %eax contains pt_regs pointer
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %ecx
 	movl %eax, %esp
 #else
 	movl %esp, %eax
@@ -820,14 +779,18 @@ ptregs_##name: \
 #define PTREGSCALL3(name) \
 	ALIGN; \
 ptregs_##name: \
+	CFI_STARTPROC; \
 	leal 4(%esp),%eax; \
-	pushl %eax; \
+	pushl_cfi %eax; \
 	movl PT_EDX(%eax),%ecx; \
 	movl PT_ECX(%eax),%edx; \
 	movl PT_EBX(%eax),%eax; \
 	call sys_##name; \
 	addl $4,%esp; \
-	ret
+	CFI_ADJUST_CFA_OFFSET -4; \
+	ret; \
+	CFI_ENDPROC; \
+ENDPROC(ptregs_##name)
 
 PTREGSCALL1(iopl)
 PTREGSCALL0(fork)
@@ -842,15 +805,19 @@ PTREGSCALL1(vm86old)
 /* Clone is an oddball.  The 4th arg is in %edi */
 	ALIGN;
 ptregs_clone:
+	CFI_STARTPROC
 	leal 4(%esp),%eax
-	pushl %eax
-	pushl PT_EDI(%eax)
+	pushl_cfi %eax
+	pushl_cfi PT_EDI(%eax)
 	movl PT_EDX(%eax),%ecx
 	movl PT_ECX(%eax),%edx
 	movl PT_EBX(%eax),%eax
 	call sys_clone
 	addl $8,%esp
+	CFI_ADJUST_CFA_OFFSET -8
 	ret
+	CFI_ENDPROC
+ENDPROC(ptregs_clone)
 
 #ifndef CONFIG_XEN
 .macro FIXUP_ESPFIX_STACK
@@ -866,10 +833,8 @@ ptregs_clone:
 	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
 	shl $16, %eax
 	addl %esp, %eax			/* the adjusted stack pointer */
-	pushl $__KERNEL_DS
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $__KERNEL_DS
+	pushl_cfi %eax
 	lss (%esp), %esp		/* switch to the normal stack segment */
 	CFI_ADJUST_CFA_OFFSET -8
 .endm
@@ -906,8 +871,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif
-1:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
-	CFI_ADJUST_CFA_OFFSET 4
+1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
       .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 	jmp 2f
       .endif
@@ -947,8 +911,7 @@ ENDPROC(common_interrupt)
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
-	pushl $~(nr);			\
-	CFI_ADJUST_CFA_OFFSET 4;	\
+	pushl_cfi $~(nr);		\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
@@ -985,8 +948,7 @@ ENDPROC(name)
 # so we can simply throw away the new one.
 ENTRY(hypervisor_callback)
 	RING0_INT_FRAME
-	pushl $-1
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $-1
 	SAVE_ALL
 	movl PT_CS(%esp),%ecx
 	movl PT_EIP(%esp),%eax
@@ -1006,8 +968,7 @@ ENTRY(hypervisor_callback)
 	addl $PT_OLDESP,%esp		# Remove eflags...ebx from stack frame.
 #endif
 .Ldo_upcall:
-	push %esp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %esp
 	call evtchn_do_upcall
 	add  $4,%esp
 	CFI_ADJUST_CFA_OFFSET -4
@@ -1081,8 +1042,7 @@ ENTRY(failsafe_callback)
 	leal 16(%esp),%esp
 	RING0_INT_FRAME
 	jnz iret_exc		# EAX != 0 => Category 2 (Bad IRET)
-	pushl $-1		# EAX == 0 => Category 1 (Bad segment)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $-1		# EAX == 0 => Category 1 (Bad segment)
 	SAVE_ALL
 	jmp ret_from_exception
 .section .fixup,"ax";		\
@@ -1111,21 +1071,18 @@ ENTRY(failsafe_callback)
 
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_error
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_coprocessor_error
 	jmp error_code
 	CFI_ENDPROC
 END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
 	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:	pushl $do_general_protection
+661:	pushl_cfi $do_general_protection
 662:
 .section .altinstructions,"a"
 	.balign 4
@@ -1140,19 +1097,16 @@ ENTRY(simd_coprocessor_error)
 664:
 .previous
 #else
-	pushl $do_simd_coprocessor_error
+	pushl_cfi $do_simd_coprocessor_error
 #endif
-	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
 END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
 	RING0_INT_FRAME
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_device_not_available
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $-1			# mark this as an int
+	pushl_cfi $do_device_not_available
 	jmp error_code
 	CFI_ENDPROC
 END(device_not_available)
@@ -1174,82 +1128,68 @@ END(native_irq_enable_sysexit)
 
 ENTRY(overflow)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_overflow
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_overflow
 	jmp error_code
 	CFI_ENDPROC
 END(overflow)
 
 ENTRY(bounds)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_bounds
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_bounds
 	jmp error_code
 	CFI_ENDPROC
 END(bounds)
 
 ENTRY(invalid_op)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_invalid_op
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_invalid_op
 	jmp error_code
 	CFI_ENDPROC
 END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_segment_overrun
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_coprocessor_segment_overrun
 	jmp error_code
 	CFI_ENDPROC
 END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
-	pushl $do_invalid_TSS
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_invalid_TSS
 	jmp error_code
 	CFI_ENDPROC
 END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
-	pushl $do_segment_not_present
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_segment_not_present
 	jmp error_code
 	CFI_ENDPROC
 END(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
-	pushl $do_stack_segment
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_stack_segment
 	jmp error_code
 	CFI_ENDPROC
 END(stack_segment)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
-	pushl $do_alignment_check
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_alignment_check
 	jmp error_code
 	CFI_ENDPROC
 END(alignment_check)
 
 ENTRY(divide_error)
 	RING0_INT_FRAME
-	pushl $0			# no error code
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_divide_error
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0			# no error code
+	pushl_cfi $do_divide_error
 	jmp error_code
 	CFI_ENDPROC
 END(divide_error)
@@ -1257,10 +1197,8 @@ END(divide_error)
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl machine_check_vector
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi machine_check_vector
 	jmp error_code
 	CFI_ENDPROC
 END(machine_check)
@@ -1269,10 +1207,8 @@ END(machine_check)
 #ifndef CONFIG_XEN
 ENTRY(spurious_interrupt_bug)
 	RING0_INT_FRAME
-	pushl $0
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_spurious_interrupt_bug
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $0
+	pushl_cfi $do_spurious_interrupt_bug
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
@@ -1280,8 +1216,7 @@ END(spurious_interrupt_bug)
 
 ENTRY(fixup_4gb_segment)
 	RING0_EC_FRAME
-	pushl $do_fixup_4gb_segment
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_fixup_4gb_segment
 	jmp error_code
 	CFI_ENDPROC
 END(fixup_4gb_segment)
@@ -1415,8 +1350,7 @@ ENTRY(ia32pv_cstar_target)
 	movl %ebp,%ecx
 	movl $__USER_CS,4(%esp)
 	movl 12(%esp),%ebp
-	pushl %eax			# save orig_eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax			# save orig_eax
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
@@ -1550,40 +1484,29 @@ mask=0
 
 ENTRY(page_fault)
 	RING0_EC_FRAME
-	pushl $do_page_fault
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_page_fault
 	ALIGN
 error_code:
 	/* the function address is in %gs's slot on the stack */
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %fs
 	/*CFI_REL_OFFSET fs, 0*/
-	pushl %es
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %es
 	/*CFI_REL_OFFSET es, 0*/
-	pushl %ds
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0*/
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	CFI_REL_OFFSET eax, 0
-	pushl %ebp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebp
 	CFI_REL_OFFSET ebp, 0
-	pushl %edi
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %edi
 	CFI_REL_OFFSET edi, 0
-	pushl %esi
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %esi
 	CFI_REL_OFFSET esi, 0
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %edx
 	CFI_REL_OFFSET edx, 0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ecx
 	CFI_REL_OFFSET ecx, 0
-	pushl %ebx
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ebx
 	CFI_REL_OFFSET ebx, 0
 	cld
 	movl $(__KERNEL_PERCPU), %ecx
@@ -1626,12 +1549,9 @@ END(page_fault)
 	movl TSS_sysenter_sp0 + \offset(%esp), %esp
 	CFI_DEF_CFA esp, 0
 	CFI_UNDEFINED eip
-	pushfl
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $__KERNEL_CS
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $sysenter_past_esp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushfl_cfi
+	pushl_cfi $__KERNEL_CS
+	pushl_cfi $sysenter_past_esp
 	CFI_REL_OFFSET eip, 0
 .endm
 #endif /* CONFIG_XEN */
@@ -1644,8 +1564,7 @@ ENTRY(debug)
 	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
 debug_stack_correct:
 #endif /* !CONFIG_XEN */
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $-1			# mark this as an int
 	SAVE_ALL
 	TRACE_IRQS_OFF
 	xorl %edx,%edx			# error code 0
@@ -1665,33 +1584,28 @@ END(debug)
  */
 ENTRY(nmi)
 	RING0_INT_FRAME
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 #ifndef CONFIG_XEN
 	movl %ss, %eax
 	cmpw $__ESPFIX_SS, %ax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %eax
 	je nmi_espfix_stack
 	cmpl $ia32_sysenter_target,(%esp)
 	je nmi_stack_fixup
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	movl %esp,%eax
 	/* Do not access memory above the end of our stack page,
 	 * it might not exist.
 	 */
 	andl $(THREAD_SIZE-1),%eax
 	cmpl $(THREAD_SIZE-20),%eax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
+	popl_cfi %eax
 	jae nmi_stack_correct
 	cmpl $ia32_sysenter_target,12(%esp)
 	je nmi_debug_stack_check
 nmi_stack_correct:
 	/* We have a RING0_INT_FRAME here */
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	SAVE_ALL
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
@@ -1720,18 +1634,14 @@ nmi_espfix_stack:
 	 *
 	 * create the pointer to lss back
 	 */
-	pushl %ss
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl %esp
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %ss
+	pushl_cfi %esp
 	addl $4, (%esp)
 	/* copy the iret frame of 12 bytes */
 	.rept 3
-	pushl 16(%esp)
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi 16(%esp)
 	.endr
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi %eax
 	SAVE_ALL
 	FIXUP_ESPFIX_STACK		# %eax == %esp
 	xorl %edx,%edx			# zero error code
@@ -1753,8 +1663,7 @@ END(nmi)
 
 ENTRY(int3)
 	RING0_INT_FRAME
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $-1			# mark this as an int
 	SAVE_ALL
 	TRACE_IRQS_OFF
 	xorl %edx,%edx		# zero error code
@@ -1766,8 +1675,7 @@ END(int3)
 
 ENTRY(general_protection)
 	RING0_EC_FRAME
-	pushl $do_general_protection
-	CFI_ADJUST_CFA_OFFSET 4
+	pushl_cfi $do_general_protection
 	jmp error_code
 	CFI_ENDPROC
 END(general_protection)
--- head.orig/arch/x86/kernel/entry_64-xen.S	2013-05-24 08:24:12.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S	2013-05-24 08:24:37.000000000 +0200
@@ -204,23 +204,17 @@ NMI_MASK = 0x80000000
 	.macro FAKE_STACK_FRAME child_rip
 	/* push in order ss, rsp, eflags, cs, rip */
 	xorl %eax, %eax
-	pushq $__KERNEL_DS /* ss */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi $__KERNEL_DS /* ss */
 	/*CFI_REL_OFFSET	ss,0*/
-	pushq %rax /* rsp */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq $X86_EFLAGS_IF /* eflags - interrupts on */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
-	pushq $__KERNEL_CS /* cs */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
-	pushq \child_rip /* rip */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi \child_rip /* rip */
 	CFI_REL_OFFSET	rip,0
-	pushq	%rax /* orig rax */
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi %rax /* orig rax */
 	.endm
 
 	.macro UNFAKE_STACK_FRAME
@@ -335,6 +329,7 @@ NMI_MASK = 0x80000000
 
 #ifndef CONFIG_XEN
 /* save partial stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
@@ -374,6 +369,7 @@ ENTRY(save_args)
 	ret
 	CFI_ENDPROC
 END(save_args)
+	.popsection
 #endif
 
 ENTRY(save_rest)
@@ -435,10 +431,8 @@ ENTRY(ret_from_fork)
 
 	LOCK ; btr $TIF_FORK,TI_flags(%r8)
 
-	push kernel_eflags(%rip)
-	CFI_ADJUST_CFA_OFFSET 8
-	popf					# reset kernel eflags
-	CFI_ADJUST_CFA_OFFSET -8
+	pushq_cfi kernel_eflags(%rip)
+	popfq_cfi				# reset kernel eflags
 
 	call schedule_tail			# rdi: 'prev' task parameter
 
@@ -535,11 +529,9 @@ sysret_careful:
 	jnc sysret_signal
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq %rdi
-	CFI_ADJUST_CFA_OFFSET 8
+	pushq_cfi %rdi
 	call schedule
-	popq  %rdi
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rdi
 	jmp sysret_check
 
 	/* Handle a signal */
@@ -652,11 +644,9 @@ int_careful:
 	jnc  int_very_careful
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq %rdi
-	CFI_ADJUST_CFA_OFFSET 8
+	pushq_cfi %rdi
 	call schedule
-	popq %rdi
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rdi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
@@ -670,12 +660,10 @@ int_check_syscall_exit_work:
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
-	pushq %rdi
-	CFI_ADJUST_CFA_OFFSET 8
+	pushq_cfi %rdi
 	leaq 8(%rsp),%rdi	# &ptregs -> arg1
 	call syscall_trace_leave
-	popq %rdi
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rdi
 	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
 	jmp int_restore_rest
 
@@ -732,9 +720,8 @@ END(ptregscall_common)
 
 ENTRY(stub_execve)
 	CFI_STARTPROC
-	popq %r11
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_REGISTER rip, r11
+	addq $8, %rsp
+	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
 	movq %rsp, %rcx
@@ -753,7 +740,7 @@ END(stub_execve)
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	CFI_ADJUST_CFA_OFFSET	-8
+	PARTIAL_FRAME 0
 	SAVE_REST
 	movq %rsp,%rdi
 	FIXUP_TOP_OF_STACK %r11
@@ -795,11 +782,9 @@ retint_careful:
 	jnc   retint_signal
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq %rdi
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi %rdi
 	call  schedule
-	popq %rdi
-	CFI_ADJUST_CFA_OFFSET	-8
+	popq_cfi %rdi
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -849,8 +834,8 @@ ENTRY(\sym)
         movq 8(%rsp),%r11
 	CFI_RESTORE r11
 	movq $-1,8(%rsp)	/* ORIG_RAX: no syscall to restart */
-	subq $(15-1)*8,%rsp
-	CFI_ADJUST_CFA_OFFSET (15-1)*8
+	subq $ORIG_RAX-R15-1*8,%rsp
+	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-1*8
 	call error_entry
 	DEFAULT_FRAME 0
 	movq %rsp,%rdi		/* pt_regs pointer */
@@ -876,8 +861,8 @@ ENTRY(\sym)
 	CFI_RESTORE rcx
         movq 8(%rsp),%r11
 	CFI_RESTORE r11
-	subq $(15-2)*8,%rsp
-	CFI_ADJUST_CFA_OFFSET (15-2)*8
+	subq $ORIG_RAX-R15-2*8,%rsp
+	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-2*8
 	call error_entry
 	DEFAULT_FRAME 0
 	movq %rsp,%rdi			/* pt_regs pointer */
@@ -997,8 +982,7 @@ ENTRY(failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	pushq $-1
-	CFI_ADJUST_CFA_OFFSET 8
+	pushq_cfi $-1
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
@@ -1066,8 +1050,7 @@ END(kernel_execve)
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
-	push %rbp
-	CFI_ADJUST_CFA_OFFSET	8
+	pushq_cfi %rbp
 	CFI_REL_OFFSET rbp,0
 	mov  %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
@@ -1076,6 +1059,7 @@ ENTRY(call_softirq)
 	push  %rbp			# backlink for old unwinder
 	call __do_softirq
 	leaveq
+	CFI_RESTORE		rbp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET   -8
 	decl PER_CPU_VAR(irq_count)
@@ -1114,7 +1098,7 @@ paranoidzeroentry machine_check *machine
 
 	/* ebx:	no swapgs flag */
 ENTRY(paranoid_exit)
-	INTR_FRAME
+	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl %ebx,%ebx				/* swapgs needed? */
@@ -1194,7 +1178,6 @@ error_sti:
 #endif
 	TRACE_IRQS_OFF
 	ret
-	CFI_ENDPROC
 
 #ifndef CONFIG_XEN
 /*
@@ -1221,6 +1204,7 @@ bstep_iret:
 	movq %rcx,RIP+8(%rsp)
 	jmp error_swapgs
 #endif
+	CFI_ENDPROC
 END(error_entry)
 
 
@@ -1261,11 +1245,9 @@ END(do_nmi_callback)
 #ifndef CONFIG_IA32_EMULATION
 ENTRY(ignore_sysret)
 	INTR_FRAME
-	popq %rcx
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %rcx
 	CFI_RESTORE rcx
-	popq %r11
-	CFI_ADJUST_CFA_OFFSET -8
+	popq_cfi %r11
 	CFI_RESTORE r11
 	mov $-ENOSYS,%eax
 	# any non-zero value not having VGCF_in_syscall set will do:
--- head.orig/arch/x86/kernel/head-xen.c	2013-05-13 14:02:23.000000000 +0200
+++ head/arch/x86/kernel/head-xen.c	2013-04-05 09:12:57.000000000 +0200
@@ -1,5 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
 
 #include <asm/setup.h>
@@ -53,7 +54,7 @@ void __init reserve_ebda_region(void)
 		lowmem = 0x9f000;
 
 	/* reserve all memory between lowmem and the 1MB mark */
-	reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
+	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
 }
 #else /* CONFIG_XEN */
 #include <linux/module.h>
@@ -105,10 +106,11 @@ void __init xen_start_kernel(void)
 	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
 				     VMASST_TYPE_writable_pagetables));
 
-	reserve_early(PAGE_ALIGN(__pa_symbol(&_end)),
-		      __pa(xen_start_info->pt_base)
-		      + PFN_PHYS(xen_start_info->nr_pt_frames),
-		      "Xen provided");
+	memblock_init();
+	memblock_x86_reserve_range(PAGE_ALIGN(__pa_symbol(&_end)),
+				   __pa(xen_start_info->pt_base)
+				   + PFN_PHYS(xen_start_info->nr_pt_frames),
+				   "Xen provided");
 
 	x86_configure_nx();
 
--- head.orig/arch/x86/kernel/head32-xen.c	2011-05-09 11:41:42.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c	2011-05-09 11:42:39.000000000 +0200
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/start_kernel.h>
 #include <linux/mm.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <asm/sections.h>
@@ -15,6 +16,7 @@
 #include <asm/trampoline.h>
 #include <asm/apic.h>
 #include <asm/io_apic.h>
+#include <asm/tlbflush.h>
 
 static void __init i386_default_early_setup(void)
 {
@@ -47,17 +49,18 @@ void __init i386_start_kernel(void)
 	BUG_ON(pte_index(hypervisor_virt_start));
 #endif
 
+	memblock_init();
+
 #ifdef CONFIG_X86_TRAMPOLINE
 	/*
 	 * But first pinch a few for the stack/trampoline stuff
 	 * FIXME: Don't need the extra page at 4K, but need to fix
 	 * trampoline before removing it. (see the GDT stuff)
 	 */
-	reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
-					 "EX TRAMPOLINE");
+	memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
 #endif
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifndef CONFIG_XEN
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -67,7 +70,7 @@ void __init i386_start_kernel(void)
 		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
 
--- head.orig/arch/x86/kernel/head64-xen.c	2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/kernel/head64-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -15,6 +15,7 @@
 #include <linux/percpu.h>
 #include <linux/start_kernel.h>
 #include <linux/io.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -119,7 +120,9 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_init();
+
+	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 	/*
 	 * At this point everything still needed from the boot loader
--- head.orig/arch/x86/kernel/irq-xen.c	2013-05-24 10:36:37.000000000 +0200
+++ head/arch/x86/kernel/irq-xen.c	2013-05-24 10:37:09.000000000 +0200
@@ -71,10 +71,10 @@ static int show_other_interrupts(struct 
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance monitoring interrupts\n");
-	seq_printf(p, "%*s: ", prec, "PND");
+	seq_printf(p, "%*s: ", prec, "IWI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
-	seq_printf(p, "  Performance pending work\n");
+		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+	seq_printf(p, "  IRQ work interrupts\n");
 #endif
 #ifndef CONFIG_XEN
 	if (x86_platform_ipi_callback) {
@@ -172,7 +172,7 @@ int show_interrupts(struct seq_file *p, 
 	seq_printf(p, "%*d: ", prec, i);
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, " %8s", desc->irq_data.chip->name);
 	seq_printf(p, "-%-8s", desc->name);
 
 	if (action) {
@@ -198,7 +198,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
 	sum += irq_stats(cpu)->apic_perf_irqs;
-	sum += irq_stats(cpu)->apic_pending_irqs;
+	sum += irq_stats(cpu)->apic_irq_work_irqs;
 #endif
 #ifndef CONFIG_XEN
 	if (x86_platform_ipi_callback)
@@ -303,6 +303,7 @@ void fixup_irqs(void)
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
+	struct irq_data *data;
 	static DECLARE_BITMAP(irqs_used, NR_IRQS);
 
 	for_each_irq_desc(irq, desc) {
@@ -318,7 +319,8 @@ void fixup_irqs(void)
 		/* interrupt's are disabled at this point */
 		raw_spin_lock(&desc->lock);
 
-		affinity = desc->affinity;
+		data = &desc->irq_data;
+		affinity = data->affinity;
 		if (!irq_has_action(irq) ||
 		    cpumask_subset(affinity, cpu_online_mask)) {
 			raw_spin_unlock(&desc->lock);
@@ -333,16 +335,16 @@ void fixup_irqs(void)
 			affinity = cpu_all_mask;
 		}
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
-			desc->chip->mask(irq);
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
+			data->chip->irq_mask(data);
 
-		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, affinity);
-		else if (desc->chip != &no_irq_chip && !(warned++))
+		if (data->chip->irq_set_affinity)
+			data->chip->irq_set_affinity(data, affinity, true);
+		else if (data->chip != &no_irq_chip && !(warned++))
 			set_affinity = 0;
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
-			desc->chip->unmask(irq);
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
+			data->chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
 
@@ -368,9 +370,10 @@ void fixup_irqs(void)
 			continue;
 
 		if (xen_test_irq_pending(irq)) {
+			data = irq_get_irq_data(irq);
 			raw_spin_lock(&desc->lock);
-			if (desc->chip->retrigger)
-				desc->chip->retrigger(irq);
+			if (data->chip->irq_retrigger)
+				data->chip->irq_retrigger(data);
 			raw_spin_unlock(&desc->lock);
 		}
 	}
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/kernel/irq_work-xen.c	2011-02-03 11:19:35.000000000 +0100
@@ -0,0 +1,23 @@
+/*
+ * x86/Xen specific code for irq_work
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_SMP
+irqreturn_t smp_irq_work_interrupt(int irq, void *dev_id)
+{
+	inc_irq_stat(apic_irq_work_irqs);
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+void arch_irq_work_raise(void)
+{
+	xen_send_IPI_self(IRQ_WORK_VECTOR);
+}
+#endif
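
[Editor's note, not part of the patch: the new irq_work-xen.c above wires the generic irq_work facility (new in 2.6.37) up to Xen: queued work raises a self-IPI on IRQ_WORK_VECTOR, and the handler bumps the IWI counter and runs pending callbacks via irq_work_run(). A rough sketch of how other kernel code uses the facility, assuming the 2.6.37 irq_work interface; the my_* names are hypothetical.]

#include <linux/irq_work.h>

static void my_callback(struct irq_work *work)
{
	/* runs in hard-irq context once the self-IPI is delivered */
}

static struct irq_work my_work = {
	.func = my_callback,
};

/* e.g. called from an NMI handler, where almost nothing else is safe */
static void my_raise(void)
{
	irq_work_queue(&my_work);	/* eventually triggers arch_irq_work_raise() */
}
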
--- head.orig/arch/x86/kernel/cpu/microcode/core-xen.c	2011-12-01 15:26:48.000000000 +0100
+++ head/arch/x86/kernel/cpu/microcode/core-xen.c	2011-12-01 15:28:13.000000000 +0100
@@ -12,7 +12,7 @@
  *	Software Developer's Manual
  *	Order Number 253668 or free download from:
  *
- *	http://developer.intel.com/design/pentium4/manuals/253668.htm
+ *	http://developer.intel.com/Assets/PDF/manual/253668.pdf
  *
  *	For more information, go to http://www.urbanmyth.org/microcode
  *
@@ -117,6 +117,7 @@ static const struct file_operations micr
 	.owner			= THIS_MODULE,
 	.write			= microcode_write,
 	.open			= microcode_open,
+	.llseek		= no_llseek,
 };
 
 static struct miscdevice microcode_dev = {
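
[Editor's note, not part of the patch: setting .llseek explicitly in the microcode device above follows the 2.6.37-era sweep that made every file_operations spell out its llseek behaviour; a write-only misc device has no meaningful file position, so it uses no_llseek, which makes lseek() fail with -ESPIPE. A generic sketch of the same pattern; the example_* names are hypothetical.]

#include <linux/fs.h>
#include <linux/module.h>

static int example_open(struct inode *inode, struct file *file)
{
	return 0;
}

static ssize_t example_write(struct file *file, const char __user *buf,
			     size_t len, loff_t *ppos)
{
	/* consume the data; a real driver would copy_from_user() here */
	return len;
}

static const struct file_operations example_fops = {
	.owner	= THIS_MODULE,
	.open	= example_open,
	.write	= example_write,
	.llseek	= no_llseek,	/* lseek() returns -ESPIPE instead of moving f_pos */
};
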
--- head.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/bitops.h>
@@ -686,7 +687,7 @@ static void __init smp_reserve_memory(st
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
 
-	reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+	memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
 }
 #endif
 
@@ -719,7 +720,7 @@ static int __init smp_scan_config(unsign
 			       mpf, (u64)virt_to_phys(mpf));
 
 			mem = virt_to_phys(mpf);
-			reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
+			memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 #else
--- head.orig/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:10:53.000000000 +0200
+++ head/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:32:09.000000000 +0200
@@ -9,6 +9,7 @@
 #include <asm/dma.h>
 #include <asm/iommu.h>
 #include <asm/x86_init.h>
+#include <asm/iommu_table.h>
 
 static int forbid_dac __read_mostly;
 
@@ -42,6 +43,8 @@ int iommu_detected __read_mostly = 0;
 int iommu_pass_through __read_mostly;
 #endif
 
+extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
+
 /* Dummy device used for NULL arguments (normally ISA). */
 struct device x86_dma_fallback_dev = {
 	.init_name = "fallback device",
@@ -140,7 +143,10 @@ static struct dma_map_ops swiotlb_dma_op
 	.dma_supported = swiotlb_dma_supported
 };
 
-#define pci_xen_swiotlb_detect() 1
+static int __init pci_xen_swiotlb_detect(void)
+{
+	return 1;
+}
 
 static void __init pci_xen_swiotlb_init(void)
 {
@@ -151,26 +157,28 @@ static void __init pci_xen_swiotlb_init(
 	}
 }
 
+IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, NULL, pci_xen_swiotlb_init, NULL);
+
 void __init pci_iommu_alloc(void)
 {
+	struct iommu_table_entry *p;
+
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
 
-	if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
-		goto out;
-
-	gart_iommu_hole_init();
-
-	detect_calgary();
+	sort_iommu_table(__iommu_table, __iommu_table_end);
+	check_iommu_entries(__iommu_table, __iommu_table_end);
 
-	detect_intel_iommu();
-
-	/* needs to be called after gart_iommu_hole_init */
-	amd_iommu_detect();
-out:
-	pci_xen_swiotlb_init();
+	for (p = __iommu_table; p < __iommu_table_end; p++) {
+		if (p && p->detect && p->detect() > 0) {
+			p->flags |= IOMMU_DETECTED;
+			if (p->early_init)
+				p->early_init();
+			if (p->flags & IOMMU_FINISH_IF_DETECTED)
+				break;
+		}
+	}
 }
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag)
 {
@@ -375,6 +383,7 @@ EXPORT_SYMBOL(dma_supported);
 
 static int __init pci_iommu_init(void)
 {
+	struct iommu_table_entry *p;
 	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 
 #ifdef CONFIG_PCI
@@ -382,14 +391,10 @@ static int __init pci_iommu_init(void)
 #endif
 	x86_init.iommu.iommu_init();
 
-#ifndef CONFIG_XEN
-	if (swiotlb || xen_swiotlb) {
-		printk(KERN_INFO "PCI-DMA: "
-		       "Using software bounce buffering for IO (SWIOTLB)\n");
-		swiotlb_print_info();
-	} else
-		swiotlb_free();
-#endif
+	for (p = __iommu_table; p < __iommu_table_end; p++) {
+		if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
+			p->late_init();
+	}
 
 	return 0;
 }
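
[Editor's note, not part of the patch: pci-dma-xen.c above stops hard-coding the detect/init ordering (swiotlb, GART, Calgary, VT-d, AMD IOMMU) and instead walks the __iommu_table assembled at link time by the IOMMU_INIT* macros: pci_iommu_alloc() sorts the table, runs each ->detect(), and calls ->early_init() for entries that report success; pci_iommu_init() later runs the matching ->late_init() hooks. A hedged sketch of how a detector registers itself, modeled on the IOMMU_INIT_FINISH() call added for Xen swiotlb above; the example_* functions are hypothetical.]

#include <linux/init.h>
#include <asm/iommu_table.h>

static int __init example_iommu_detect(void)
{
	/* return > 0 when the hardware is present and should be used */
	return 0;
}

static void __init example_iommu_early_init(void)
{
	/* called from pci_iommu_alloc() when detect() succeeded */
}

static void __init example_iommu_late_init(void)
{
	/* called from pci_iommu_init() for entries marked IOMMU_DETECTED */
}

/* second argument names the detector this one must run after (none here) */
IOMMU_INIT(example_iommu_detect, NULL, example_iommu_early_init,
	   example_iommu_late_init);
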
--- head.orig/arch/x86/kernel/resource.c	2011-01-05 01:50:19.000000000 +0100
+++ head/arch/x86/kernel/resource.c	2011-09-23 14:48:43.000000000 +0200
@@ -1,3 +1,7 @@
+#ifdef CONFIG_XEN
+# define e820 machine_e820
+# include <asm/hypervisor.h>
+#endif
 #include <linux/ioport.h>
 #include <asm/e820.h>
 
@@ -37,6 +41,10 @@ static void remove_e820_regions(struct r
 
 void arch_remove_reservations(struct resource *avail)
 {
+#ifdef CONFIG_XEN
+	if (!is_initial_xendomain())
+		return;
+#endif
 	/*
 	 * Trim out BIOS area (high 2MB) and E820 regions. We do not remove
 	 * the low 1MB unconditionally, as this area is needed for some ISA
--- head.orig/arch/x86/kernel/setup-xen.c	2013-12-06 15:07:45.000000000 +0100
+++ head/arch/x86/kernel/setup-xen.c	2013-12-06 15:07:53.000000000 +0100
@@ -31,6 +31,7 @@
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/console.h>
 #include <linux/mca.h>
@@ -83,7 +84,6 @@
 #include <asm/dmi.h>
 #include <asm/io_apic.h>
 #include <asm/ist.h>
-#include <asm/vmi.h>
 #include <asm/setup_arch.h>
 #include <asm/bios_ebda.h>
 #include <asm/cacheflush.h>
@@ -107,11 +107,12 @@
 #include <asm/percpu.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
 #include <asm/mce.h>
+#include <asm/alternative.h>
 
 #ifdef CONFIG_XEN
 #include <asm/hypervisor.h>
@@ -156,7 +157,6 @@ unsigned long max_pfn_mapped;
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
 
-unsigned int boot_cpu_id __read_mostly;
 
 static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
 unsigned long _brk_end = (unsigned long)__brk_base;
@@ -338,7 +338,7 @@ static inline void init_gbpages(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+		memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -361,17 +361,16 @@ static void __init relocate_initrd(void)
 	char *p, *q;
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
+	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
 					 PAGE_SIZE);
 
-	if (ramdisk_here == -1ULL)
+	if (ramdisk_here == MEMBLOCK_ERROR)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
 			 ramdisk_size);
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	reserve_early(ramdisk_here, ramdisk_here + area_size,
-			 "NEW RAMDISK");
+	memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -444,7 +443,7 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		free_early(ramdisk_image, ramdisk_end);
+		memblock_x86_free_range(ramdisk_image, ramdisk_end);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
@@ -470,7 +469,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	free_early(ramdisk_image, ramdisk_end);
+	memblock_x86_free_range(ramdisk_image, ramdisk_end);
 }
 #else
 static void __init reserve_initrd(void)
@@ -530,7 +529,7 @@ static void __init e820_reserve_setup_da
 #endif
 }
 
-static void __init reserve_early_setup_data(void)
+static void __init memblock_x86_reserve_range_setup_data(void)
 {
 #ifndef CONFIG_XEN
 	struct setup_data *data;
@@ -543,7 +542,7 @@ static void __init reserve_early_setup_d
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
 		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
 		pa_data = data->next;
 		early_iounmap(data, sizeof(*data));
 	}
@@ -565,6 +564,18 @@ static inline unsigned long long get_tot
 	return total << PAGE_SHIFT;
 }
 
+/*
+ * Keep the crash kernel below this limit.  On 32 bits, earlier kernels
+ * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
+ * limit once kexec-tools are fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX	(512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX	(896 << 20)
+#endif
+
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long total_mem;
@@ -582,23 +593,27 @@ static void __init reserve_crashkernel(v
 	if (crash_base <= 0) {
 		const unsigned long long alignment = 16<<20;	/* 16M */
 
-		crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
-				 alignment);
-		if (crash_base == -1ULL) {
+		/*
+		 * kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX
+		 */
+		crash_base = memblock_find_in_range(alignment,
+			       CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+
+		if (crash_base == MEMBLOCK_ERROR) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
 		unsigned long long start;
 
-		start = find_e820_area(crash_base, ULONG_MAX, crash_size,
-				 1<<20);
+		start = memblock_find_in_range(crash_base,
+				 crash_base + crash_size, crash_size, 1<<20);
 		if (start != crash_base) {
 			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
-	reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
+	memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -683,93 +698,27 @@ static __init void reserve_ibft_region(v
 
 #ifndef CONFIG_XEN
 	if (size)
-		reserve_early_overlap_ok(addr, addr + size, "ibft");
+		memblock_x86_reserve_range(addr, addr + size, "* ibft");
 #endif
 }
 
-#ifdef CONFIG_X86_RESERVE_LOW_64K
-static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE
-		"%s detected: BIOS may corrupt low RAM, working around it.\n",
-		d->ident);
-
-	e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	return 0;
-}
-#endif
-
-/* List of systems that have known low memory corruption BIOS problems */
-static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
-#ifdef CONFIG_X86_RESERVE_LOW_64K
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "AMI BIOS",
-		.matches = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-		},
-	},
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "Phoenix BIOS",
-		.matches = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
-		},
-	},
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "Phoenix/MSC BIOS",
-		.matches = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
-		},
-	},
-	/*
-	 * AMI BIOS with low memory corruption was found on Intel DG45ID and
-	 * DG45FC boards.
-	 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
-	 * match only DMI_BOARD_NAME and see if there is more bad products
-	 * with this vendor.
-	 */
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "AMI BIOS",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
-		},
-	},
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "AMI BIOS",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
-		},
-	},
-	/*
-	 * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
-	 * match on the product name.
-	 */
-	{
-		.callback = dmi_low_memory_corruption,
-		.ident = "Phoenix BIOS",
-		.matches = {
-			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
-		},
-	},
-#endif
-	{}
-};
-
 #ifndef CONFIG_XEN
+static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+
 static void __init trim_bios_range(void)
 {
 	/*
 	 * A special case is the first 4Kb of memory;
 	 * This is a BIOS owned area, not kernel ram, but generally
 	 * not listed as such in the E820 table.
+	 *
+	 * This typically reserves additional memory (64KiB by default)
+	 * since some BIOSes are known to corrupt low memory.  See the
+	 * Kconfig help text for X86_RESERVE_LOW.
 	 */
-	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+	e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
+			  E820_RAM, E820_RESERVED);
+
 	/*
 	 * special case: Some BIOSen report the PC BIOS
 	 * area (640->1Mb) as ram even though it is not.
@@ -778,8 +727,39 @@ static void __init trim_bios_range(void)
 	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
+
+static int __init parse_reservelow(char *p)
+{
+	unsigned long long size;
+
+	if (!p)
+		return -EINVAL;
+
+	size = memparse(p, &p);
+
+	if (size < 4096)
+		size = 4096;
+
+	if (size > 640*1024)
+		size = 640*1024;
+
+	reserve_low = size;
+
+	return 0;
+}
+
+early_param("reservelow", parse_reservelow);
 #endif
 
+static u64 __init get_max_mapped(void)
+{
+	u64 end = max_pfn_mapped;
+
+	end <<= PAGE_SHIFT;
+
+	return end;
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -797,6 +777,7 @@ void __init setup_arch(char **cmdline_p)
 {
 	int acpi = 0;
 	int k8 = 0;
+	unsigned long flags;
 #ifdef CONFIG_XEN
 	unsigned int i;
 	unsigned long p2m_pages;
@@ -819,14 +800,27 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
+
+#ifndef CONFIG_XEN
+	/*
+	 * copy kernel address range established so far and switch
+	 * to the proper swapper page table
+	 */
+	clone_pgd_range(swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			initial_page_table + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	load_cr3(swapper_pg_dir);
+	__flush_tlb_all();
+#endif
 #else
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
-	/* VMI may relocate the fixmap; do this before touching ioremap area */
-	vmi_init();
-
-	/* OFW also may relocate the fixmap */
+	/*
+	 * If we have OLPC OFW, we might end up relocating the fixmap due to
+	 * reserve_top(), so do this before touching the ioremap area.
+	 */
 	olpc_ofw_detect();
 
 	early_trap_init();
@@ -872,7 +866,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	 4)) {
 		efi_enabled = 1;
-		efi_reserve_early();
+		efi_memblock_x86_reserve_range();
 	}
 #endif
 #else /* CONFIG_XEN */
@@ -900,6 +894,7 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.oem.arch_setup();
 
+	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
 	/* update the e820_saved too */
@@ -952,11 +947,8 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_report_nx();
 
-	/* Must be before kernel pagetables are setup */
-	vmi_activate();
-
 	/* after early param, so could get panic from serial */
-	reserve_early_setup_data();
+	memblock_x86_reserve_range_setup_data();
 
 	if (acpi_mps_check()) {
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
@@ -975,12 +967,9 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
-	if (is_initial_xendomain()) {
+	if (is_initial_xendomain())
 		dmi_scan_machine();
 
-		dmi_check_system(bad_bios_dmi_table);
-	}
-
 	/*
 	 * VMware detection requires dmi to be available, so this
 	 * needs to be done after dmi_scan_machine, for the BP.
@@ -1015,8 +1004,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	max_pfn = e820_end_of_ram_pfn();
 
-	/* preallocate 4k for mptable mpc */
-	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
 #ifndef CONFIG_XEN
@@ -1043,20 +1030,8 @@ void __init setup_arch(char **cmdline_p)
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
-#ifndef CONFIG_XEN
-	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-#endif
 #endif
 
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-	setup_bios_corruption_check();
-#endif
-
-	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
-			max_pfn_mapped<<PAGE_SHIFT);
-
-	reserve_brk();
-
 	/*
 	 * Find and reserve possible boot-time SMP configuration:
 	 */
@@ -1064,6 +1039,26 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_ibft_region();
 
+	/*
+	 * Need to conclude brk before memblock_x86_fill(), since the
+	 * latter could use memblock_find_in_range() and thus overlap
+	 * with the brk area.
+	 */
+	reserve_brk();
+
+	memblock.current_limit = get_max_mapped();
+	memblock_x86_fill();
+
+	/* preallocate 4k for mptable mpc */
+	early_reserve_e820_mpc_new();
+
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+	setup_bios_corruption_check();
+#endif
+
+	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+			max_pfn_mapped<<PAGE_SHIFT);
+
 	reserve_trampoline_memory();
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -1087,6 +1082,7 @@ void __init setup_arch(char **cmdline_p)
 		max_low_pfn = max_pfn;
 	}
 #endif
+	memblock.current_limit = get_max_mapped();
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1132,10 +1128,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	initmem_init(0, max_pfn, acpi, k8);
-#ifndef CONFIG_NO_BOOTMEM
-	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-#endif
-
+	memblock_find_dma_reserve();
 	dma32_reserve_bootmem();
 
 #ifdef CONFIG_KVM_CLOCK
@@ -1146,7 +1139,12 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
-	setup_trampoline_page_table();
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+	/* sync back kernel address range */
+	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+#endif
 
 	tboot_probe();
 
@@ -1292,6 +1290,10 @@ void __init setup_arch(char **cmdline_p)
 	x86_init.oem.banner();
 
 	mcheck_init();
+
+	local_irq_save(flags);
+	arch_init_ideal_nop5();
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_X86_32
--- head.orig/arch/x86/kernel/smp-xen.c	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/kernel/smp-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -143,10 +143,10 @@ irqreturn_t smp_reboot_interrupt(int irq
 	return IRQ_HANDLED;
 }
 
-void xen_smp_send_stop(void)
+void xen_stop_other_cpus(int wait)
 {
 	unsigned long flags;
-	unsigned long wait;
+	unsigned long timeout;
 
 	/*
 	 * Use an own vector here because smp_call_function
@@ -160,9 +160,12 @@ void xen_smp_send_stop(void)
 	if (num_online_cpus() > 1) {
 		xen_send_IPI_allbutself(REBOOT_VECTOR);
 
-		/* Don't wait longer than a second */
-		wait = USEC_PER_SEC;
-		while (num_online_cpus() > 1 && wait--)
+		/*
+		 * Don't wait longer than a second if the caller
+		 * didn't ask us to wait.
+		 */
+		timeout = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && (wait || timeout--))
 			udelay(1);
 	}
 
--- head.orig/arch/x86/kernel/traps-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/traps-xen.c	2013-11-07 11:51:33.000000000 +0100
@@ -568,6 +568,7 @@ dotraplinkage void __kprobes do_debug(st
 	if (regs->flags & X86_VM_MASK) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
+		preempt_conditional_cli(regs);
 		return;
 	}
 
@@ -773,21 +774,10 @@ asmlinkage void math_state_restore(void)
 	__math_state_restore();
 }
 
-#ifndef CONFIG_MATH_EMULATION
-void math_emulate(struct math_emu_info *info)
-{
-	printk(KERN_EMERG
-		"math-emulation not enabled and no coprocessor found.\n");
-	printk(KERN_EMERG "killing %s.\n", current->comm);
-	force_sig(SIGFPE, current);
-	schedule();
-}
-#endif /* CONFIG_MATH_EMULATION */
-
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+#ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
 
@@ -795,12 +785,12 @@ do_device_not_available(struct pt_regs *
 
 		info.regs = regs;
 		math_emulate(&info);
-	} else {
-		math_state_restore(); /* interrupts still off */
-		conditional_sti(regs);
+		return;
 	}
-#else
-	math_state_restore();
+#endif
+	math_state_restore(); /* interrupts still off */
+#ifdef CONFIG_X86_32
+	conditional_sti(regs);
 #endif
 }
 
@@ -882,20 +872,6 @@ void __init trap_init(void)
 	if (ret)
 		printk("HYPERVISOR_set_trap_table failed (%d)\n", ret);
 
-#ifdef CONFIG_X86_32
-	if (cpu_has_fxsr) {
-		printk(KERN_INFO "Enabling fast FPU save and restore... ");
-		set_in_cr4(X86_CR4_OSFXSR);
-		printk("done.\n");
-	}
-	if (cpu_has_xmm) {
-		printk(KERN_INFO
-			"Enabling unmasked SIMD FPU exception support... ");
-		set_in_cr4(X86_CR4_OSXMMEXCPT);
-		printk("done.\n");
-	}
-
-#endif
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
--- head.orig/arch/x86/mm/fault-xen.c	2011-08-15 11:05:39.000000000 +0200
+++ head/arch/x86/mm/fault-xen.c	2011-08-15 11:05:47.000000000 +0200
@@ -11,6 +11,7 @@
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
+#include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -161,15 +162,20 @@ is_prefetch(struct pt_regs *regs, unsign
 
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk)
+		     struct task_struct *tsk, int fault)
 {
+	unsigned lsb = 0;
 	siginfo_t info;
 
 	info.si_signo	= si_signo;
 	info.si_errno	= 0;
 	info.si_code	= si_code;
 	info.si_addr	= (void __user *)address;
-	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
+	if (fault & VM_FAULT_HWPOISON_LARGE)
+		lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+	if (fault & VM_FAULT_HWPOISON)
+		lsb = PAGE_SHIFT;
+	info.si_addr_lsb = lsb;
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -177,9 +183,6 @@ force_sig_info_fault(int si_signo, int s
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
 
-#define pgd_page_table(what, pg) \
-	spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
-
 #ifdef CONFIG_X86_32
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
@@ -241,13 +244,16 @@ void vmalloc_sync_all(void)
 
 		spin_lock_irqsave(&pgd_lock, flags);
 		list_for_each_entry(page, &pgd_list, lru) {
-			pmd_t *pmd;
+			spinlock_t *pgt_lock;
+			pmd_t *ret;
+
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
-			pgd_page_table(lock, page);
-			pmd = vmalloc_sync_one(page_address(page), address);
-			pgd_page_table(unlock, page);
+			spin_lock(pgt_lock);
+			ret = vmalloc_sync_one(page_address(page), address);
+			spin_unlock(pgt_lock);
 
-			if (!pmd)
+			if (!ret)
 				break;
 		}
 		spin_unlock_irqrestore(&pgd_lock, flags);
@@ -269,6 +275,8 @@ static noinline __kprobes int vmalloc_fa
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Synchronize this task's top level page-table
 	 * with the 'reference' page table.
@@ -344,31 +352,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			pgd_page_table(lock, page);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-			pgd_page_table(unlock, page);
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
 }
 
 /*
@@ -389,6 +373,8 @@ static noinline __kprobes int vmalloc_fa
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Copy kernel mappings over when needed. This can also
 	 * happen within a race in page table update. In the later
@@ -752,7 +738,7 @@ __bad_area_nosemaphore(struct pt_regs *r
 		tsk->thread.error_code	= error_code | (address >= TASK_SIZE);
 		tsk->thread.trap_no	= 14;
 
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
 
 		return;
 	}
@@ -837,14 +823,14 @@ do_sigbus(struct pt_regs *regs, unsigned
 	tsk->thread.trap_no	= 14;
 
 #ifdef CONFIG_MEMORY_FAILURE
-	if (fault & VM_FAULT_HWPOISON) {
+	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
 		printk(KERN_ERR
 	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
 			tsk->comm, tsk->pid, address);
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk);
+	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }
 
 static noinline void
@@ -854,7 +840,8 @@ mm_fault_error(struct pt_regs *regs, uns
 	if (fault & VM_FAULT_OOM) {
 		out_of_memory(regs, error_code, address);
 	} else {
-		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+			     VM_FAULT_HWPOISON_LARGE))
 			do_sigbus(regs, error_code, address, fault);
 		else
 			BUG();
@@ -915,8 +902,14 @@ spurious_fault(unsigned long error_code,
 	if (pmd_large(*pmd))
 		return spurious_fault_check(error_code, (pte_t *) pmd);
 
+	/*
+	 * Note: don't use pte_present() here, since it returns true
+	 * if the _PAGE_PROTNONE bit is set.  However, this aliases the
+	 * _PAGE_GLOBAL bit, which for kernel pages gives false positives
+	 * when CONFIG_DEBUG_PAGEALLOC is used.
+	 */
 	pte = pte_offset_kernel(pmd, address);
-	if (!pte_present(*pte))
+	if (!(pte_flags(*pte) & _PAGE_PRESENT))
 		return 0;
 
 	ret = spurious_fault_check(error_code, pte);
@@ -936,9 +929,9 @@ spurious_fault(unsigned long error_code,
 int show_unhandled_signals = 1;
 
 static inline int
-access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
+access_error(unsigned long error_code, struct vm_area_struct *vma)
 {
-	if (write) {
+	if (error_code & PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			return 1;
@@ -973,8 +966,10 @@ do_page_fault(struct pt_regs *regs, unsi
 	struct task_struct *tsk;
 	unsigned long address;
 	struct mm_struct *mm;
-	int write;
 	int fault;
+	int write = error_code & PF_WRITE;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+					(write ? FAULT_FLAG_WRITE : 0);
 
 	/* Set the "privileged fault" bit to something sane. */
 	if (user_mode_vm(regs))
@@ -1102,6 +1097,7 @@ do_page_fault(struct pt_regs *regs, unsi
 			bad_area_nosemaphore(regs, error_code, address);
 			return;
 		}
+retry:
 		down_read(&mm->mmap_sem);
 	} else {
 		/*
@@ -1145,9 +1141,7 @@ do_page_fault(struct pt_regs *regs, unsi
 	 * we can handle it..
 	 */
 good_area:
-	write = error_code & PF_WRITE;
-
-	if (unlikely(access_error(error_code, write, vma))) {
+	if (unlikely(access_error(error_code, vma))) {
 		bad_area_access_error(regs, error_code, address);
 		return;
 	}
@@ -1157,21 +1151,34 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault:
 	 */
-	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(mm, vma, address, flags);
 
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-				     regs, address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-				     regs, address);
+	/*
+	 * Major/minor page fault accounting is only done on the
+	 * initial attempt. If we go through a retry, it is extremely
+	 * likely that the page will be found in page cache at that point.
+	 */
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			tsk->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				      regs, address);
+		} else {
+			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				      regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+			 * of starvation. */
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			goto retry;
+		}
 	}
 
 	check_v8086_mode(regs, address, tsk);
--- head.orig/arch/x86/mm/highmem_32-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -9,6 +9,7 @@ void *kmap(struct page *page)
 		return page_address(page);
 	return kmap_high(page);
 }
+EXPORT_SYMBOL(kmap);
 
 void kunmap(struct page *page)
 {
@@ -18,6 +19,7 @@ void kunmap(struct page *page)
 		return;
 	kunmap_high(page);
 }
+EXPORT_SYMBOL(kunmap);
 
 /*
  * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
@@ -27,10 +29,10 @@ void kunmap(struct page *page)
  * However when holding an atomic kmap it is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
-	enum fixed_addresses idx;
 	unsigned long vaddr;
+	int idx, type;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
 	pagefault_disable();
@@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page
 	if (!PageHighMem(page))
 		return page_address(page);
 
-	debug_kmap_atomic(type);
-
+	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
@@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page
 
 	return (void *)vaddr;
 }
+EXPORT_SYMBOL(kmap_atomic_prot);
 
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page)
+{
+	return kmap_atomic_prot(page, kmap_prot);
+}
+EXPORT_SYMBOL(__kmap_atomic);
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
 {
-	return kmap_atomic_prot(page, type, kmap_prot);
+	return kmap_atomic_prot_pfn(pfn, kmap_prot);
 }
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn);
 
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-	/*
-	 * Force other mappings to Oops if they'll try to access this pte
-	 * without first remap it.  Keeping stale mappings around is a bad idea
-	 * also, in case the page changes cacheability attributes or becomes
-	 * a protected page in a hypervisor.
-	 */
-	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+	if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+	    vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+		int idx, type;
+
+		type = kmap_atomic_idx();
+		idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+		WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+		/*
+		 * Force other mappings to Oops if they'll try to access this
+		 * pte without first remapping it.  Keeping stale mappings around
+		 * is a bad idea also, in case the page changes cacheability
+		 * attributes or becomes a protected page in a hypervisor.
+		 */
 		kpte_clear_flush(kmap_pte-idx, vaddr);
-	else {
+		kmap_atomic_idx_pop();
+	}
 #ifdef CONFIG_DEBUG_HIGHMEM
+	else {
 		BUG_ON(vaddr < PAGE_OFFSET);
 		BUG_ON(vaddr >= (unsigned long)high_memory);
-#endif
 	}
+#endif
 
 	pagefault_enable();
 }
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
-	return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
-}
-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
+EXPORT_SYMBOL(__kunmap_atomic);
 
 struct page *kmap_atomic_to_page(void *ptr)
 {
@@ -98,6 +112,7 @@ struct page *kmap_atomic_to_page(void *p
 	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
 	return pte_page(*pte);
 }
+EXPORT_SYMBOL(kmap_atomic_to_page);
 
 void clear_highpage(struct page *page)
 {
@@ -117,6 +132,7 @@ void clear_highpage(struct page *page)
 	clear_page(kaddr);
 	kunmap_atomic(kaddr, KM_USER0);
 }
+EXPORT_SYMBOL(clear_highpage);
 
 void copy_highpage(struct page *to, struct page *from)
 {
@@ -143,14 +159,6 @@ void copy_highpage(struct page *to, stru
 	kunmap_atomic(vfrom, KM_USER0);
 	kunmap_atomic(vto, KM_USER1);
 }
-
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
-EXPORT_SYMBOL(kmap_atomic_prot);
-EXPORT_SYMBOL(kmap_atomic_to_page);
-EXPORT_SYMBOL(clear_highpage);
 EXPORT_SYMBOL(copy_highpage);
 
 void __init set_highmem_pages_init(void)
--- head.orig/arch/x86/mm/init-xen.c	2013-08-15 13:02:09.000000000 +0200
+++ head/arch/x86/mm/init-xen.c	2013-04-05 09:15:32.000000000 +0200
@@ -2,6 +2,7 @@
 #include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
+#include <linux/memblock.h>
 #include <linux/bootmem.h>
 
 #include <asm/cacheflush.h>
@@ -88,10 +89,10 @@ static void __init find_early_table_spac
 		e820_table_end = e820_table_start;
 	} else {
 		/*
-		 * [table_start, table_top) gets passed to reserve_early(),
-		 * so we must not use table_end here, despite continuing
-		 * to allocate from there. table_end possibly being below
-		 * table_start is otoh not a problem.
+		 * [table_start, table_top) gets passed to
+		 * memblock_x86_reserve_range(), so we must not use table_end
+		 * here, despite continuing to allocate from there. table_end
+		 * possibly being below table_start is otoh not a problem.
 		 */
 		e820_table_start = e820_table_top;
 	}
@@ -340,7 +341,7 @@ unsigned long __init_refok init_memory_m
 	__flush_tlb_all();
 
 	if (!after_bootmem && e820_table_top > e820_table_start)
-		reserve_early(e820_table_start << PAGE_SHIFT,
+		memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
 			      e820_table_top << PAGE_SHIFT, "PGTABLE");
 
 	if (!after_bootmem)
--- head.orig/arch/x86/mm/init_32-xen.c	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -25,6 +25,7 @@
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/memory_hotplug.h>
 #include <linux/initrd.h>
@@ -70,7 +71,7 @@ static __init void *alloc_low_page(void)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = __va(pfn * PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	return adr;
 }
 
@@ -458,49 +459,28 @@ static void __init add_one_highpage_init
 	totalhigh_pages++;
 }
 
-struct add_highpages_data {
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-};
-
-static int __init add_highpages_work_fn(unsigned long start_pfn,
-					 unsigned long end_pfn, void *datax)
-{
-	int node_pfn;
-	struct page *page;
-	unsigned long final_start_pfn, final_end_pfn;
-	struct add_highpages_data *data;
-
-	data = (struct add_highpages_data *)datax;
-
-	final_start_pfn = max(start_pfn, data->start_pfn);
-	final_end_pfn = min(end_pfn, data->end_pfn);
-	if (final_start_pfn >= final_end_pfn)
-		return 0;
-
-	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
-	     node_pfn++) {
-		if (!pfn_valid(node_pfn))
-			continue;
-		page = pfn_to_page(node_pfn);
-		add_one_highpage_init(page);
-	}
-
-	return 0;
-
-}
-
-void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
-					      unsigned long end_pfn)
+void __init add_highpages_with_active_regions(int nid,
+			 unsigned long start_pfn, unsigned long end_pfn)
 {
-	struct add_highpages_data data;
+	struct range *range;
+	int nr_range;
+	int i;
 
-	data.start_pfn = start_pfn;
-	data.end_pfn = end_pfn;
+	nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
 
-	work_with_active_regions(nid, add_highpages_work_fn, &data);
+	for (i = 0; i < nr_range; i++) {
+		struct page *page;
+		int node_pfn;
+
+		for (node_pfn = range[i].start; node_pfn < range[i].end;
+		     node_pfn++) {
+			if (!pfn_valid(node_pfn))
+				continue;
+			page = pfn_to_page(node_pfn);
+			add_one_highpage_init(page);
+		}
+	}
 }
-
 #else
 static inline void permanent_kmaps_init(pgd_t *pgd_base)
 {
@@ -550,48 +530,6 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-#if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN)
-/*
- * ACPI suspend needs this for resume, because things like the intel-agp
- * driver might have split up a kernel 4MB mapping.
- */
-char swsusp_pg_dir[PAGE_SIZE]
-	__attribute__ ((aligned(PAGE_SIZE)));
-
-static inline void save_pg_dir(void)
-{
-	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
-}
-#else /* !CONFIG_ACPI_SLEEP */
-static inline void save_pg_dir(void)
-{
-}
-#endif /* !CONFIG_ACPI_SLEEP */
-
-void zap_low_mappings(bool early)
-{
-	int i;
-
-	/*
-	 * Zap initial low-memory mappings.
-	 *
-	 * Note that "pgd_clear()" doesn't do it for
-	 * us, because pgd_clear() is a no-op on i386.
-	 */
-	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
-#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
-		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
-		set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
-	}
-
-	if (early)
-		__flush_tlb();
-	else
-		flush_tlb_all();
-}
-
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
@@ -714,14 +652,14 @@ void __init initmem_init(unsigned long s
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	e820_register_active_regions(0, 0, highend_pfn);
+	memblock_x86_register_active_regions(0, 0, highend_pfn);
 	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	e820_register_active_regions(0, 0, max_low_pfn);
+	memblock_x86_register_active_regions(0, 0, max_low_pfn);
 	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
@@ -752,75 +690,18 @@ static void __init zone_sizes_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
-#ifndef CONFIG_NO_BOOTMEM
-static unsigned long __init setup_node_bootmem(int nodeid,
-				 unsigned long start_pfn,
-				 unsigned long end_pfn,
-				 unsigned long bootmap)
-{
-	unsigned long bootmap_size;
-
-	/* don't touch min_low_pfn */
-	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
-					 bootmap >> PAGE_SHIFT,
-					 start_pfn, end_pfn);
-	printk(KERN_INFO "  node %d low ram: %08lx - %08lx\n",
-		nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	printk(KERN_INFO "  node %d bootmap %08lx - %08lx\n",
-		 nodeid, bootmap, bootmap + bootmap_size);
-	free_bootmem_with_active_regions(nodeid, end_pfn);
-
-	return bootmap + bootmap_size;
-}
-#endif
-
 void __init setup_bootmem_allocator(void)
 {
-#ifndef CONFIG_NO_BOOTMEM
-	int nodeid;
-	unsigned long bootmap_size, bootmap;
-	unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages);
-
-	/*
-	 * Initialize the boot-time allocator (with low memory only):
-	 */
-	bootmap_size = bootmem_bootmap_pages(end_xen_pfn)<<PAGE_SHIFT;
-	bootmap = find_e820_area(0, min(max_pfn_mapped,
-				        xen_start_info->nr_pages)<<PAGE_SHIFT,
-				 bootmap_size, PAGE_SIZE);
-	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
-	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-#elif defined(CONFIG_XEN)
+#ifdef CONFIG_XEN
 	if (max_low_pfn > xen_start_info->nr_pages)
-		reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
-			      max_low_pfn << PAGE_SHIFT, "BALLOON");
+		memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+					   max_low_pfn << PAGE_SHIFT, "BALLOON");
 #endif
 
 	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
 		 max_pfn_mapped<<PAGE_SHIFT);
 	printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
 
-#ifndef CONFIG_NO_BOOTMEM
-	for_each_online_node(nodeid) {
-		 unsigned long start_pfn, end_pfn;
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-		start_pfn = node_start_pfn[nodeid];
-		end_pfn = node_end_pfn[nodeid];
-		if (start_pfn > end_xen_pfn)
-			continue;
-		if (end_pfn > end_xen_pfn)
-			end_pfn = end_xen_pfn;
-#else
-		start_pfn = 0;
-		end_pfn = end_xen_pfn;
-#endif
-		bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
-						 bootmap);
-	}
-#endif
-
 	after_bootmem = 1;
 }
 
@@ -870,8 +751,8 @@ unsigned long __init extend_init_mapping
 	}
 
 	if (start_pfn > start)
-		reserve_early(start << PAGE_SHIFT,
-			      start_pfn << PAGE_SHIFT, "INITMAP");
+		memblock_x86_reserve_range(start << PAGE_SHIFT,
+					   start_pfn << PAGE_SHIFT, "INITMAP");
 
 	return start_pfn;
 }
@@ -1026,9 +907,6 @@ void __init mem_init(void)
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
-	save_pg_dir();
-	zap_low_mappings(true);
-
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
 
@@ -1139,8 +1017,3 @@ void mark_rodata_ro(void)
 }
 #endif
 
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
-				   int flags)
-{
-	return reserve_bootmem(phys, len, flags);
-}
--- head.orig/arch/x86/mm/init_64-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/init_64-xen.c	2013-04-05 09:14:46.000000000 +0200
@@ -24,6 +24,7 @@
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
 #include <linux/pfn.h>
@@ -54,7 +55,6 @@
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 #include <asm/setup.h>
-#include <linux/bootmem.h>
 
 #include <xen/features.h>
 
@@ -164,6 +164,43 @@ static int __init nonx32_setup(char *str
 __setup("noexec32=", nonx32_setup);
 
 /*
+ * When memory is added or removed, make sure all the processes' MMs
+ * have suitable PGD entries in the local PGD-level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		unsigned long flags;
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			spinlock_t *pgt_lock;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+			spin_lock(pgt_lock);
+
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+				       != pgd_page_vaddr(*pgd_ref));
+
+			spin_unlock(pgt_lock);
+		}
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+
+/*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
  */
@@ -405,9 +442,9 @@ static inline int __meminit make_readonl
 	 * page and descriptor tables embedded inside don't have writable
 	 * mappings. Exclude the vsyscall area here, allowing alternative
 	 * instruction patching to work. The range must be in sync with that
-	 * passed to reserve_early() (as "TEXT DATA BSS"), since all other
-	 * regions can be allocated from under CONFIG_NO_BOOTMEM and thus must
-	 * be writable.
+	 * passed to memblock_x86_reserve_range() (as "TEXT DATA BSS"), since
+	 * all other regions can be allocated from under CONFIG_NO_BOOTMEM and
+	 * thus must be writable.
 	 */
 	if ((paddr >= __pa_symbol(&_text))
             && (paddr < (__pa_symbol(__bss_stop) & PAGE_MASK))
@@ -778,11 +815,13 @@ kernel_physical_mapping_init(unsigned lo
 			     unsigned long end,
 			     unsigned long page_size_mask)
 {
-
+	bool pgd_changed = false;
 	unsigned long next, last_map_addr = end;
+	unsigned long addr;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
+	addr = start;
 
 	for (; start < end; start = next) {
 		pgd_t *pgd = pgd_offset_k(start);
@@ -814,9 +853,13 @@ kernel_physical_mapping_init(unsigned lo
 			spin_lock(&init_mm.page_table_lock);
 			pgd_populate(&init_mm, pgd, __va(pud_phys));
 			spin_unlock(&init_mm.page_table_lock);
+			pgd_changed = true;
 		}
 	}
 
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
+
 	return last_map_addr;
 }
 
@@ -824,31 +867,11 @@ kernel_physical_mapping_init(unsigned lo
 void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 				int acpi, int k8)
 {
-#ifndef CONFIG_NO_BOOTMEM
-	unsigned long bootmap_size, bootmap;
-
-	e820_register_active_regions(0, start_pfn, end_pfn);
-#ifdef CONFIG_XEN
-	if (end_pfn > xen_start_info->nr_pages)
-		end_pfn = xen_start_info->nr_pages;
-#endif
-	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
-				 PAGE_SIZE);
-	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
-	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-	/* don't touch min_low_pfn */
-	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
-					 0, end_pfn);
-	free_bootmem_with_active_regions(0, end_pfn);
-#else
-	e820_register_active_regions(0, start_pfn, end_pfn);
+	memblock_x86_register_active_regions(0, start_pfn, end_pfn);
 #ifdef CONFIG_XEN
 	if (end_pfn > xen_start_info->nr_pages)
-		reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
-			      end_pfn << PAGE_SHIFT, "BALLOON");
-#endif
+		memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+					   end_pfn << PAGE_SHIFT, "BALLOON");
 #endif
 }
 #endif
@@ -1068,54 +1091,6 @@ void mark_rodata_ro(void)
 
 #endif
 
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
-				   int flags)
-{
-#ifdef CONFIG_NUMA
-	int nid, next_nid;
-	int ret;
-#endif
-	unsigned long pfn = phys >> PAGE_SHIFT;
-
-	if (pfn >= max_pfn) {
-		/*
-		 * This can happen with kdump kernels when accessing
-		 * firmware tables:
-		 */
-		if (pfn < max_pfn_mapped)
-			return -EFAULT;
-
-		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
-				phys, len);
-		return -EFAULT;
-	}
-
-	/* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
-	nid = phys_to_nid(phys);
-	next_nid = phys_to_nid(phys + len - 1);
-	if (nid == next_nid)
-		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
-	else
-		ret = reserve_bootmem(phys, len, flags);
-
-	if (ret != 0)
-		return ret;
-
-#else
-	reserve_bootmem(phys, len, flags);
-#endif
-
-#ifndef CONFIG_XEN
-	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
-		dma_reserve += len / PAGE_SIZE;
-		set_dma_reserve(dma_reserve);
-	}
-#endif
-
-	return 0;
-}
-
 int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1287,6 +1262,7 @@ vmemmap_populate(struct page *start_page
 		}
 
 	}
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 
--- head.orig/arch/x86/mm/iomap_32-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/iomap_32-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -49,21 +49,20 @@ int iomap_create_wc(resource_size_t base
 }
 EXPORT_SYMBOL_GPL(iomap_create_wc);
 
-void
-iomap_free(resource_size_t base, unsigned long size)
+void iomap_free(resource_size_t base, unsigned long size)
 {
 	io_free_memtype(base, base + size);
 }
 EXPORT_SYMBOL_GPL(iomap_free);
 
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 {
-	enum fixed_addresses idx;
 	unsigned long vaddr;
+	int idx, type;
 
 	pagefault_disable();
 
-	debug_kmap_atomic(type);
+	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	set_pte_at(&init_mm, vaddr, kmap_pte - idx, pfn_pte(pfn, prot));
@@ -73,10 +72,10 @@ void *kmap_atomic_prot_pfn(unsigned long
 }
 
 /*
- * Map 'mfn' using fixed map 'type' and protections 'prot'
+ * Map 'mfn' using protections 'prot'
  */
 void __iomem *
-iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
+iomap_atomic_prot_pfn(unsigned long mfn, pgprot_t prot)
 {
 	/*
 	 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
@@ -88,24 +87,34 @@ iomap_atomic_prot_pfn(unsigned long mfn,
 		prot = PAGE_KERNEL_UC_MINUS;
 
 	pgprot_val(prot) |= _PAGE_IOMAP;
-	return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, type, prot);
+	return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, prot);
 }
 EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
 
 void
-iounmap_atomic(void __iomem *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-	/*
-	 * Force other mappings to Oops if they'll try to access this pte
-	 * without first remap it.  Keeping stale mappings around is a bad idea
-	 * also, in case the page changes cacheability attributes or becomes
-	 * a protected page in a hypervisor.
-	 */
-	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+	if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+	    vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+		int idx, type;
+
+		type = kmap_atomic_idx();
+		idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+		WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+		/*
+		 * Force other mappings to Oops if they'll try to access this
+		 * pte without first remapping it.  Keeping stale mappings around
+		 * is a bad idea also, in case the page changes cacheability
+		 * attributes or becomes a protected page in a hypervisor.
+		 */
 		kpte_clear_flush(kmap_pte-idx, vaddr);
+		kmap_atomic_idx_pop();
+	}
 
 	pagefault_enable();
 }
--- head.orig/arch/x86/mm/ioremap-xen.c	2012-11-26 14:22:41.000000000 +0100
+++ head/arch/x86/mm/ioremap-xen.c	2011-05-09 11:42:30.000000000 +0200
@@ -527,6 +527,11 @@ static inline pte_t * __init early_iorem
 	return &bm_pte[pte_index(addr)];
 }
 
+bool __init is_early_ioremap_ptep(pte_t *ptep)
+{
+	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
+}
+
 static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
 
 void __init early_ioremap_init(void)
--- head.orig/arch/x86/mm/pgtable-xen.c	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/mm/pgtable-xen.c	2013-12-10 11:38:00.000000000 +0100
@@ -429,7 +429,19 @@ static inline void pgd_list_del(pgd_t *p
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+	virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+	return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 {
 	pgd_test_and_unpin(pgd);
 
@@ -442,10 +454,6 @@ static void pgd_ctor(pgd_t *pgd)
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
-		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
-					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
-					 KERNEL_PGD_BOUNDARY,
-					 KERNEL_PGD_PTRS);
 	}
 
 #ifdef CONFIG_X86_64
@@ -455,8 +463,10 @@ static void pgd_ctor(pgd_t *pgd)
 #endif
 
 	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
+	if (!SHARED_KERNEL_PMD) {
+		pgd_set_mm(pgd, mm);
 		pgd_list_add(pgd);
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
@@ -664,12 +674,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	}
 #endif
 
-	pgd_ctor(pgd);
+	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	/* Store a back link for vmalloc_sync_all(). */
-	set_page_private(virt_to_page(pgd), (unsigned long)mm);
-
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
 	return pgd;
--- head.orig/arch/x86/pci/pcifront.c	2011-02-01 14:50:44.000000000 +0100
+++ head/arch/x86/pci/pcifront.c	2011-02-01 15:09:47.000000000 +0100
@@ -16,7 +16,7 @@ static int pcifront_enable_irq(struct pc
 {
 	u8 irq;
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
-	if (!irq_to_desc_alloc_node(irq, numa_node_id()))
+	if (!alloc_irq_and_cfg_at(irq, numa_node_id()))
 		return -ENOMEM;
 	evtchn_register_pirq(irq);
 	dev->irq = irq;
--- head.orig/arch/x86/xen/Kconfig	2013-01-08 11:58:39.000000000 +0100
+++ head/arch/x86/xen/Kconfig	2014-04-30 10:50:20.000000000 +0200
@@ -16,7 +16,7 @@ config PARAVIRT_XEN
 
 config XEN_DOM0
 	def_bool y
-	depends on XEN && PCI_XEN && SWIOTLB_XEN
+	depends on PARAVIRT_XEN && PCI_XEN && SWIOTLB_XEN
 	depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
 
 config XEN_PVHVM
--- head.orig/drivers/oprofile/oprofile_files.c	2012-01-06 10:50:28.000000000 +0100
+++ head/drivers/oprofile/oprofile_files.c	2012-02-16 13:43:26.000000000 +0100
@@ -296,6 +296,7 @@ static const struct file_operations acti
 	.open		= adomain_open,
 	.read		= domain_read,
 	.write		= domain_write,
+	.llseek		= default_llseek,
 };
 
 static DEFINE_DOMAIN_DATA(passive);
@@ -310,6 +311,7 @@ static const struct file_operations pass
 	.open		= pdomain_open,
 	.read		= domain_read,
 	.write		= domain_write,
+	.llseek		= default_llseek,
 };
 
 #endif /* CONFIG_XEN */
--- head.orig/drivers/pci/Kconfig	2014-02-18 17:26:10.000000000 +0100
+++ head/drivers/pci/Kconfig	2013-09-26 13:00:18.000000000 +0200
@@ -69,9 +69,9 @@ config PCI_STUB
 
 	  When in doubt, say N.
 
-config XEN_PCIDEV_FRONTEND
+config PARAVIRT_XEN_PCIDEV_FRONTEND
         tristate "Xen PCI Frontend"
-        depends on PCI && X86 && XEN
+        depends on PCI && X86 && PARAVIRT_XEN
         select PCI_XEN
 	select XEN_XENBUS_FRONTEND
         default y
@@ -79,6 +79,15 @@ config XEN_PCIDEV_FRONTEND
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
 
+config XEN_PCIDEV_FRONTEND
+	def_bool y
+	prompt "Xen PCI Frontend" if X86_64
+	depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
+	select HOTPLUG
+	help
+	  The PCI device frontend driver allows the kernel to import arbitrary
+	  PCI devices from a PCI backend to support PCI driver domains.
+
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
--- head.orig/drivers/pci/Makefile	2011-01-31 14:32:40.000000000 +0100
+++ head/drivers/pci/Makefile	2013-01-08 11:58:55.000000000 +0100
@@ -60,7 +60,7 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
 
 obj-$(CONFIG_PCI_STUB) += pci-stub.o
 
-obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+obj-$(CONFIG_PARAVIRT_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
 
 obj-$(CONFIG_OF) += of.o
 
--- head.orig/drivers/xen/Kconfig	2014-01-30 10:17:56.000000000 +0100
+++ head/drivers/xen/Kconfig	2012-02-17 14:34:57.000000000 +0100
@@ -20,10 +20,6 @@ config XEN_PRIVILEGED_GUEST
 config XEN_UNPRIVILEGED_GUEST
 	def_bool !XEN_PRIVILEGED_GUEST
 	select PM
-	select PM_SLEEP
-	select PM_SLEEP_SMP if SMP
-	select PM_RUNTIME if PCI
-	select PM_OPS if PCI
 	select SUSPEND
 
 config XEN_PRIVCMD
--- head.orig/drivers/xen/Makefile	2012-10-04 12:54:07.000000000 +0200
+++ head/drivers/xen/Makefile	2012-10-04 13:05:34.000000000 +0200
@@ -1,6 +1,8 @@
 obj-$(CONFIG_PARAVIRT_XEN)	+= grant-table.o features.o events.o manage.o
+xen-biomerge-$(CONFIG_PARAVIRT_XEN) := biomerge.o
 xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
 xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
 
 xen-balloon-$(CONFIG_XEN)	:= balloon/
 obj-$(CONFIG_XEN)		+= core/
@@ -9,6 +11,7 @@ obj-y				+= xenbus/
 obj-$(CONFIG_XEN)		+= char/
 
 xen-backend-$(CONFIG_XEN_BACKEND)	:= util.o
+xen-evtchn-name-$(CONFIG_XEN)		:= evtchn
 
 nostackp := $(call cc-option, -fno-stack-protector)
 ifeq ($(CONFIG_PARAVIRT_XEN),y)
@@ -16,17 +19,23 @@ CFLAGS_features.o			:= $(nostackp)
 endif
 
 priv-$(CONFIG_USB_SUPPORT)		:= dbgp.o
+priv-$(CONFIG_PCI)			+= pci.o
 
 obj-$(CONFIG_XEN)			+= features.o $(xen-backend-y) $(xen-backend-m)
 obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= $(priv-y)
+obj-$(CONFIG_BLOCK)			+= $(xen-biomerge-y)
 obj-$(CONFIG_HOTPLUG_CPU)		+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= $(xen-balloon-y)
-obj-$(CONFIG_XEN_DEV_EVTCHN)		+= evtchn.o
+obj-$(CONFIG_XEN_DEV_EVTCHN)		+= $(xen-evtchn-name-y).o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)		+= platform-pci.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
+obj-$(CONFIG_XEN_DOM0)			+= pci.o
+
+xen-evtchn-y				:= evtchn.o
+
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
 obj-$(filter m,$(CONFIG_XEN_BLKDEV_TAP2)) += blktap2/ blktap2-new/
--- head.orig/drivers/xen/blkback/blkback.c	2013-06-20 15:24:06.000000000 +0200
+++ head/drivers/xen/blkback/blkback.c	2013-06-20 15:25:36.000000000 +0200
@@ -196,13 +196,17 @@ static void fast_flush_area(pending_req_
 
 static void print_stats(blkif_t *blkif)
 {
-	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d\n",
+	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d"
+	       "  |  fl %4d\n",
 	       current->comm, blkif->st_oo_req,
-	       blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
+	       blkif->st_rd_req, blkif->st_wr_req,
+	       blkif->st_br_req, blkif->st_fl_req);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
 	blkif->st_oo_req = 0;
+	blkif->st_br_req = 0;
+	blkif->st_fl_req = 0;
 }
 
 int blkif_schedule(void *arg)
@@ -260,19 +264,43 @@ int blkif_schedule(void *arg)
 	return 0;
 }
 
+static void drain_io(blkif_t *blkif)
+{
+	atomic_set(&blkif->drain, 1);
+	do {
+		/* The initial value is one, and one refcnt taken at the
+		/* The initial value is one, and one refcnt is taken at the
+		if (atomic_read(&blkif->refcnt) <= 2)
+			break;
+
+		wait_for_completion_interruptible_timeout(
+				&blkif->drain_complete, HZ);
+
+		if (!atomic_read(&blkif->drain))
+			break;
+	} while (!kthread_should_stop());
+	atomic_set(&blkif->drain, 0);
+}
+
 /******************************************************************
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
  */
 
 static void __end_block_io_op(pending_req_t *pending_req, int error)
 {
+	blkif_t *blkif = pending_req->blkif;
 	int status = BLKIF_RSP_OKAY;
 
 	/* An error fails the entire request. */
 	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
 	    (error == -EOPNOTSUPP)) {
 		DPRINTK("blkback: write barrier op failed, not supported\n");
-		blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+		blkback_barrier(XBT_NIL, blkif->be, 0);
+		status = BLKIF_RSP_EOPNOTSUPP;
+	} else if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+		   (error == -EOPNOTSUPP)) {
+		DPRINTK("blkback: flush diskcache op failed, not supported\n");
+		blkback_flush_diskcache(XBT_NIL, blkif->be, 0);
 		status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (error) {
 		DPRINTK("Buffer not up-to-date at end of operation, "
@@ -282,10 +310,13 @@ static void __end_block_io_op(pending_re
 
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
 		fast_flush_area(pending_req);
-		make_response(pending_req->blkif, pending_req->id,
+		make_response(blkif, pending_req->id,
 			      pending_req->operation, status);
-		blkif_put(pending_req->blkif);
 		free_req(pending_req);
+		if (atomic_read(&blkif->drain)
+		    && atomic_read(&blkif->refcnt) <= 2)
+			complete(&blkif->drain_complete);
+		blkif_put(blkif);
 	}
 }
 
@@ -366,6 +397,7 @@ static int _do_block_io_op(blkif_t *blki
 		case BLKIF_OP_READ:
 		case BLKIF_OP_WRITE:
 		case BLKIF_OP_WRITE_BARRIER:
+		case BLKIF_OP_FLUSH_DISKCACHE:
 			pending_req = alloc_req();
 			if (!pending_req) {
 				blkif->st_oo_req++;
@@ -443,7 +475,11 @@ static void dispatch_rw_block_io(blkif_t
 		break;
 	case BLKIF_OP_WRITE_BARRIER:
 		blkif->st_br_req++;
-		operation = WRITE_BARRIER;
+		operation = WRITE_FLUSH_FUA;
+		break;
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		blkif->st_fl_req++;
+		operation = WRITE_FLUSH;
 		break;
 	default:
 		operation = 0; /* make gcc happy */
@@ -452,7 +488,7 @@ static void dispatch_rw_block_io(blkif_t
 
 	/* Check that number of segments is sane. */
 	nseg = req->nr_segments;
-	if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
+	if (unlikely(nseg == 0 && !(operation & REQ_FLUSH)) ||
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		DPRINTK("Bad number of segments in request (%d)\n", nseg);
 		goto fail_response;
@@ -524,6 +560,12 @@ static void dispatch_rw_block_io(blkif_t
 		goto fail_flush;
 	}
 
+	/* Wait on all outstanding I/Os and, once those have completed,
+	 * issue the WRITE_FLUSH.
+	 */
+	if (req->operation == BLKIF_OP_WRITE_BARRIER)
+		drain_io(blkif);
+
 	plug_queue(blkif, preq.bdev);
 	atomic_set(&pending_req->pendcnt, 1);
 	blkif_get(blkif);
@@ -560,7 +602,7 @@ static void dispatch_rw_block_io(blkif_t
 	}
 
 	if (!bio) {
-		BUG_ON(operation != WRITE_BARRIER);
+		BUG_ON(!(operation & (REQ_FLUSH|REQ_FUA)));
 		bio = bio_alloc(GFP_KERNEL, 0);
 		if (unlikely(bio == NULL))
 			goto fail_put_bio;
--- head.orig/drivers/xen/blkback/common.h	2012-06-06 13:52:57.000000000 +0200
+++ head/drivers/xen/blkback/common.h	2013-06-20 15:25:38.000000000 +0200
@@ -48,6 +48,7 @@ struct vbd {
 	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
 	unsigned char  readonly;    /* Non-zero -> read-only */
 	unsigned char  type;        /* VDISK_xxx */
+	bool           flush_support;
 	u32            pdevice;     /* phys device that this vbd maps to */
 	struct block_device *bdev;
 	sector_t       size;        /* Cached size parameter */
@@ -74,6 +75,9 @@ typedef struct blkif_st {
 	atomic_t         refcnt;
 
 	wait_queue_head_t   wq;
+	/* for barrier (drain) requests */
+	struct completion   drain_complete;
+	atomic_t            drain;
 	struct task_struct  *xenblkd;
 	unsigned int        waiting_reqs;
 	struct request_queue *plug;
@@ -84,6 +88,7 @@ typedef struct blkif_st {
 	int                 st_wr_req;
 	int                 st_oo_req;
 	int                 st_br_req;
+	int                 st_fl_req;
 	int                 st_rd_sect;
 	int                 st_wr_sect;
 
@@ -141,5 +146,7 @@ int blkif_schedule(void *arg);
 
 void blkback_barrier(struct xenbus_transaction, struct backend_info *,
 		     int state);
+void blkback_flush_diskcache(struct xenbus_transaction,
+			     struct backend_info *, int state);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
--- head.orig/drivers/xen/blkback/interface.c	2013-06-20 15:18:15.000000000 +0200
+++ head/drivers/xen/blkback/interface.c	2013-06-20 15:25:40.000000000 +0200
@@ -49,6 +49,8 @@ blkif_t *blkif_alloc(domid_t domid)
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
 	init_waitqueue_head(&blkif->wq);
+	init_completion(&blkif->drain_complete);
+	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	init_waitqueue_head(&blkif->shutdown_wq);
--- head.orig/drivers/xen/blkback/vbd.c	2011-02-01 14:50:44.000000000 +0100
+++ head/drivers/xen/blkback/vbd.c	2011-09-07 12:35:54.000000000 +0200
@@ -55,6 +55,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 {
 	struct vbd *vbd;
 	struct block_device *bdev;
+	struct request_queue *q;
 
 	vbd = &blkif->vbd;
 	vbd->handle   = handle; 
@@ -88,6 +89,10 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;
 
+	q = bdev_get_queue(bdev);
+	if (q && q->flush_flags)
+		vbd->flush_support = true;
+
 	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
 		handle, blkif->domid);
 	return 0;
--- head.orig/drivers/xen/blkback/xenbus.c	2013-05-31 13:17:26.000000000 +0200
+++ head/drivers/xen/blkback/xenbus.c	2012-12-18 12:08:43.000000000 +0100
@@ -119,6 +119,7 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_
 VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
+VBD_SHOW(fl_req,  "%d\n", be->blkif->st_fl_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -127,6 +128,7 @@ static struct attribute *vbdstat_attrs[]
 	&dev_attr_rd_req.attr,
 	&dev_attr_wr_req.attr,
 	&dev_attr_br_req.attr,
+	&dev_attr_fl_req.attr,
 	&dev_attr_rd_sect.attr,
 	&dev_attr_wr_sect.attr,
 	NULL
@@ -210,6 +212,17 @@ void blkback_barrier(struct xenbus_trans
 		xenbus_dev_error(dev, err, "writing feature-barrier");
 }
 
+void blkback_flush_diskcache(struct xenbus_transaction xbt,
+			     struct backend_info *be, int state)
+{
+	struct xenbus_device *dev = be->dev;
+	int err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
+				"%d", state);
+
+	if (err)
+		xenbus_dev_error(dev, err, "writing feature-flush-cache");
+}
+
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -426,7 +439,8 @@ again:
 		return;
 	}
 
-	blkback_barrier(xbt, be, 1);
+	blkback_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
+	blkback_barrier(xbt, be, be->blkif->vbd.flush_support);
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
--- head.orig/drivers/xen/blkfront/blkfront.c	2013-05-31 13:33:41.000000000 +0200
+++ head/drivers/xen/blkfront/blkfront.c	2013-05-31 13:36:10.000000000 +0200
@@ -337,7 +337,7 @@ static void connect(struct blkfront_info
 {
 	unsigned long long sectors;
 	unsigned int binfo, sector_size, physical_sector_size;
-	int err, barrier;
+	int err, barrier, flush;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -382,25 +382,41 @@ static void connect(struct blkfront_info
 	if (err <= 0)
 		physical_sector_size = sector_size;
 
+	info->feature_flush = 0;
+	info->flush_op = 0;
+
 	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
 			   "feature-barrier", "%d", &barrier);
 	/*
 	 * If there's no "feature-barrier" defined, then it means
 	 * we're dealing with a very old backend which writes
-	 * synchronously; draining will do what needs to get done.
-	 *
-	 * If there are barriers, then we can do full queued writes
-	 * with tagged barriers.
+	 * synchronously; nothing to do.
 	 *
-	 * If barriers are not supported, then there's no much we can
-	 * do, so just set ordering to NONE.
+	 * If there are barriers, then we use flush.
+	 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+	if (err > 0 && barrier) {
+		info->feature_flush = REQ_FLUSH | REQ_FUA;
+		info->flush_op = BLKIF_OP_WRITE_BARRIER;
+	}
+	/*
+	 * And if there is "feature-flush-cache", use that in preference
+	 * to barriers.
 	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "feature-flush-cache", "%d", &flush);
+	if (err > 0 && flush) {
+		info->feature_flush = REQ_FLUSH;
+		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
+	}
+#else
 	if (err <= 0)
-		info->feature_barrier = QUEUE_ORDERED_DRAIN;
+		info->feature_flush = QUEUE_ORDERED_DRAIN;
 	else if (barrier)
-		info->feature_barrier = QUEUE_ORDERED_TAG;
+		info->feature_flush = QUEUE_ORDERED_TAG;
 	else
-		info->feature_barrier = QUEUE_ORDERED_NONE;
+		info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
 
 	err = xlvbd_add(sectors, info->vdevice, binfo, sector_size,
 			physical_sector_size, info);
@@ -539,7 +555,7 @@ static inline int ADD_ID_TO_FREELIST(
 	if (!info->shadow[id].request)
 		return -ENXIO;
 	info->shadow[id].req.id  = info->shadow_free;
-	info->shadow[id].request = 0;
+	info->shadow[id].request = NULL;
 	info->shadow_free = id;
 	return 0;
 }
@@ -762,14 +778,10 @@ int blkif_getgeo(struct block_device *bd
 
 
 /*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request.  Reads
+ * and writes are handled as expected.
  *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
+ * @req: a request struct
  */
 static int blkif_queue_request(struct request *req)
 {
@@ -798,7 +810,7 @@ static int blkif_queue_request(struct re
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 	id = GET_ID_FROM_FREELIST(info);
-	info->shadow[id].request = (unsigned long)req;
+	info->shadow[id].request = req;
 
 	ring_req->id = id;
 	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -806,8 +818,12 @@ static int blkif_queue_request(struct re
 
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+#else
 	if (req->cmd_flags & REQ_HARDBARRIER)
-		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+#endif
+		ring_req->operation = info->flush_op;
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
 	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -865,7 +881,9 @@ void do_blkif_request(struct request_que
 
 		blk_start_request(req);
 
-		if (req->cmd_type != REQ_TYPE_FS) {
+		if ((req->cmd_type != REQ_TYPE_FS) ||
+		    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+		     !info->flush_op)) {
 			req->errors = (DID_ERROR << 16) |
 				      (DRIVER_INVALID << 24);
 			__blk_end_request_all(req, -EIO);
@@ -931,7 +949,7 @@ static irqreturn_t blkif_int(int irq, vo
 			continue;
 		}
 		id   = bret->id;
-		req  = (struct request *)info->shadow[id].request;
+		req  = info->shadow[id].request;
 
 		blkif_completion(&info->shadow[id]);
 
@@ -945,14 +963,31 @@ static irqreturn_t blkif_int(int irq, vo
 
 		ret = bret->status == BLKIF_RSP_OKAY ? 0 : -EIO;
 		switch (bret->operation) {
+			const char *kind;
+
+		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
-				pr_warning("blkfront: %s: %s op failed\n",
-					   info->gd->disk_name,
-					   op_name(bret->operation));
+			kind = "";
+			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP))
 				ret = -EOPNOTSUPP;
-				info->feature_barrier = QUEUE_ORDERED_NONE;
-			        xlvbd_barrier(info);
+			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+				     info->shadow[id].req.nr_segments == 0)) {
+				kind = "empty ";
+				ret = -EOPNOTSUPP;
+			}
+			if (unlikely(ret)) {
+				if (ret == -EOPNOTSUPP) {
+					pr_warn("blkfront: %s: %s%s op failed\n",
+					        info->gd->disk_name, kind,
+						op_name(bret->operation));
+					ret = 0;
+				}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+				info->feature_flush = 0;
+#else
+				info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
+			        xlvbd_flush(info);
 			}
 			/* fall through */
 		case BLKIF_OP_READ:
@@ -1044,7 +1079,7 @@ static int blkif_recover(struct blkfront
 	/* Stage 3: Find pending requests and requeue them. */
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
-		if (copy[i].request == 0)
+		if (!copy[i].request)
 			continue;
 
 		/* Grab a request slot and copy shadow state into it. */
@@ -1062,8 +1097,7 @@ static int blkif_recover(struct blkfront
 				req->seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->id].frame[j]),
-				rq_data_dir((struct request *)
-					    info->shadow[req->id].request) ?
+				rq_data_dir(info->shadow[req->id].request) ?
 				GTF_readonly : 0);
 		info->shadow[req->id].req = *req;
 
--- head.orig/drivers/xen/blkfront/block.h	2013-05-31 13:26:15.000000000 +0200
+++ head/drivers/xen/blkfront/block.h	2013-05-31 13:36:13.000000000 +0200
@@ -77,7 +77,7 @@ struct xlbd_major_info
 
 struct blk_shadow {
 	blkif_request_t req;
-	unsigned long request;
+	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
@@ -107,7 +107,8 @@ struct blkfront_info
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
 	unsigned long shadow_free;
-	int feature_barrier;
+	unsigned int feature_flush;
+	unsigned int flush_op;
 	int is_ready;
 };
 
@@ -135,7 +136,7 @@ int xlvbd_add(blkif_sector_t capacity, i
 	      unsigned int sector_size, unsigned int physical_sector_size,
 	      struct blkfront_info *);
 void xlvbd_del(struct blkfront_info *info);
-int xlvbd_barrier(struct blkfront_info *info);
+void xlvbd_flush(struct blkfront_info *info);
 
 #ifdef CONFIG_SYSFS
 int xlvbd_sysfs_addif(struct blkfront_info *info);
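
The feature_flush/flush_op pair declared above is what connect() fills in from the backend's xenstore keys. A condensed sketch of that negotiation, assuming the 2.6.37 REQ_FLUSH/REQ_FUA flags and the xenbus_scanf() calls shown in the blkfront.c hunks (the helper name is made up; "block.h" is assumed to bring in the xenbus and block-layer declarations it already uses):

	#include <linux/blkdev.h>
	#include "block.h"

	/*
	 * Sketch: "feature-flush-cache" (a pure cache flush) is preferred
	 * over "feature-barrier" (emulated via BLKIF_OP_WRITE_BARRIER);
	 * with neither key present, no flush capability is reported.
	 */
	static void example_pick_flush_op(struct blkfront_info *info)
	{
		int barrier = 0, flush = 0;

		info->feature_flush = 0;
		info->flush_op = 0;

		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				 "feature-barrier", "%d", &barrier) > 0 &&
		    barrier) {
			info->feature_flush = REQ_FLUSH | REQ_FUA;
			info->flush_op = BLKIF_OP_WRITE_BARRIER;
		}

		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				 "feature-flush-cache", "%d", &flush) > 0 &&
		    flush) {
			info->feature_flush = REQ_FLUSH;
			info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
		}
	}
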
--- head.orig/drivers/xen/blkfront/vbd.c	2013-05-31 13:33:43.000000000 +0200
+++ head/drivers/xen/blkfront/vbd.c	2013-05-31 13:36:14.000000000 +0200
@@ -475,7 +475,7 @@ xlvbd_add(blkif_sector_t capacity, int v
 
 	info->gd = gd;
 
-	xlvbd_barrier(info);
+	xlvbd_flush(info);
 
 	if (vdisk_info & VDISK_READONLY)
 		set_disk_ro(gd, 1);
@@ -521,36 +521,38 @@ xlvbd_del(struct blkfront_info *info)
 	info->rq = NULL;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
-int
-xlvbd_barrier(struct blkfront_info *info)
+void
+xlvbd_flush(struct blkfront_info *info)
 {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+	blk_queue_flush(info->rq, info->feature_flush);
+	pr_info("blkfront: %s: %s: %s\n",
+		info->gd->disk_name,
+		info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
+			     "flush diskcache" : "barrier or flush"),
+		info->feature_flush ? "enabled" : "disabled");
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 	int err;
 	const char *barrier;
 
-	switch (info->feature_barrier) {
+	switch (info->feature_flush) {
 	case QUEUE_ORDERED_DRAIN:	barrier = "enabled (drain)"; break;
 	case QUEUE_ORDERED_TAG:		barrier = "enabled (tag)"; break;
 	case QUEUE_ORDERED_NONE:	barrier = "disabled"; break;
 	default:			return -EINVAL;
 	}
 
-	err = blk_queue_ordered(info->rq, info->feature_barrier);
+	err = blk_queue_ordered(info->rq, info->feature_flush);
 	if (err)
 		return err;
 	pr_info("blkfront: %s: barriers %s\n",
 		info->gd->disk_name, barrier);
-	return 0;
-}
 #else
-int
-xlvbd_barrier(struct blkfront_info *info)
-{
-	if (info->feature_barrier)
+	if (info->feature_flush)
 		pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
-	return -ENOSYS;
-}
 #endif
+}
 
 #ifdef CONFIG_SYSFS
 static ssize_t show_media(struct device *dev,
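
xlvbd_flush() above hinges on the 2.6.37 switch from blk_queue_ordered() to blk_queue_flush(). A minimal usage sketch of the new interface (the helper name is made up):

	#include <linux/blkdev.h>

	/*
	 * Sketch: a driver declares the queue's cache-control capabilities
	 * once; the block layer then sequences REQ_FLUSH/REQ_FUA requests
	 * accordingly.  Passing 0 advertises a write-through cache.
	 */
	static void example_declare_flush(struct request_queue *q,
					  bool writeback_cache, bool has_fua)
	{
		unsigned int flush = 0;

		if (writeback_cache) {
			flush |= REQ_FLUSH;
			if (has_fua)
				flush |= REQ_FUA;
		}
		blk_queue_flush(q, flush);
	}
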
--- head.orig/drivers/xen/blktap/blktap.c	2013-06-20 15:24:44.000000000 +0200
+++ head/drivers/xen/blktap/blktap.c	2012-05-23 13:38:37.000000000 +0200
@@ -445,6 +445,7 @@ static const struct file_operations blkt
 	.unlocked_ioctl = blktap_ioctl,
 	.open    = blktap_open,
 	.release = blktap_release,
+	.llseek  = no_llseek,
 	.mmap    = blktap_mmap,
 };
 
@@ -578,6 +579,8 @@ static int blktap_open(struct inode *ino
 	tap_blkif_t *info;
 	int i;
 	
+	nonseekable_open(inode, filp);
+
 	/* ctrl device, treat differently */
 	if (!idx)
 		return 0;
--- head.orig/drivers/xen/blktap2/device.c	2012-02-16 13:40:30.000000000 +0100
+++ head/drivers/xen/blktap2/device.c	2012-02-16 13:43:41.000000000 +0100
@@ -824,7 +824,7 @@ blktap_device_run_queue(struct blktap *t
 			continue;
 		}
 
-		if (req->cmd_flags & REQ_HARDBARRIER) {
+		if (req->cmd_flags & (REQ_FLUSH|REQ_FUA)) {
 			blk_start_request(req);
 			__blk_end_request_all(req, -EOPNOTSUPP);
 			continue;
--- head.orig/drivers/xen/blktap2-new/device.c	2012-02-16 16:38:13.000000000 +0100
+++ head/drivers/xen/blktap2-new/device.c	2012-02-16 16:38:28.000000000 +0100
@@ -306,9 +306,6 @@ blktap_device_configure(struct blktap *t
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
 
-	/* We are reordering, but cacheless. */
-	blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN);
-
 	spin_unlock_irq(&dev->lock);
 }
 
--- head.orig/drivers/xen/core/Makefile	2012-02-17 14:28:52.000000000 +0100
+++ head/drivers/xen/core/Makefile	2012-02-17 14:35:00.000000000 +0100
@@ -4,7 +4,6 @@
 
 obj-y := evtchn.o gnttab.o reboot.o machine_reboot.o fallback.o
 
-obj-$(CONFIG_PCI)		+= pci.o
 obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o
 obj-$(CONFIG_PROC_FS)		+= xen_proc.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
--- head.orig/drivers/xen/core/evtchn.c	2013-01-30 11:51:17.000000000 +0100
+++ head/drivers/xen/core/evtchn.c	2014-06-30 10:29:57.000000000 +0200
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/irq.h>
+#include <linux/irqdesc.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
@@ -88,14 +89,17 @@ static struct irq_cfg _irq_cfg[] = {
 static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
 {
 #ifdef CONFIG_SPARSE_IRQ
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	return desc ? desc->chip_data : NULL;
+	return get_irq_chip_data(irq);
 #else
 	return irq < NR_IRQS ? _irq_cfg + irq : NULL;
 #endif
 }
 
+static inline struct irq_cfg *__pure irq_data_cfg(struct irq_data *data)
+{
+	return irq_data_get_irq_chip_data(data);
+}
+
 /* Constructor for packed IRQ information. */
 static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
 {
@@ -115,26 +119,47 @@ static inline u32 mk_irq_info(u32 type, 
  * Accessors for packed IRQ information.
  */
 
+static inline unsigned int evtchn_from_irq_cfg(const struct irq_cfg *cfg)
+{
+	return cfg->info & ((1U << _EVTCHN_BITS) - 1);
+}
+
+static inline unsigned int evtchn_from_irq_data(struct irq_data *data)
+{
+	const struct irq_cfg *cfg = irq_data_cfg(data);
+
+	return cfg ? evtchn_from_irq_cfg(cfg) : 0;
+}
+
 static inline unsigned int evtchn_from_irq(int irq)
 {
-	const struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_data *data = irq_get_irq_data(irq);
 
-	return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0;
+	return data ? evtchn_from_irq_data(data) : 0;
+}
+
+static inline unsigned int index_from_irq_cfg(const struct irq_cfg *cfg)
+{
+	return (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
 }
 
 static inline unsigned int index_from_irq(int irq)
 {
 	const struct irq_cfg *cfg = irq_cfg(irq);
 
-	return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1)
-		   : 0;
+	return cfg ? index_from_irq_cfg(cfg) : 0;
+}
+
+static inline unsigned int type_from_irq_cfg(const struct irq_cfg *cfg)
+{
+	return cfg->info >> (32 - _IRQT_BITS);
 }
 
 static inline unsigned int type_from_irq(int irq)
 {
 	const struct irq_cfg *cfg = irq_cfg(irq);
 
-	return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
+	return cfg ? type_from_irq_cfg(cfg) : IRQT_UNBOUND;
 }
 
 unsigned int irq_from_evtchn(unsigned int port)
@@ -171,16 +196,17 @@ static inline unsigned long active_evtch
 		~sh->evtchn_mask[idx]);
 }
 
-static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu, int irq,
+static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu,
+				struct irq_data *data,
 				const struct cpumask *cpumask)
 {
 	shared_info_t *s = HYPERVISOR_shared_info;
 
 	BUG_ON(!test_bit(chn, s->evtchn_mask));
 
-	if (irq >= 0) {
+	if (data) {
 		BUG_ON(!cpumask_test_cpu(cpu, cpumask));
-		cpumask_copy(irq_to_desc(irq)->affinity, cpumask);
+		cpumask_copy(data->affinity, cpumask);
 	}
 
 	clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn]));
@@ -190,7 +216,11 @@ static void _bind_evtchn_to_cpu(unsigned
 
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
-	_bind_evtchn_to_cpu(chn, cpu, evtchn_to_irq[chn], cpumask_of(cpu));
+	int irq = evtchn_to_irq[chn];
+
+	_bind_evtchn_to_cpu(chn, cpu,
+			    irq != -1 ? irq_get_irq_data(irq) : NULL,
+			    cpumask_of(cpu));
 }
 
 static void init_evtchn_cpu_bindings(void)
@@ -199,10 +229,10 @@ static void init_evtchn_cpu_bindings(voi
 
 	/* By default all event channels notify CPU#0. */
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
+		struct irq_data *data = irq_get_irq_data(i);
 
-		if (desc)
-			cpumask_copy(desc->affinity, cpumask_of(0));
+		if (data)
+			cpumask_copy(data->affinity, cpumask_of(0));
 	}
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -225,7 +255,8 @@ static inline unsigned long active_evtch
 	return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
 }
 
-static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu, int irq,
+static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu,
+				struct irq_data *data,
 				const struct cpumask *cpumask)
 {
 }
@@ -394,32 +425,37 @@ asmlinkage void __irq_entry evtchn_do_up
 	set_irq_regs(old_regs);
 }
 
-static int find_unbound_irq(unsigned int node, struct irq_chip *chip)
+/*
+ * On success, returns with cfg->bindcount set to 1; the
+ * irq_mapping_update_lock is not held on return.
+ */
+static int find_unbound_irq(unsigned int node, struct irq_cfg **pcfg,
+			    struct irq_chip *chip)
 {
 	static int warned;
 	int irq;
 
 	for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
+		struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+		struct irq_desc *desc = irq_to_desc(irq);
 
-		desc = irq_to_desc(irq);
-		if (!desc)
-			desc = irq_to_desc_alloc_node(irq, node);
-		else if (desc->chip != &no_irq_chip &&
-			 desc->chip != &dynirq_chip)
-			continue;
-		if (!desc)
+		if (unlikely(!cfg))
 			return -ENOMEM;
 
-		cfg = desc->chip_data;
-		if (cfg && !cfg->bindcount) {
+		spin_lock(&irq_mapping_update_lock);
+		if ((desc->irq_data.chip == &no_irq_chip ||
+		     desc->irq_data.chip == chip) &&
+		    !cfg->bindcount) {
+			cfg->bindcount = 1;
+			spin_unlock(&irq_mapping_update_lock);
+			*pcfg = cfg;
 			desc->status |= IRQ_NOPROBE;
 			set_irq_chip_and_handler_name(irq, chip,
 						      handle_fasteoi_irq,
 						      "fasteoi");
 			return irq;
 		}
+		spin_unlock(&irq_mapping_update_lock);
 	}
 
 	if (!warned) {
@@ -440,40 +476,45 @@ static int bind_caller_port_to_irq(unsig
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = evtchn_to_irq[caller_port]) == -1) {
-		if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0)
-			goto out;
-
-		evtchn_to_irq[caller_port] = irq;
-		irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT,
-						  0, caller_port);
-	}
+		struct irq_cfg *cfg;
 
-	irq_cfg(irq)->bindcount++;
+		spin_unlock(&irq_mapping_update_lock);
+		if ((irq = find_unbound_irq(numa_node_id(), &cfg,
+					    &dynirq_chip)) < 0)
+			return irq;
+		spin_lock(&irq_mapping_update_lock);
+		if (evtchn_to_irq[caller_port] == -1) {
+			evtchn_to_irq[caller_port] = irq;
+			cfg->info = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
+		} else {
+			cfg->bindcount = 0;
+			irq = evtchn_to_irq[caller_port];
+			++irq_cfg(irq)->bindcount;
+		}
+	} else
+		++irq_cfg(irq)->bindcount;
 
- out:
 	spin_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
 static int bind_local_port_to_irq(unsigned int local_port)
 {
+	struct irq_cfg *cfg;
 	int irq;
 
-	spin_lock(&irq_mapping_update_lock);
-
-	BUG_ON(evtchn_to_irq[local_port] != -1);
-
-	if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0) {
+	if ((irq = find_unbound_irq(numa_node_id(), &cfg, &dynirq_chip)) < 0) {
 		if (close_evtchn(local_port))
 			BUG();
-		goto out;
+		return irq;
 	}
 
+	spin_lock(&irq_mapping_update_lock);
+
+	BUG_ON(evtchn_to_irq[local_port] != -1);
 	evtchn_to_irq[local_port] = irq;
-	irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
-	irq_cfg(irq)->bindcount++;
+	cfg->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
 
- out:
 	spin_unlock(&irq_mapping_update_lock);
 	return irq;
 }
@@ -509,91 +550,107 @@ static int bind_interdomain_evtchn_to_ir
 
 static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
-	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int irq;
 
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
-		if ((irq = find_unbound_irq(cpu_to_node(cpu),
+		struct irq_cfg *cfg;
+
+		spin_unlock(&irq_mapping_update_lock);
+		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
 					    &dynirq_chip)) < 0)
-			goto out;
+			return irq;
+		spin_lock(&irq_mapping_update_lock);
 
-		bind_virq.virq = virq;
-		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		if (per_cpu(virq_to_irq, cpu)[virq] == -1) {
+			struct evtchn_bind_virq bind_virq = {
+				.virq = virq,
+				.vcpu = cpu
+			};
 
-		evtchn_to_irq[evtchn] = irq;
-		irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+			if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+							&bind_virq) != 0)
+				BUG();
 
-		per_cpu(virq_to_irq, cpu)[virq] = irq;
+			evtchn_to_irq[bind_virq.port] = irq;
+			cfg->info = mk_irq_info(IRQT_VIRQ, virq, bind_virq.port);
 
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
+			per_cpu(virq_to_irq, cpu)[virq] = irq;
 
-	irq_cfg(irq)->bindcount++;
+			bind_evtchn_to_cpu(bind_virq.port, cpu);
+		} else {
+			cfg->bindcount = 0;
+			irq = per_cpu(virq_to_irq, cpu)[virq];
+			++irq_cfg(irq)->bindcount;
+		}
+	} else
+		++irq_cfg(irq)->bindcount;
 
- out:
 	spin_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 {
-	struct evtchn_bind_ipi bind_ipi;
-	int evtchn, irq;
+	int irq;
 
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
-		if ((irq = find_unbound_irq(cpu_to_node(cpu),
+		struct irq_cfg *cfg;
+
+		spin_unlock(&irq_mapping_update_lock);
+		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
 					    &dynirq_chip)) < 0)
-			goto out;
+			return irq;
+		spin_lock(&irq_mapping_update_lock);
 
-		bind_ipi.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
-						&bind_ipi) != 0)
-			BUG();
-		evtchn = bind_ipi.port;
+		if (per_cpu(ipi_to_irq, cpu)[ipi] == -1) {
+			struct evtchn_bind_ipi bind_ipi = { .vcpu = cpu };
 
-		evtchn_to_irq[evtchn] = irq;
-		irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
+			if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+							&bind_ipi) != 0)
+				BUG();
 
-		per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+			evtchn_to_irq[bind_ipi.port] = irq;
+			cfg->info = mk_irq_info(IRQT_IPI, ipi, bind_ipi.port);
 
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
+			per_cpu(ipi_to_irq, cpu)[ipi] = irq;
 
-	irq_cfg(irq)->bindcount++;
+			bind_evtchn_to_cpu(bind_ipi.port, cpu);
+		} else {
+			cfg->bindcount = 0;
+			irq = per_cpu(ipi_to_irq, cpu)[ipi];
+			++irq_cfg(irq)->bindcount;
+		}
+	} else
+		++irq_cfg(irq)->bindcount;
 
- out:
 	spin_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
 static void unbind_from_irq(unsigned int irq)
 {
-	unsigned int cpu;
-	int evtchn = evtchn_from_irq(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned int cpu, evtchn = evtchn_from_irq_cfg(cfg);
 
 	spin_lock(&irq_mapping_update_lock);
 
-	if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
-		if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
+	if (!--cfg->bindcount && VALID_EVTCHN(evtchn)) {
+		if ((type_from_irq_cfg(cfg) != IRQT_CALLER_PORT) &&
 		    close_evtchn(evtchn))
 			BUG();
 
-		switch (type_from_irq(irq)) {
+		switch (type_from_irq_cfg(cfg)) {
 		case IRQT_VIRQ:
 			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
-				[index_from_irq(irq)] = -1;
+				[index_from_irq_cfg(cfg)] = -1;
 			break;
 		case IRQT_IPI:
 			per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
-				[index_from_irq(irq)] = -1;
+				[index_from_irq_cfg(cfg)] = -1;
 			break;
 		default:
 			break;
@@ -603,7 +660,7 @@ static void unbind_from_irq(unsigned int
 		bind_evtchn_to_cpu(evtchn, 0);
 
 		evtchn_to_irq[evtchn] = -1;
-		irq_cfg(irq)->info = IRQ_UNBOUND;
+		cfg->info = IRQ_UNBOUND;
 
 		/* Zap stats across IRQ changes of use. */
 		for_each_possible_cpu(cpu)
@@ -744,9 +801,10 @@ void unbind_from_irqhandler(unsigned int
 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
 
 #ifdef CONFIG_SMP
-static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data,
+			    const struct cpumask *dest, bool force)
 {
-	unsigned int port = evtchn_from_irq(irq);
+	unsigned int port = evtchn_from_irq_data(data);
 	unsigned int cpu = cpumask_any(dest);
 	struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
 	bool masked;
@@ -758,7 +816,7 @@ static int set_affinity_irq(unsigned int
 	masked = test_and_set_evtchn_mask(port);
 	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv);
 	if (rc == 0)
-		_bind_evtchn_to_cpu(port, cpu, irq, dest);
+		_bind_evtchn_to_cpu(port, cpu, data, dest);
 	if (!masked)
 		unmask_evtchn(port);
 
@@ -766,9 +824,10 @@ static int set_affinity_irq(unsigned int
 }
 #endif
 
-int resend_irq_on_evtchn(unsigned int irq)
+int resend_irq_on_evtchn(struct irq_data *data)
 {
-	int masked, evtchn = evtchn_from_irq(irq);
+	unsigned int evtchn = evtchn_from_irq_data(data);
+	bool masked;
 
 	if (!VALID_EVTCHN(evtchn))
 		return 1;
@@ -785,52 +844,51 @@ int resend_irq_on_evtchn(unsigned int ir
  * Interface to generic handling in irq.c
  */
 
-static void unmask_dynirq(unsigned int irq)
+static void unmask_dynirq(struct irq_data *data)
 {
-	int evtchn = evtchn_from_irq(irq);
+	unsigned int evtchn = evtchn_from_irq_data(data);
 
 	if (VALID_EVTCHN(evtchn))
 		unmask_evtchn(evtchn);
 }
 
-static void mask_dynirq(unsigned int irq)
+static void mask_dynirq(struct irq_data *data)
 {
-	int evtchn = evtchn_from_irq(irq);
+	unsigned int evtchn = evtchn_from_irq_data(data);
 
 	if (VALID_EVTCHN(evtchn))
 		mask_evtchn(evtchn);
 }
 
-static unsigned int startup_dynirq(unsigned int irq)
+static unsigned int startup_dynirq(struct irq_data *data)
 {
-	unmask_dynirq(irq);
+	unmask_dynirq(data);
 	return 0;
 }
 
 #define shutdown_dynirq mask_dynirq
 
-static void end_dynirq(unsigned int irq)
+static void end_dynirq(struct irq_data *data)
 {
-	if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) {
-		move_masked_irq(irq);
-		unmask_dynirq(irq);
+	if (!(irq_to_desc(data->irq)->status & IRQ_DISABLED)) {
+		move_masked_irq(data->irq);
+		unmask_dynirq(data);
 	}
 }
 
 static struct irq_chip dynirq_chip = {
-	.name     = "Dynamic",
-	.startup  = startup_dynirq,
-	.shutdown = shutdown_dynirq,
-	.enable   = unmask_dynirq,
-	.disable  = mask_dynirq,
-	.mask     = mask_dynirq,
-	.unmask   = unmask_dynirq,
-	.end      = end_dynirq,
-	.eoi      = end_dynirq,
+	.name             = "Dynamic",
+	.irq_startup      = startup_dynirq,
+	.irq_shutdown     = shutdown_dynirq,
+	.irq_enable       = unmask_dynirq,
+	.irq_disable      = mask_dynirq,
+	.irq_mask         = mask_dynirq,
+	.irq_unmask       = unmask_dynirq,
+	.irq_eoi          = end_dynirq,
 #ifdef CONFIG_SMP
-	.set_affinity = set_affinity_irq,
+	.irq_set_affinity = set_affinity_irq,
 #endif
-	.retrigger = resend_irq_on_evtchn,
+	.irq_retrigger    = resend_irq_on_evtchn,
 };
 
 /* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
@@ -883,19 +941,20 @@ static inline void pirq_query_unmask(int
 		set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
 }
 
-static int set_type_pirq(unsigned int irq, unsigned int type)
+static int set_type_pirq(struct irq_data *data, unsigned int type)
 {
 	if (type != IRQ_TYPE_PROBE)
 		return -EINVAL;
-	set_bit(irq - PIRQ_BASE, probing_pirq);
+	set_bit(data->irq - PIRQ_BASE, probing_pirq);
 	return 0;
 }
 
-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
 {
 	struct evtchn_bind_pirq bind_pirq;
-	int evtchn = evtchn_from_irq(irq);
-	unsigned int pirq = irq - PIRQ_BASE;
+	struct irq_cfg *cfg = irq_data_cfg(data);
+	unsigned int evtchn = evtchn_from_irq_cfg(cfg);
+	unsigned int irq = data->irq, pirq = irq - PIRQ_BASE;
 
 	if (VALID_EVTCHN(evtchn)) {
 		if (pirq < nr_pirqs)
@@ -919,8 +978,8 @@ static void enable_pirq(unsigned int irq
 	pirq_query_unmask(irq);
 
 	evtchn_to_irq[evtchn] = irq;
-	_bind_evtchn_to_cpu(evtchn, 0, -1, NULL);
-	irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
+	_bind_evtchn_to_cpu(evtchn, 0, NULL, NULL);
+	cfg->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
 
  out:
 	pirq_unmask_and_notify(evtchn, irq);
@@ -928,15 +987,16 @@ static void enable_pirq(unsigned int irq
 
 #define disable_pirq mask_pirq
 
-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
 {
-	enable_pirq(irq);
+	enable_pirq(data);
 	return 0;
 }
 
-static void shutdown_pirq(unsigned int irq)
+static void shutdown_pirq(struct irq_data *data)
 {
-	int evtchn = evtchn_from_irq(irq);
+	struct irq_cfg *cfg = irq_data_cfg(data);
+	unsigned int evtchn = evtchn_from_irq_cfg(cfg);
 
 	if (!VALID_EVTCHN(evtchn))
 		return;
@@ -948,48 +1008,47 @@ static void shutdown_pirq(unsigned int i
 
 	bind_evtchn_to_cpu(evtchn, 0);
 	evtchn_to_irq[evtchn] = -1;
-	irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
+	cfg->info = mk_irq_info(IRQT_PIRQ, index_from_irq_cfg(cfg), 0);
 }
 
-static void unmask_pirq(unsigned int irq)
+static void unmask_pirq(struct irq_data *data)
 {
-	int evtchn = evtchn_from_irq(irq);
+	unsigned int evtchn = evtchn_from_irq_data(data);
 
 	if (VALID_EVTCHN(evtchn))
-		pirq_unmask_and_notify(evtchn, irq);
+		pirq_unmask_and_notify(evtchn, data->irq);
 }
 
 #define mask_pirq mask_dynirq
 
-static void end_pirq(unsigned int irq)
+static void end_pirq(struct irq_data *data)
 {
-	const struct irq_desc *desc = irq_to_desc(irq);
+	const struct irq_desc *desc = irq_to_desc(data->irq);
 
 	if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
 	    (IRQ_DISABLED|IRQ_PENDING))
-		shutdown_pirq(irq);
+		shutdown_pirq(data);
 	else {
 		if (!(desc->status & IRQ_DISABLED))
-			move_masked_irq(irq);
-		unmask_pirq(irq);
+			move_masked_irq(data->irq);
+		unmask_pirq(data);
 	}
 }
 
 static struct irq_chip pirq_chip = {
-	.name     = "Phys",
-	.startup  = startup_pirq,
-	.shutdown = shutdown_pirq,
-	.enable   = enable_pirq,
-	.disable  = disable_pirq,
-	.mask     = mask_pirq,
-	.unmask   = unmask_pirq,
-	.end      = end_pirq,
-	.eoi      = end_pirq,
-	.set_type = set_type_pirq,
+	.name             = "Phys",
+	.irq_startup      = startup_pirq,
+	.irq_shutdown     = shutdown_pirq,
+	.irq_enable       = enable_pirq,
+	.irq_disable      = disable_pirq,
+	.irq_mask         = mask_pirq,
+	.irq_unmask       = unmask_pirq,
+	.irq_eoi          = end_pirq,
+	.irq_set_type     = set_type_pirq,
 #ifdef CONFIG_SMP
-	.set_affinity = set_affinity_irq,
+	.irq_set_affinity = set_affinity_irq,
 #endif
-	.retrigger = resend_irq_on_evtchn,
+	.irq_retrigger    = resend_irq_on_evtchn,
 };
 
 int irq_ignore_unhandled(unsigned int irq)
@@ -1092,7 +1151,7 @@ static void restore_cpu_virqs(unsigned i
 		/* Record the new mapping. */
 		evtchn_to_irq[evtchn] = irq;
 		irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
-		_bind_evtchn_to_cpu(evtchn, cpu, -1, NULL);
+		_bind_evtchn_to_cpu(evtchn, cpu, NULL, NULL);
 
 		/* Ready for use. */
 		unmask_evtchn(evtchn);
@@ -1120,7 +1179,7 @@ static void restore_cpu_ipis(unsigned in
 		/* Record the new mapping. */
 		evtchn_to_irq[evtchn] = irq;
 		irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
-		_bind_evtchn_to_cpu(evtchn, cpu, -1, NULL);
+		_bind_evtchn_to_cpu(evtchn, cpu, NULL, NULL);
 
 		/* Ready for use. */
 		if (!(irq_to_desc(irq)->status & IRQ_DISABLED))
@@ -1180,29 +1239,41 @@ int __init arch_early_irq_init(void)
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
-		irq_to_desc(i)->chip_data = _irq_cfg + i;
+		set_irq_chip_data(i, _irq_cfg + i);
 
 	return 0;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 {
-	if (!desc->chip_data) {
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg = NULL;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = get_irq_chip_data(at);
+		if (cfg)
+			return cfg;
+	}
+
+#ifdef CONFIG_SPARSE_IRQ
 #ifdef CONFIG_SMP
-		/* By default all event channels notify CPU#0. */
-		cpumask_copy(desc->affinity, cpumask_of(0));
+	/* By default all event channels notify CPU#0. */
+	cpumask_copy(irq_get_irq_data(at)->affinity, cpumask_of(0));
 #endif
-		desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC);
-	}
-	if (!desc->chip_data) {
-		pr_emerg("cannot alloc irq_cfg\n");
-		BUG();
-	}
 
-	return 0;
-}
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (cfg)
+		set_irq_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+
+	return cfg;
+#else
+	return irq_cfg(at);
 #endif
+}
 
 #ifdef CONFIG_SPARSE_IRQ
 #ifdef CONFIG_X86_IO_APIC
@@ -1239,7 +1310,7 @@ int __init arch_probe_nr_irqs(void)
 
 	printk(KERN_DEBUG "nr_pirqs: %d\n", nr_pirqs);
 
-	return 0;
+	return ARRAY_SIZE(_irq_cfg);
 }
 #endif
 
@@ -1271,10 +1342,12 @@ int assign_irq_vector(int irq, struct ir
 
 void evtchn_register_pirq(int irq)
 {
+	struct irq_cfg *cfg = irq_cfg(irq);
+
 	BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs);
-	if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
+	if (identity_mapped_irq(irq) || type_from_irq_cfg(cfg) != IRQT_UNBOUND)
 		return;
-	irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0);
+	cfg->info = mk_irq_info(IRQT_PIRQ, irq, 0);
 	set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
 				      "fasteoi");
 }
@@ -1283,21 +1356,15 @@ int evtchn_map_pirq(int irq, int xen_pir
 {
 	if (irq < 0) {
 #ifdef CONFIG_SPARSE_IRQ
-		spin_lock(&irq_mapping_update_lock);
-		irq = find_unbound_irq(numa_node_id(), &pirq_chip);
-		if (irq >= 0) {
-			struct irq_desc *desc;
-			struct irq_cfg *cfg;
+		struct irq_cfg *cfg;
 
-			desc = irq_to_desc_alloc_node(irq, numa_node_id());
-			cfg = desc->chip_data;
-			BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
-			cfg->bindcount++;
-			cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
-		}
-		spin_unlock(&irq_mapping_update_lock);
+		irq = find_unbound_irq(numa_node_id(), &cfg, &pirq_chip);
 		if (irq < 0)
 			return irq;
+		spin_lock(&irq_mapping_update_lock);
+		BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
+		cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
+		spin_unlock(&irq_mapping_update_lock);
 	} else if (irq >= PIRQ_BASE && irq < PIRQ_BASE + nr_pirqs) {
 		WARN_ONCE(1, "Non-MSI IRQ#%d (Xen %d)\n", irq, xen_pirq);
 		return -EINVAL;
@@ -1307,15 +1374,17 @@ int evtchn_map_pirq(int irq, int xen_pir
 		irq = PIRQ_BASE + nr_pirqs - 1;
 		spin_lock(&irq_alloc_lock);
 		do {
-			struct irq_desc *desc;
 			struct irq_cfg *cfg;
 
 			if (identity_mapped_irq(irq))
 				continue;
-			desc = irq_to_desc_alloc_node(irq, numa_node_id());
-			cfg = desc->chip_data;
-			if (!index_from_irq(irq)) {
-				BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
+			cfg = alloc_irq_and_cfg_at(irq, numa_node_id());
+			if (unlikely(!cfg)) {
+				spin_unlock(&irq_alloc_lock);
+				return -ENOMEM;
+			}
+			if (!index_from_irq_cfg(cfg)) {
+				BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
 				cfg->info = mk_irq_info(IRQT_PIRQ,
 							xen_pirq, 0);
 				break;
@@ -1328,18 +1397,14 @@ int evtchn_map_pirq(int irq, int xen_pir
 					      handle_fasteoi_irq, "fasteoi");
 #endif
 	} else if (!xen_pirq) {
-		if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
+		struct irq_cfg *cfg = irq_cfg(irq);
+
+		if (!cfg || unlikely(type_from_irq_cfg(cfg) != IRQT_PIRQ))
 			return -EINVAL;
-		/*
-		 * dynamic_irq_cleanup(irq) would seem to be the correct thing
-		 * here, but cannot be used as we get here also during shutdown
-		 * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
-		 * then causes a warning in dynamic_irq_cleanup().
-		 */
 		set_irq_chip_and_handler(irq, NULL, NULL);
-		irq_cfg(irq)->info = IRQ_UNBOUND;
+		cfg->info = IRQ_UNBOUND;
 #ifdef CONFIG_SPARSE_IRQ
-		irq_cfg(irq)->bindcount--;
+		cfg->bindcount--;
 #endif
 		return 0;
 	} else if (type_from_irq(irq) != IRQT_PIRQ
@@ -1354,10 +1419,12 @@ int evtchn_map_pirq(int irq, int xen_pir
 
 int evtchn_get_xen_pirq(int irq)
 {
+	struct irq_cfg *cfg = irq_cfg(irq);
+
 	if (identity_mapped_irq(irq))
 		return irq;
-	BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
-	return index_from_irq(irq);
+	BUG_ON(type_from_irq_cfg(cfg) != IRQT_PIRQ);
+	return index_from_irq_cfg(cfg);
 }
 
 void __init xen_init_IRQ(void)
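
The evtchn.c changes above track the 2.6.37 genirq rework: irq_chip callbacks take a struct irq_data * and carry an irq_ prefix, and per-IRQ state is reached through the chip data instead of irq_desc fields. A minimal sketch of the new callback shape (example_* names are made up; it reuses evtchn_from_irq_data(), mask_evtchn() and unmask_evtchn() from above, so a real version would live in the same file):

	#include <linux/irq.h>

	/* Sketch of 2.6.37-style chip methods: both the IRQ number and the
	 * chip data are reached through the irq_data argument. */
	static void example_mask(struct irq_data *data)
	{
		unsigned int evtchn = evtchn_from_irq_data(data);

		if (VALID_EVTCHN(evtchn))
			mask_evtchn(evtchn);
	}

	static void example_unmask(struct irq_data *data)
	{
		unsigned int evtchn = evtchn_from_irq_data(data);

		if (VALID_EVTCHN(evtchn))
			unmask_evtchn(evtchn);
	}

	static struct irq_chip example_chip = {
		.name       = "Example",
		.irq_mask   = example_mask,
		.irq_unmask = example_unmask,
	};
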
--- head.orig/drivers/xen/core/smpboot.c	2012-03-22 16:21:46.000000000 +0100
+++ head/drivers/xen/core/smpboot.c	2012-03-22 16:22:20.000000000 +0100
@@ -32,7 +32,7 @@ extern void smp_trap_init(trap_info_t *)
 
 cpumask_var_t vcpu_initialized_mask;
 
-DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
+DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 static DEFINE_PER_CPU(int, resched_irq);
@@ -44,6 +44,11 @@ static char callfunc_name[NR_CPUS][15];
 static char call1func_name[NR_CPUS][15];
 static char reboot_name[NR_CPUS][15];
 
+#ifdef CONFIG_IRQ_WORK
+static DEFINE_PER_CPU(int, irq_work_irq);
+static char irq_work_name[NR_CPUS][15];
+#endif
+
 void __init prefill_possible_map(void)
 {
 	int i, rc;
@@ -74,6 +79,9 @@ static int __cpuinit xen_smp_intr_init(u
 	int rc;
 
 	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
+#ifdef CONFIG_IRQ_WORK
+		per_cpu(irq_work_irq, cpu) =
+#endif
 		per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
 
 	sprintf(resched_name[cpu], "resched%u", cpu);
@@ -120,6 +128,19 @@ static int __cpuinit xen_smp_intr_init(u
 		goto fail;
 	per_cpu(reboot_irq, cpu) = rc;
 
+#ifdef CONFIG_IRQ_WORK
+	sprintf(irq_work_name[cpu], "irqwork%u", cpu);
+	rc = bind_ipi_to_irqhandler(IRQ_WORK_VECTOR,
+				    cpu,
+				    smp_irq_work_interrupt,
+				    IRQF_DISABLED|IRQF_NOBALANCING,
+				    irq_work_name[cpu],
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(irq_work_irq, cpu) = rc;
+#endif
+
 	rc = xen_spinlock_init(cpu);
 	if (rc < 0)
 		goto fail;
@@ -138,6 +159,10 @@ static int __cpuinit xen_smp_intr_init(u
 		unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
 	if (per_cpu(reboot_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+	if (per_cpu(irq_work_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
 	xen_spinlock_cleanup(cpu);
 	return rc;
 }
@@ -151,6 +176,9 @@ static void __cpuinit xen_smp_intr_exit(
 	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+	unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
 	xen_spinlock_cleanup(cpu);
 }
 
--- head.orig/drivers/xen/core/spinlock.c	2014-01-07 17:14:50.000000000 +0100
+++ head/drivers/xen/core/spinlock.c	2014-01-07 17:15:16.000000000 +0100
@@ -24,7 +24,7 @@ struct spinning {
 	struct spinning *prev;
 };
 static DEFINE_PER_CPU(struct spinning *, _spinning);
-static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
+static DEFINE_PER_CPU_READ_MOSTLY(evtchn_port_t, poll_evtchn);
 /*
  * Protect removal of objects: Addition can be done lockless, and even
  * removal itself doesn't need protection - what needs to be prevented is
@@ -227,7 +227,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 		   unsigned int flags)
 {
 	typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask
-		= __raw_local_save_flags();
+		= arch_local_save_flags();
 	struct spinning spinning;
 
 	/* If kicker interrupt not initialized yet, just spin. */
@@ -251,7 +251,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 				break;
 			}
 	}
-	raw_local_irq_disable();
+	arch_local_irq_disable();
 #endif
 	smp_wmb();
 	percpu_write(_spinning, &spinning);
@@ -278,7 +278,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 		if (upcall_mask > flags) {
 			spinning.irq_count = percpu_read(_irq_count);
 			smp_wmb();
-			raw_local_irq_restore(flags);
+			arch_local_irq_restore(flags);
 		}
 #endif
 
@@ -287,7 +287,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 			BUG();
 
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-		raw_local_irq_disable();
+		arch_local_irq_disable();
 		smp_wmb();
 		spinning.irq_count = UINT_MAX;
 #endif
@@ -306,9 +306,9 @@ bool xen_spin_wait(arch_spinlock_t *lock
 	/* announce we're done */
 	percpu_write(_spinning, spinning.prev);
 	if (!CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING)
-		raw_local_irq_disable();
+		arch_local_irq_disable();
 	sequence();
-	raw_local_irq_restore(upcall_mask);
+	arch_local_irq_restore(upcall_mask);
 	smp_rmb();
 	if (lock->cur == spinning.ticket)
 		return true;
@@ -352,7 +352,7 @@ void xen_spin_kick(const arch_spinlock_t
 		atomic_t *rm_ctr = NULL;
 		struct spinning *spinning;
 
-		flags = __raw_local_irq_save();
+		flags = arch_local_irq_save();
 		if (cpu == local)
 			spinning = percpu_read(_spinning);
 		else for (;;) {
@@ -387,7 +387,7 @@ void xen_spin_kick(const arch_spinlock_t
 
 		if (rm_ctr)
 			atomic_dec(rm_ctr);
-		raw_local_irq_restore(flags);
+		arch_local_irq_restore(flags);
 
 		if (unlikely(spinning)) {
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
--- head.orig/drivers/xen/evtchn.c	2013-09-26 13:00:02.000000000 +0200
+++ head/drivers/xen/evtchn.c	2013-09-26 13:00:15.000000000 +0200
@@ -563,7 +563,11 @@ static const struct file_operations evtc
 
 static struct miscdevice evtchn_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
+#ifdef CONFIG_PARAVIRT_XEN
 	.name         = "xen/evtchn",
+#else
+	.name         = "evtchn",
+#endif
 	.nodename     = "xen/evtchn",
 	.fops         = &evtchn_fops,
 };
--- head.orig/drivers/xen/fbfront/xenfb.c	2011-02-01 15:03:03.000000000 +0100
+++ head/drivers/xen/fbfront/xenfb.c	2011-04-13 14:12:22.000000000 +0200
@@ -611,10 +611,9 @@ static int __devinit xenfb_probe(struct 
 	info->refresh.data = (unsigned long)info;
 	INIT_LIST_HEAD(&info->mappings);
 
-	info->fb = vmalloc(fb_size);
+	info->fb = vzalloc(fb_size);
 	if (info->fb == NULL)
 		goto error_nomem;
-	memset(info->fb, 0, fb_size);
 
 	info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
--- head.orig/drivers/xen/gntdev/gntdev.c	2012-05-23 13:35:46.000000000 +0200
+++ head/drivers/xen/gntdev/gntdev.c	2012-05-23 13:38:40.000000000 +0200
@@ -140,6 +140,7 @@ static long gntdev_ioctl(struct file *fl
 static const struct file_operations gntdev_fops = {
 	.owner = THIS_MODULE,
 	.open = gntdev_open,
+	.llseek = no_llseek,
 	.release = gntdev_release,
 	.mmap = gntdev_mmap,
 	.unlocked_ioctl = gntdev_ioctl
@@ -394,6 +395,8 @@ static int gntdev_open(struct inode *ino
 {
 	gntdev_file_private_data_t *private_data;
 
+	nonseekable_open(inode, flip);
+
 	/* Allocate space for the per-instance private data. */
 	private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
 	if (!private_data)
--- head.orig/drivers/xen/pci.c	2014-03-31 05:40:15.000000000 +0200
+++ head/drivers/xen/pci.c	2014-02-18 17:29:14.000000000 +0100
@@ -23,14 +23,23 @@
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
 
+#ifdef CONFIG_PARAVIRT_XEN
+#define CONFIG_XEN_COMPAT 0x040000
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#else
+#include <asm/hypervisor.h>
+#endif
 #include "../pci/pci.h"
 #ifdef CONFIG_PCI_MMCONFIG
 #include <asm/pci_x86.h>
 #endif
 
+#if CONFIG_XEN_COMPAT < 0x040200
 static bool __read_mostly pci_seg_supported = true;
+#else
+#define pci_seg_supported true
+#endif
 
 static int xen_add_device(struct device *dev)
 {
@@ -89,7 +98,9 @@ static int xen_add_device(struct device 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
 		if (r != -ENOSYS)
 			return r;
+#if CONFIG_XEN_COMPAT < 0x040200
 		pci_seg_supported = false;
+#endif
 	}
 
 	if (pci_domain_nr(pci_dev->bus))
--- head.orig/drivers/xen/privcmd/privcmd.c	2014-01-30 10:16:46.000000000 +0100
+++ head/drivers/xen/privcmd/privcmd.c	2014-01-30 10:18:11.000000000 +0100
@@ -449,7 +449,8 @@ static int privcmd_mmap(struct file * fi
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return -ENOSYS;
 
-	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
+	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
+	 * how to recreate these mappings */
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
 	vma->vm_ops = &privcmd_vm_ops;
 	vma->vm_private_data = NULL;
@@ -459,6 +460,8 @@ static int privcmd_mmap(struct file * fi
 #endif
 
 static const struct file_operations privcmd_file_ops = {
+	.open = nonseekable_open,
+	.llseek = no_llseek,
 	.unlocked_ioctl = privcmd_ioctl,
 	.mmap = privcmd_mmap,
 };
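
Several of the character devices touched above (blktap, gntdev, xenbus_dev, privcmd) gain .llseek = no_llseek because 2.6.37 changes the fallback behaviour for a NULL .llseek, so non-seekable devices now spell it out explicitly. The pattern, as a minimal sketch with made-up example_* names:

	#include <linux/fs.h>
	#include <linux/module.h>

	/* Sketch: declare the device non-seekable both in the fops table and
	 * at open time, matching the changes above. */
	static int example_open(struct inode *inode, struct file *filp)
	{
		return nonseekable_open(inode, filp);
	}

	static const struct file_operations example_fops = {
		.owner  = THIS_MODULE,
		.open   = example_open,
		.llseek = no_llseek,
	};
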
--- head.orig/drivers/xen/scsifront/scsifront.c	2012-12-06 16:09:58.000000000 +0100
+++ head/drivers/xen/scsifront/scsifront.c	2012-12-06 16:13:46.000000000 +0100
@@ -324,11 +324,12 @@ static int map_data_for_request(struct v
 	return ref_cnt;
 }
 
-static int scsifront_queuecommand(struct scsi_cmnd *sc,
-				  void (*done)(struct scsi_cmnd *))
+static int scsifront_queuecommand(struct Scsi_Host *shost,
+				  struct scsi_cmnd *sc)
 {
-	struct vscsifrnt_info *info = shost_priv(sc->device->host);
+	struct vscsifrnt_info *info = shost_priv(shost);
 	vscsiif_request_t *ring_req;
+	unsigned long flags;
 	int ref_cnt;
 	uint16_t rqid;
 
@@ -339,11 +340,13 @@ static int scsifront_queuecommand(struct
 		     sc->cmnd[2], sc->cmnd[3], sc->cmnd[4], sc->cmnd[5],
 		     sc->cmnd[6], sc->cmnd[7], sc->cmnd[8], sc->cmnd[9]);
 */
+	spin_lock_irqsave(shost->host_lock, flags);
+	scsi_cmd_get_serial(shost, sc);
 	if (RING_FULL(&info->ring)) {
-		goto out_host_busy;
+		spin_unlock_irqrestore(shost->host_lock, flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
-	sc->scsi_done = done;
 	sc->result    = 0;
 
 	ring_req          = scsifront_pre_request(info);
@@ -371,27 +374,21 @@ static int scsifront_queuecommand(struct
 	ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
 	if (ref_cnt < 0) {
 		add_id_to_freelist(info, rqid);
+		spin_unlock_irqrestore(shost->host_lock, flags);
 		if (ref_cnt == (-ENOMEM))
-			goto out_host_busy;
-		else {
-			sc->result = (DID_ERROR << 16);
-			goto out_fail_command;
-		}
+			return SCSI_MLQUEUE_HOST_BUSY;
+		sc->result = (DID_ERROR << 16);
+		sc->scsi_done(sc);
+		return 0;
 	}
 
 	ring_req->nr_segments          = (uint8_t)ref_cnt;
 	info->shadow[rqid].nr_segments = ref_cnt;
 
 	scsifront_do_request(info);
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	return 0;
-
-out_host_busy:
-	return SCSI_MLQUEUE_HOST_BUSY;
-
-out_fail_command:
-	done(sc);
-	return 0;
 }
 
 
--- head.orig/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h	2008-02-20 09:32:49.000000000 +0100
+++ head/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h	2011-02-01 15:09:47.000000000 +0100
@@ -54,7 +54,6 @@
 #include <linux/in6.h>
 #include <linux/spinlock.h>
 #include <linux/highmem.h>
-#include <linux/smp_lock.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
 #include <asm/current.h>
--- head.orig/drivers/xen/xenbus/xenbus_dev.c	2011-02-01 15:03:03.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_dev.c	2012-02-16 13:43:48.000000000 +0100
@@ -488,6 +488,7 @@ static const struct file_operations xenb
 	.write = xenbus_dev_write,
 	.open = xenbus_dev_open,
 	.release = xenbus_dev_release,
+	.llseek = no_llseek,
 	.poll = xenbus_dev_poll,
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
 	.unlocked_ioctl = xenbus_dev_ioctl
--- head.orig/drivers/xen/xenbus/xenbus_probe.c	2012-03-12 13:52:22.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.c	2012-03-12 13:52:29.000000000 +0100
@@ -58,6 +58,8 @@
 #include <xen/evtchn.h>
 #include <xen/features.h>
 #include <xen/gnttab.h>
+
+#define PARAVIRT_EXPORT_SYMBOL(sym) __typeof__(sym) sym
 #else
 #include <asm/xen/hypervisor.h>
 
@@ -67,6 +69,8 @@
 #include <xen/page.h>
 
 #include <xen/platform_pci.h>
+
+#define PARAVIRT_EXPORT_SYMBOL EXPORT_SYMBOL_GPL
 #endif
 
 #ifndef CONFIG_XEN
@@ -81,11 +85,10 @@
 #endif
 
 int xen_store_evtchn;
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-EXPORT_SYMBOL(xen_store_evtchn);
-#endif
+PARAVIRT_EXPORT_SYMBOL(xen_store_evtchn);
 
 struct xenstore_domain_interface *xen_store_interface;
+PARAVIRT_EXPORT_SYMBOL(xen_store_interface);
 
 static unsigned long xen_store_mfn;
 
@@ -1221,9 +1224,7 @@ int __devinit
 xenbus_init(void)
 {
 	int err = 0;
-#if defined(CONFIG_XEN) || defined(MODULE)
 	unsigned long page = 0;
-#endif
 
 	DPRINTK("");
 
@@ -1241,7 +1242,6 @@ xenbus_init(void)
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
 	if (is_initial_xendomain()) {
-#if defined(CONFIG_XEN) || defined(MODULE)
 		struct evtchn_alloc_unbound alloc_unbound;
 
 		/* Allocate Xenstore page */
@@ -1280,9 +1280,6 @@ xenbus_init(void)
 		if (xsd_port_intf)
 			xsd_port_intf->read_proc = xsd_port_read;
 #endif
-#else
-		/* dom0 not yet supported */
-#endif
 		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	} else {
 #if !defined(CONFIG_XEN) && !defined(MODULE)
@@ -1368,10 +1365,8 @@ xenbus_init(void)
 	 * registered.
 	 */
 
-#if defined(CONFIG_XEN) || defined(MODULE)
 	if (page != 0)
 		free_page(page);
-#endif
 	return err;
 }
 
--- head.orig/include/uapi/xen/Kbuild	2014-02-18 17:22:08.000000000 +0100
+++ head/include/uapi/xen/Kbuild	2014-02-18 17:29:23.000000000 +0100
@@ -1,5 +1,4 @@
 # UAPI Header export list
 header-y += gntalloc.h
 header-y += gntdev.h
-header-y += privcmd.h
 header-y += public/
--- head.orig/include/xen/evtchn.h	2012-10-23 15:29:34.000000000 +0200
+++ head/include/xen/evtchn.h	2012-10-23 15:45:43.000000000 +0200
@@ -55,6 +55,7 @@ struct irq_cfg {
 #endif
 	};
 };
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
 #endif
 
 /*
--- head.orig/include/xen/interface/memory.h	2014-01-30 10:17:58.000000000 +0100
+++ head/include/xen/interface/memory.h	2014-01-30 10:18:07.000000000 +0100
@@ -206,6 +206,7 @@ struct xen_machphys_mapping {
     xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
     xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping);
 typedef struct xen_machphys_mapping xen_machphys_mapping_t;
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
 
@@ -318,6 +319,7 @@ struct xen_memory_map {
      */
     XEN_GUEST_HANDLE(void) buffer;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
 typedef struct xen_memory_map xen_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
 
--- head.orig/include/uapi/xen/privcmd.h	2012-12-11 04:30:57.000000000 +0100
+++ head/include/uapi/xen/privcmd.h	2012-10-23 15:44:46.000000000 +0200
@@ -1,98 +1,3 @@
-/******************************************************************************
- * privcmd.h
- *
- * Interface to /proc/xen/privcmd.
- *
- * Copyright (c) 2003-2005, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __LINUX_PUBLIC_PRIVCMD_H__
-#define __LINUX_PUBLIC_PRIVCMD_H__
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-#include <xen/interface/xen.h>
-
-struct privcmd_hypercall {
-	__u64 op;
-	__u64 arg[5];
-};
-
-struct privcmd_mmap_entry {
-	__u64 va;
-	__u64 mfn;
-	__u64 npages;
-};
-
-struct privcmd_mmap {
-	int num;
-	domid_t dom; /* target domain */
-	struct privcmd_mmap_entry __user *entry;
-};
-
-struct privcmd_mmapbatch {
-	int num;     /* number of pages to populate */
-	domid_t dom; /* target domain */
-	__u64 addr;  /* virtual address */
-	xen_pfn_t __user *arr; /* array of mfns - or'd with
-				  PRIVCMD_MMAPBATCH_*_ERROR on err */
-};
-
-#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
-#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
-
-struct privcmd_mmapbatch_v2 {
-	unsigned int num; /* number of pages to populate */
-	domid_t dom;      /* target domain */
-	__u64 addr;       /* virtual address */
-	const xen_pfn_t __user *arr; /* array of mfns */
-	int __user *err;  /* array of error codes */
-};
-
-/*
- * @cmd: IOCTL_PRIVCMD_HYPERCALL
- * @arg: &privcmd_hypercall_t
- * Return: Value returned from execution of the specified hypercall.
- *
- * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
- * @arg: &struct privcmd_mmapbatch_v2
- * Return: 0 on success (i.e., arg->err contains valid error codes for
- * each frame).  On an error other than a failed frame remap, -1 is
- * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
- * if the operation was otherwise successful but any frame failed with
- * -ENOENT, then -1 is returned and errno is set to ENOENT.
- */
-#define IOCTL_PRIVCMD_HYPERCALL					\
-	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
-#define IOCTL_PRIVCMD_MMAP					\
-	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
-#define IOCTL_PRIVCMD_MMAPBATCH					\
-	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
-#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
-	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
-
-#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__)
+#include "public/privcmd.h"
+#endif
--- head.orig/include/uapi/xen/public/privcmd.h	2011-02-01 14:38:38.000000000 +0100
+++ head/include/uapi/xen/public/privcmd.h	2011-02-01 15:09:47.000000000 +0100
@@ -34,6 +34,7 @@
 #define __LINUX_PUBLIC_PRIVCMD_H__
 
 #include <linux/types.h>
+#include <linux/compiler.h>
 
 typedef struct privcmd_hypercall
 {
--- head.orig/kernel/power/Kconfig	2014-06-26 11:21:16.000000000 +0200
+++ head/kernel/power/Kconfig	2013-12-02 17:57:40.000000000 +0100
@@ -165,7 +165,7 @@ config PM_ADVANCED_DEBUG
 
 config PM_TEST_SUSPEND
 	bool "Test suspend/resume and wakealarm during bootup"
-	depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+	depends on SUSPEND && PM_DEBUG && RTC_CLASS=y && !XEN_UNPRIVILEGED_GUEST
 	---help---
 	This option will let you suspend your machine during bootup, and
 	make it wake up a few seconds later using an RTC wakeup alarm.
@@ -212,7 +212,7 @@ config PM_TRACE
 config PM_TRACE_RTC
 	bool "Suspend/resume event tracing"
 	depends on PM_SLEEP_DEBUG
-	depends on X86
+	depends on X86 && !XEN_UNPRIVILEGED_GUEST
 	select PM_TRACE
 	---help---
 	This enables some cheesy code to save the last PM event point in the
--- head.orig/lib/swiotlb-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/lib/swiotlb-xen.c	2011-02-01 15:09:47.000000000 +0100
@@ -58,7 +58,7 @@ static unsigned long io_tlb_nslabs;
  */
 static unsigned long io_tlb_overflow = 32*1024;
 
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
 
 /*
  * This is a free list describing the number of free entries available from
@@ -174,16 +174,16 @@ void __init swiotlb_init_with_tbl(char *
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
 	 */
-	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+	io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
 	for (i = 0; i < io_tlb_nslabs; i++)
  		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	io_tlb_overflow_buffer = alloc_bootmem(io_tlb_overflow);
+	io_tlb_overflow_buffer = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_overflow));
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
 
@@ -218,7 +218,7 @@ swiotlb_init_with_default_size(size_t de
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_pages(bytes);
+	io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 
--- head.orig/mm/vmalloc.c	2013-12-02 17:38:28.000000000 +0100
+++ head/mm/vmalloc.c	2013-12-02 17:57:32.000000000 +0100
@@ -541,8 +541,6 @@ static void vmap_debug_free_range(unsign
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	vunmap_page_range(start, end);
 	flush_tlb_kernel_range(start, end);
-#elif defined(CONFIG_XEN) && defined(CONFIG_X86)
-	vunmap_page_range(start, end);
 #endif
 }