Blob Blame History Raw
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.31
Patch-mainline: 2.6.31

 This patch contains the differences between 2.6.30 and 2.6.31.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.31" by xen-port-patches.py

2.6.34/drivers/staging/vt6655/ttype.h (VOID no longer used)

--- 12.2.orig/arch/x86/Kconfig	2012-04-10 16:57:40.000000000 +0200
+++ 12.2/arch/x86/Kconfig	2012-04-10 17:00:47.000000000 +0200
@@ -21,7 +21,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
-	select HAVE_PERF_EVENTS
+	select HAVE_PERF_EVENTS if !XEN
 	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
@@ -898,7 +898,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
-	depends on !X86_XEN && !XEN_UNPRIVILEGED_GUEST
+	depends on !XEN_UNPRIVILEGED_GUEST
 	---help---
 	  Machine Check support allows the processor to notify the
 	  kernel if it detects a problem (e.g. overheating, data corruption).
@@ -923,7 +923,7 @@ config X86_MCE_AMD
 
 config X86_ANCIENT_MCE
 	bool "Support for old Pentium 5 / WinChip machine checks"
-	depends on X86_32 && X86_MCE
+	depends on X86_32 && X86_MCE && !XEN
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
 	  systems. These typically need to be enabled explicitely on the command
@@ -1626,6 +1626,7 @@ config KEXEC_JUMP
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP || XEN)
+	default 0x100000 if XEN
 	default "0x1000000"
 	---help---
 	  This gives the physical address where the kernel is loaded.
--- 12.2.orig/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:50:44.000000000 +0100
@@ -770,9 +770,11 @@ ia32_sys_call_table:
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create1
-	.quad sys_dup3			/* 330 */
+	.quad sys_dup3				/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
+	.quad sys_perf_counter_open
 ia32_syscall_end:
--- 12.2.orig/arch/x86/include/asm/hw_irq.h	2011-09-07 15:51:08.000000000 +0200
+++ 12.2/arch/x86/include/asm/hw_irq.h	2011-09-07 15:58:21.000000000 +0200
@@ -165,6 +165,7 @@ extern asmlinkage void smp_invalidate_in
 extern irqreturn_t smp_reschedule_interrupt(int, void *);
 extern irqreturn_t smp_call_function_interrupt(int, void *);
 extern irqreturn_t smp_call_function_single_interrupt(int, void *);
+extern irqreturn_t smp_reboot_interrupt(int, void *);
 #endif
 #endif
 
--- 12.2.orig/arch/x86/include/asm/required-features.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/include/asm/required-features.h	2011-04-13 13:55:08.000000000 +0200
@@ -48,7 +48,7 @@
 #endif
 
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#if defined(CONFIG_PARAVIRT) || defined(CONFIG_XEN)
 /* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
 #define NEED_PGE	0
--- 12.2.orig/arch/x86/include/mach-xen/asm/agp.h	2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/agp.h	2011-02-01 14:50:44.000000000 +0100
@@ -48,6 +48,7 @@
 /* Convert a physical address to an address suitable for the GART. */
 #define phys_to_gart(x) phys_to_machine(x)
 #define gart_to_phys(x) machine_to_phys(x)
+#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)	({                                          \
--- 12.2.orig/arch/x86/include/mach-xen/asm/desc.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/desc.h	2011-02-01 14:50:44.000000000 +0100
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_DESC_H
 #define _ASM_X86_DESC_H
 
-#ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
@@ -406,29 +405,4 @@ static inline void set_system_intr_gate_
 }
 #endif
 
-#else
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- *    idx - descriptor index
- *    gdt - GDT pointer
- *    base - 32bit register to which the base will be written
- *    lo_w - lo word of the "base" register
- *    lo_b - lo byte of the "base" register
- *    hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
-	movb idx * 8 + 4(gdt), lo_b;			\
-	movb idx * 8 + 7(gdt), hi_b;			\
-	shll $16, base;					\
-	movw idx * 8 + 2(gdt), lo_w;
-
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_DESC_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 14:50:44.000000000 +0100
@@ -118,12 +118,9 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE0,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE1,
+	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
+	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 	__end_of_permanent_fixed_addresses,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-	FIX_OHCI1394_BASE,
-#endif
 	/*
 	 * 256 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
@@ -136,6 +133,9 @@ enum fixed_addresses {
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
 			(__end_of_permanent_fixed_addresses & 255),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
--- 12.2.orig/arch/x86/include/mach-xen/asm/hypercall.h	2011-08-23 13:33:05.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/hypercall.h	2011-08-23 13:34:34.000000000 +0200
@@ -272,7 +272,7 @@ HYPERVISOR_memory_op(
 	unsigned int cmd, void *arg)
 {
 	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
+		xen_multicall_flush();
 	return _hypercall2(int, memory_op, cmd, arg);
 }
 
@@ -343,7 +343,7 @@ HYPERVISOR_grant_table_op(
 	int rc;
 
 	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
+		xen_multicall_flush();
 #ifdef GNTTABOP_map_grant_ref
 	if (cmd == GNTTABOP_map_grant_ref)
 #endif
--- 12.2.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2012-02-07 11:57:59.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/hypervisor.h	2012-06-06 14:02:48.000000000 +0200
@@ -140,7 +140,7 @@ void scrub_pages(void *, unsigned int);
 
 DECLARE_PER_CPU(bool, xen_lazy_mmu);
 
-void xen_multicall_flush(bool);
+void xen_multicall_flush(void);
 
 int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
 					     unsigned long flags);
@@ -158,7 +158,7 @@ static inline void arch_enter_lazy_mmu_m
 static inline void arch_leave_lazy_mmu_mode(void)
 {
 	percpu_write(xen_lazy_mmu, false);
-	xen_multicall_flush(false);
+	xen_multicall_flush();
 }
 
 #define arch_use_lazy_mmu_mode() unlikely(percpu_read(xen_lazy_mmu))
@@ -172,13 +172,13 @@ static inline void arch_leave_lazy_mmu_m
 static inline void arch_flush_lazy_mmu_mode(void)
 {
 	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
+		xen_multicall_flush();
 }
 #endif
 
 #else /* !CONFIG_XEN || MODULE */
 
-static inline void xen_multicall_flush(bool ignore) {}
+static inline void xen_multicall_flush(void) {}
 #define arch_use_lazy_mmu_mode() false
 #define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
 #define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
@@ -379,4 +379,9 @@ static inline void MULTI_bug(multicall_e
 
 #define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI)
 
+#ifdef LINUX
+/* drivers/staging/ use Windows-style types, including VOID */
+#undef VOID
+#endif
+
 #endif /* __HYPERVISOR_H__ */
--- 12.2.orig/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-02-15 17:33:07.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-02-15 17:33:45.000000000 +0100
@@ -1,8 +1,11 @@
 #ifndef _ASM_X86_IRQ_VECTORS_H
 #define _ASM_X86_IRQ_VECTORS_H
 
+#define MCE_VECTOR			0x12
+
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR			0x80
+# define IA32_SYSCALL_VECTOR		0x80
 #else
 # define IA32_SYSCALL_VECTOR		0x80
 #endif
@@ -11,7 +14,8 @@
 #define CALL_FUNCTION_VECTOR		1
 #define NMI_VECTOR			0x02
 #define CALL_FUNC_SINGLE_VECTOR		3
-#define NR_IPIS				4
+#define REBOOT_VECTOR			4
+#define NR_IPIS				5
 
 /*
  * The maximum number of vectors supported by i386 processors
--- 12.2.orig/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 14:50:44.000000000 +0100
@@ -97,7 +97,8 @@ extern void pci_iommu_alloc(void);
 
 #define PCI_DMA_BUS_IS_PHYS 0
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) || defined(CONFIG_SWIOTLB)
+#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) \
+	|| defined(CONFIG_SWIOTLB)
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)       \
 	        dma_addr_t ADDR_NAME;
@@ -136,6 +137,7 @@ extern void pci_iommu_alloc(void);
 
 /* generic pci stuff */
 #include <asm-generic/pci.h>
+#define PCIBIOS_MAX_MEM_32 0xffffffff
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgalloc.h	2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgalloc.h	2011-02-01 14:50:44.000000000 +0100
@@ -51,7 +51,13 @@ static inline void pte_free(struct mm_st
 	__pte_free(pte);
 }
 
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+				  unsigned long address)
+{
+	___pte_free_tlb(tlb, pte);
+}
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
 				       pmd_t *pmd, pte_t *pte)
@@ -92,7 +98,13 @@ static inline void pmd_free(struct mm_st
 	__pmd_free(virt_to_page(pmd));
 }
 
-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long adddress)
+{
+	___pmd_free_tlb(tlb, pmd);
+}
 
 #ifdef CONFIG_X86_PAE
 extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
@@ -145,7 +157,14 @@ static inline void pud_free(struct mm_st
 	__pmd_free(virt_to_page(pud));
 }
 
-extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				  unsigned long address)
+{
+	___pud_free_tlb(tlb, pud);
+}
+
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 09:58:42.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 09:59:16.000000000 +0100
@@ -2,6 +2,7 @@
 #define _ASM_X86_PGTABLE_H
 
 #include <asm/page.h>
+#include <asm/e820.h>
 
 #include <asm/pgtable_types.h>
 
@@ -75,6 +76,8 @@ static inline void __init paravirt_paget
 #define pte_val(x)	xen_pte_val(x)
 #define __pte(x)	xen_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -261,10 +264,17 @@ static inline pgprot_t pgprot_modify(pgp
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
-static inline int is_new_memtype_allowed(unsigned long flags,
-						unsigned long new_flags)
+static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
+					 unsigned long flags,
+					 unsigned long new_flags)
 {
 	/*
+	 * PAT type is always WB for ISA. So no need to check.
+	 */
+	if (is_ISA_range(paddr, paddr + size - 1))
+		return 1;
+
+	/*
 	 * Certain new memtypes are not allowed with certain
 	 * requested memtype:
 	 * - request is uncached, return cannot be write-back
@@ -309,6 +319,11 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline int pte_hidden(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_HIDDEN;
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 #if CONFIG_XEN_COMPAT <= 0x030002
@@ -508,6 +523,8 @@ static inline int pgd_none(pgd_t pgd)
 
 #ifndef __ASSEMBLY__
 
+#define direct_gbpages 0
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
 {
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 14:50:44.000000000 +0100
@@ -48,13 +48,17 @@ extern void set_pmd_pfn(unsigned long, u
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE			\
+	(in_nmi() ? KM_NMI_PTE : 	\
+	 in_irq() ? KM_IRQ_PTE :	\
+	 KM_PTE0)
 #define pte_offset_map(dir, address)					\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +		\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +		\
 	 pte_index((address)))
 #define pte_offset_map_nested(dir, address)				\
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +		\
 	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)					\
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 09:58:46.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 09:59:12.000000000 +0100
@@ -33,10 +33,6 @@ extern pgd_t init_level4_pgt[];
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)							\
 	printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n",		\
 	       __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
@@ -137,8 +133,6 @@ static inline int pgd_large(pgd_t pgd) {
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-#define direct_gbpages 0
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -173,10 +167,7 @@ extern void cleanup_highmap(void);
 
 /* fs/proc/kcore.c */
 #define	kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
-#define	kc_offset_to_vaddr(o)				\
-	(((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1)))	\
-	 ? ((o) | ~__VIRTUAL_MASK)			\
-	 : (o))
+#define	kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
 
 #define __HAVE_ARCH_PTE_SAME
 #endif /* !__ASSEMBLY__ */
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_64_types.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_64_types.h	2011-02-01 14:50:44.000000000 +0100
@@ -51,11 +51,12 @@ typedef union { pteval_t pte; unsigned i
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAX_PHYSMEM_BITS 43
 #define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 14:50:44.000000000 +0100
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_KMEMCHECK
+#define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_HIDDEN	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
@@ -330,7 +335,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:45:53.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:46:07.000000000 +0100
@@ -146,7 +146,8 @@ struct cpuinfo_x86 {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
-extern __u32			cleared_cpu_caps[NCAPINTS];
+extern __u32			cpu_caps_cleared[NCAPINTS];
+extern __u32			cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -427,9 +428,6 @@ DECLARE_PER_CPU(unsigned long, stack_can
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
 	/* Cached TLS descriptors: */
@@ -444,8 +442,12 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
+#ifdef CONFIG_X86_32
 	unsigned long		ip;
+#endif
+#ifdef CONFIG_X86_64
 	unsigned long		fs;
+#endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
 	unsigned long		debugreg0;
@@ -474,14 +476,8 @@ struct thread_struct {
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long xen_get_debugreg(int regno)
@@ -751,6 +747,21 @@ static inline unsigned long get_debugctl
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -760,6 +771,18 @@ static inline void update_debugctlmsr(un
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
@@ -770,6 +793,7 @@ extern unsigned int		BIOS_revision;
 
 /* Boot loader type from the setup header: */
 extern int			bootloader_type;
+extern int			bootloader_version;
 
 extern char			ignore_fpu_irq;
 
@@ -830,7 +854,6 @@ static inline void spin_lock_prefetch(co
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
-	.fs			= __KERNEL_PERCPU,			  \
 }
 
 /*
--- 12.2.orig/arch/x86/include/mach-xen/asm/smp.h	2011-04-12 17:22:07.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/smp.h	2011-04-13 13:55:08.000000000 +0200
@@ -198,7 +198,7 @@ extern unsigned disabled_cpus __cpuinitd
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+	return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
--- 12.2.orig/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:27:53.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:28:14.000000000 +0200
@@ -445,4 +445,8 @@ static inline void __raw_write_unlock(ra
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
+/* The {read|write|spin}_lock() on x86 are full memory barriers. */
+static inline void smp_mb__after_lock(void) { }
+#define ARCH_HAS_SMP_MB_AFTER_LOCK
+
 #endif /* _ASM_X86_SPINLOCK_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/tlbflush.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/tlbflush.h	2011-02-01 14:50:44.000000000 +0100
@@ -111,6 +111,6 @@ static inline void flush_tlb_kernel_rang
 	flush_tlb_all();
 }
 
-extern void zap_low_mappings(void);
+extern void zap_low_mappings(bool early);
 
 #endif /* _ASM_X86_TLBFLUSH_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/xor.h	2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/xor.h	2011-02-01 14:50:44.000000000 +0100
@@ -1,4 +1,7 @@
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_KMEMCHECK
+/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
+# include <asm-generic/xor.h>
+#elif defined(CONFIG_X86_32)
 # include "../../asm/xor_32.h"
 #else
 # include "xor_64.h"
--- 12.2.orig/arch/x86/kernel/Makefile	2012-04-10 16:57:17.000000000 +0200
+++ 12.2/arch/x86/kernel/Makefile	2012-04-10 17:00:42.000000000 +0200
@@ -119,6 +119,6 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8237.o \
-	i8253.o i8259.o irqinit_$(BITS).o pci-swiotlb.o reboot.o smpboot.o \
-	tsc.o tsc_sync.o uv_%.o vsmp_64.o
+	i8253.o i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o \
+	tsc_sync.o uv_%.o vsmp_64.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- 12.2.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -51,6 +51,7 @@
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/setup.h>
+#include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
@@ -135,12 +136,9 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
 	struct irq_pin_list *pin;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -154,9 +152,6 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -188,16 +183,18 @@ int __init arch_early_irq_init(void)
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	int count;
+	int node;
 	int i;
 
 	cfg = irq_cfgx;
 	count = ARRAY_SIZE(irq_cfgx);
+	node= cpu_to_node(boot_cpu_id);
 
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_bootmem_cpumask_var(&cfg[i].domain);
-		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
@@ -218,12 +215,9 @@ static struct irq_cfg *irq_cfg(unsigned 
 	return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
 	struct irq_cfg *cfg;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
@@ -244,13 +238,13 @@ static struct irq_cfg *get_one_free_irq_
 	return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(cpu);
+		desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
@@ -260,10 +254,9 @@ int arch_init_chip_data(struct irq_desc 
 	return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
 	struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -272,7 +265,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
 	if (!old_entry)
 		return;
 
-	entry = get_one_free_irq_2_pin(cpu);
+	entry = get_one_free_irq_2_pin(node);
 	if (!entry)
 		return;
 
@@ -282,7 +275,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
 	tail		= entry;
 	old_entry	= old_entry->next;
 	while (old_entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			entry = head;
 			while (entry) {
@@ -322,12 +315,12 @@ static void free_irq_2_pin(struct irq_cf
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int cpu)
+				 struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 	struct irq_cfg *old_cfg;
 
-	cfg = get_one_free_irq_cfg(cpu);
+	cfg = get_one_free_irq_cfg(node);
 
 	if (!cfg)
 		return;
@@ -338,7 +331,7 @@ void arch_init_copy_chip_data(struct irq
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -362,19 +355,7 @@ void arch_free_chip_data(struct irq_desc
 		old_desc->chip_data = NULL;
 	}
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg = desc->chip_data;
-
-	if (!cfg->move_in_progress) {
-		/* it means that domain is not changed */
-		if (!cpumask_intersects(desc->affinity, mask))
-			cfg->move_desc_pending = 1;
-	}
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -384,13 +365,6 @@ static struct irq_cfg *irq_cfg(unsigned 
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -522,7 +496,8 @@ static struct IO_APIC_route_entry ioapic
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	union entry_union eu;
+	union entry_union eu = {{0, 0}};
+
 	eu.entry = e;
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -553,132 +528,18 @@ static void ioapic_mask_entry(int apic, 
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return BAD_APICID;
-
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
-	cpumask_copy(desc->affinity, mask);
-
-	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	dest = set_desc_affinity(desc, mask);
-	if (dest != BAD_APICID) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, cfg);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
 					apic, pin);
@@ -698,7 +559,7 @@ static void add_pin_to_irq_cpu(struct ir
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(cpu);
+	entry->next = get_one_free_irq_2_pin(node);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -707,7 +568,7 @@ static void add_pin_to_irq_cpu(struct ir
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -727,7 +588,7 @@ static void __init replace_pin_at_irq_cp
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -847,7 +708,7 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 #else
-#define add_pin_to_irq_cpu(cfg, cpu, apic, pin)
+#define add_pin_to_irq_node(cfg, node, apic, pin)
 #endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_X86_32
@@ -888,7 +749,7 @@ static int __init ioapic_pirq_setup(char
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
+#ifndef CONFIG_XEN
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
 	int apic;
@@ -986,20 +847,6 @@ int restore_IO_APIC_setup(struct IO_APIC
 	return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries)
-
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic;
@@ -1009,7 +856,7 @@ void free_ioapic_entries(struct IO_APIC_
 
 	kfree(ioapic_entries);
 }
-#endif
+#endif /* CONFIG_XEN */
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1072,54 +919,6 @@ static int __init find_isa_irq_apic(int 
 }
 #endif
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-			    mp_irqs[i].dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1338,6 +1137,64 @@ static int pin_2_irq(int idx, int apic, 
 	return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG,
+		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		    bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE,
+			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL)
+				break;
+
+		if (!test_bit(lbus, mp_bus_not_pci) &&
+		    !mp_irqs[i].irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].srcbusirq & 3)) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				return irq;
+			}
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				best_guess = irq;
+			}
+		}
+	}
+	return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 #ifndef CONFIG_XEN
 void lock_vector_lock(void)
 {
@@ -1609,6 +1466,9 @@ int setup_ioapic_entry(int apic_id, int 
 		irte.vector = vector;
 		irte.dest_id = IRTE_DEST(destination);
 
+		/* Set source-id of interrupt request */
+		set_ioapic_sid(&irte, apic_id);
+
 		modify_irte(irq, &irte);
 
 		ir_entry->index2 = (index >> 15) & 0x1;
@@ -1684,63 +1544,75 @@ static void setup_IO_APIC_irq(int apic_i
 	ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
+	int apic_id = 0, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		apic_id = mp_find_ioapic(0);
+		if (apic_id < 0)
+			apic_id = 0;
+	}
+#endif
 
-			idx = find_irq_entry(apic_id, pin, mp_INT);
-			if (idx == -1) {
-				if (!notcon) {
-					notcon = 1;
-					apic_printk(APIC_VERBOSE,
-						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				} else
-					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				continue;
-			}
-			if (notcon) {
+	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+		idx = find_irq_entry(apic_id, pin, mp_INT);
+		if (idx == -1) {
+			if (!notcon) {
+				notcon = 1;
 				apic_printk(APIC_VERBOSE,
-					" (apicid-pin) not connected\n");
-				notcon = 0;
-			}
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			} else
+				apic_printk(APIC_VERBOSE, " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			continue;
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				" (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 
-			irq = pin_2_irq(idx, apic_id, pin);
+		irq = pin_2_irq(idx, apic_id, pin);
 
 #ifdef CONFIG_XEN
-			if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
-				continue;
+		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
+			continue;
 #else
-			/*
-			 * Skip the timer IRQ if there's a quirk handler
-			 * installed and if it returns 1:
-			 */
-			if (apic->multi_timer_check &&
-					apic->multi_timer_check(apic_id, irq))
-				continue;
+		/*
+		 * Skip the timer IRQ if there's a quirk handler
+		 * installed and if it returns 1:
+		 */
+		if (apic->multi_timer_check &&
+				apic->multi_timer_check(apic_id, irq))
+			continue;
 #endif
 
-			desc = irq_to_desc_alloc_cpu(irq, cpu);
-			if (!desc) {
-				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-				continue;
-			}
-			cfg = desc->chip_data;
-			add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
-
-			setup_IO_APIC_irq(apic_id, pin, irq, desc,
-					irq_trigger(idx), irq_polarity(idx));
+		desc = irq_to_desc_alloc_node(irq, node);
+		if (!desc) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
 		}
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, apic_id, pin);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
+		setup_IO_APIC_irq(apic_id, pin, irq, desc,
+				irq_trigger(idx), irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -1908,36 +1780,30 @@ __apicdebuginit(void) print_IO_APIC(void
 	return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-	unsigned int v;
-	int i, j;
+	int i;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
 
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+	printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-	unsigned int v, ver, maxlvt;
+	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
 
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
@@ -1978,11 +1844,11 @@ __apicdebuginit(void) print_local_APIC(v
 	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
 	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
+	print_APIC_field(APIC_ISR);
 	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
+	print_APIC_field(APIC_TMR);
 	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
+	print_APIC_field(APIC_IRR);
 
 	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
 		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
@@ -2019,6 +1885,18 @@ __apicdebuginit(void) print_local_APIC(v
 	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 	v = apic_read(APIC_TDCR);
 	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
 	printk("\n");
 }
 
@@ -2067,6 +1945,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
 	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic || disable_apic)
+		return 0;
+
 	print_all_local_APICs();
 	print_IO_APIC();
 
@@ -2188,7 +2071,9 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+	if (cpu_has_apic)
+		disconnect_bsp_APIC(!intr_remapping_enabled &&
+				ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2427,7 +2312,119 @@ static int ioapic_retrigger_irq(unsigned
  * races.
  */
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return BAD_APICID;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
+		return BAD_APICID;
+
+	cpumask_copy(desc->affinity, mask);
+
+	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+	int ret = -1;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	return set_ioapic_affinity_irq_desc(desc, mask);
+}
 
 #ifdef CONFIG_INTR_REMAP
 
@@ -2442,26 +2439,25 @@ static int ioapic_retrigger_irq(unsigned
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return ret;
 
 	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return;
+		return ret;
 
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
+		return ret;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2477,27 +2473,30 @@ migrate_ioapic_irq_desc(struct irq_desc 
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(desc->affinity, mask);
+
+	return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	migrate_ioapic_irq_desc(desc, mask);
+	return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	set_ir_ioapic_affinity_irq_desc(desc, mask);
+	return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 						   const struct cpumask *mask)
 {
+	return 0;
 }
 #endif
 
@@ -2559,86 +2558,19 @@ static void irq_complete_move(struct irq
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		if (likely(!cfg->move_desc_pending))
-			return;
-
-		/* domain has not changed, but affinity did */
-		me = smp_processor_id();
-		if (cpumask_test_cpu(me, desc->affinity)) {
-			*descp = desc = move_irq_desc(desc, me);
-			/* get the new one */
-			cfg = desc->chip_data;
-			cfg->move_desc_pending = 0;
-		}
-#endif
+	if (likely(!cfg->move_in_progress))
 		return;
-	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		*descp = desc = move_irq_desc(desc, me);
-		/* get the new one */
-		cfg = desc->chip_data;
-#endif
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
-	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	ack_x2APIC_irq();
-	eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2702,9 +2634,6 @@ static void ack_apic_level(unsigned int 
 	 */
 	ack_APIC_irq();
 
-	if (irq_remapped(irq))
-		eoi_ioapic_irq(desc);
-
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2751,22 +2680,50 @@ static void ack_apic_level(unsigned int 
 }
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+	ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	ack_APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2977,7 +2934,7 @@ static inline void __init check_timer(vo
 {
 	struct irq_desc *desc = irq_to_desc(0);
 	struct irq_cfg *cfg = desc->chip_data;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -3043,7 +3000,7 @@ static inline void __init check_timer(vo
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
 			/* for edge trigger, setup_IO_APIC_irq already
@@ -3080,7 +3037,7 @@ static inline void __init check_timer(vo
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -3310,14 +3267,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
 	/* Allocate an unused irq */
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int cpu = boot_cpu_id;
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3326,7 +3282,7 @@ unsigned int create_irq_nr(unsigned int 
 
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		desc_new = irq_to_desc_alloc_node(new, node);
 		if (!desc_new) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", new);
 			continue;
@@ -3335,6 +3291,9 @@ unsigned int create_irq_nr(unsigned int 
 
 		if (cfg_new->vector != 0)
 			continue;
+
+		desc_new = move_irq_desc(desc_new, node);
+
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
 		break;
@@ -3352,11 +3311,12 @@ unsigned int create_irq_nr(unsigned int 
 
 int create_irq(void)
 {
+	int node = cpu_to_node(boot_cpu_id);
 	unsigned int irq_want;
 	int irq;
 
 	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want);
+	irq = create_irq_nr(irq_want, node);
 
 	if (irq == 0)
 		irq = -1;
@@ -3422,6 +3382,9 @@ static int msi_compose_msg(struct pci_de
 		irte.vector = cfg->vector;
 		irte.dest_id = IRTE_DEST(dest);
 
+		/* Set source-id of interrupt request */
+		set_msi_sid(&irte, pdev);
+
 		modify_irte(irq, &irte);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3459,7 +3422,7 @@ static int msi_compose_msg(struct pci_de
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3468,7 +3431,7 @@ static void set_msi_irq_affinity(unsigne
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3480,13 +3443,15 @@ static void set_msi_irq_affinity(unsigne
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
+
+	return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -3495,11 +3460,11 @@ ir_set_msi_irq_affinity(unsigned int irq
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
-		return;
+		return -1;
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3516,6 +3481,8 @@ ir_set_msi_irq_affinity(unsigned int irq
 	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
+
+	return 0;
 }
 
 #endif
@@ -3611,15 +3578,17 @@ int arch_setup_msi_irqs(struct pci_dev *
 	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
+	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
+	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want);
+		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
@@ -3669,7 +3638,7 @@ void arch_teardown_msi_irq(unsigned int 
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3678,7 +3647,7 @@ static void dmar_msi_set_affinity(unsign
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3690,11 +3659,13 @@ static void dmar_msi_set_affinity(unsign
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
 	.name = "DMAR_MSI",
 	.unmask = dmar_msi_unmask,
 	.mask = dmar_msi_mask,
@@ -3723,7 +3694,7 @@ int arch_setup_dmar_msi(unsigned int irq
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3732,7 +3703,7 @@ static void hpet_msi_set_affinity(unsign
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3744,6 +3715,8 @@ static void hpet_msi_set_affinity(unsign
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3800,7 +3773,7 @@ static void target_ht_irq(unsigned int i
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3808,11 +3781,13 @@ static void set_ht_irq_affinity(unsigned
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
 	target_ht_irq(irq, dest, cfg->vector);
+
+	return 0;
 }
 
 #endif
@@ -3887,6 +3862,8 @@ int arch_enable_uv_irq(char *irq_name, u
 	unsigned long flags;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	cfg = irq_cfg(irq);
 
 	err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3900,19 +3877,20 @@ int arch_enable_uv_irq(char *irq_name, u
 
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
 	return irq;
 }
 
@@ -3926,10 +3904,10 @@ void arch_disable_uv_irq(int mmr_blade, 
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
 	entry->mask = 1;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3995,14 +3973,85 @@ int __init arch_probe_nr_irqs(void)
 #endif
 #endif /* CONFIG_XEN */
 
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int node;
+	int ioapic, pin;
+	int trigger, polarity;
+
+	ioapic = irq_attr->ioapic;
+#ifdef CONFIG_XEN
+	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
+			    ioapic, irq);
+		return -EINVAL;
+	}
+#endif
+	if (!IO_APIC_IRQ(irq)) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	if (dev)
+		node = dev_to_node(dev);
+	else
+		node = cpu_to_node(boot_cpu_id);
+
+	desc = irq_to_desc_alloc_node(irq, node);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
+	pin = irq_attr->ioapic_pin;
+	trigger = irq_attr->trigger;
+	polarity = irq_attr->polarity;
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= NR_IRQS_LEGACY) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
+	}
+
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+	return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int ioapic, pin;
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	ioapic = irq_attr->ioapic;
+	pin = irq_attr->ioapic_pin;
+	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[ioapic].apicid, pin);
+		return 0;
+	}
+	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+	return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
 #ifdef CONFIG_ACPI
 
-#ifdef CONFIG_X86_32
-#ifndef CONFIG_XEN
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
@@ -4076,7 +4125,7 @@ int __init io_apic_get_unique_id(int ioa
 
 	return apic_id;
 }
-#endif /* !CONFIG_XEN */
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -4089,47 +4138,6 @@ int __init io_apic_get_version(int ioapi
 
 	return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
-
-#ifdef CONFIG_XEN
-	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
-			    ioapic, irq);
-		return -EINVAL;
-	}
-#endif
-
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc %d\n", irq);
-		return 0;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= NR_IRQS_LEGACY) {
-		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-	}
-
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-	return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4161,51 +4169,44 @@ int acpi_get_override_irq(int bus_irq, i
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-	int pin, ioapic, irq, irq_entry;
+	int pin, ioapic = 0, irq, irq_entry;
 	struct irq_desc *desc;
-	struct irq_cfg *cfg;
 	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			desc = irq_to_desc(irq);
-			cfg = desc->chip_data;
-			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq, desc,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-				continue;
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		ioapic = mp_find_ioapic(0);
+		if (ioapic < 0)
+			ioapic = 0;
+	}
+#endif
 
-			}
+	for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+		irq = pin_2_irq(irq_entry, ioapic, pin);
 
-			/*
-			 * Honour affinities which have been set in early boot
-			 */
-			if (desc->status &
-			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
-			else
-				mask = apic->target_cpus();
+		desc = irq_to_desc(irq);
 
-			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, mask);
-			else
-				set_ioapic_affinity_irq_desc(desc, mask);
-		}
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (desc->status &
+		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+			mask = desc->affinity;
+		else
+			mask = apic->target_cpus();
 
+		if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq_desc(desc, mask);
+		else
+			set_ioapic_affinity_irq_desc(desc, mask);
 	}
+
 }
 #endif
 
@@ -4288,29 +4289,21 @@ fake_ioapic_page:
 	}
 }
 
-static int __init ioapic_insert_resources(void)
+void __init ioapic_insert_resources(void)
 {
 	int i;
 	struct resource *r = ioapic_resources;
 
 	if (!r) {
-		if (nr_ioapics > 0) {
+		if (nr_ioapics > 0)
 			printk(KERN_ERR
 				"IO APIC resources couldn't be allocated.\n");
-			return -1;
-		}
-		return 0;
+		return;
 	}
 
 	for (i = 0; i < nr_ioapics; i++) {
 		insert_resource(&iomem_resource, r);
 		r++;
 	}
-
-	return 0;
 }
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
 #endif /* !CONFIG_XEN */
--- 12.2.orig/arch/x86/kernel/apic/probe_32-xen.c	2011-04-13 17:03:24.000000000 +0200
+++ 12.2/arch/x86/kernel/apic/probe_32-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -20,23 +20,12 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/smp.h>
-#include <linux/init.h>
 #include <asm/ipi.h>
 
-#include <linux/smp.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <asm/acpi.h>
 #include <asm/e820.h>
-#include <asm/setup.h>
 
 static int xen_phys_pkg_id(int cpuid_apic, int index_msb)
 {
--- 12.2.orig/arch/x86/kernel/cpu/amd.c	2012-05-08 10:49:58.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:08.000000000 +0200
@@ -471,7 +471,7 @@ static void __cpuinit early_init_amd(str
 		    (c->x86_model == 8 && c->x86_mask >= 8))
 			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) && !defined(CONFIG_XEN)
 	/* check CPU config space for extended APIC ID */
 	if (cpu_has_apic && c->x86 >= 0xf) {
 		unsigned int val;
--- 12.2.orig/arch/x86/kernel/cpu/common-xen.c	2012-04-20 15:13:47.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/common-xen.c	2011-05-18 10:46:35.000000000 +0200
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -66,7 +67,30 @@ void __init setup_cpu_local_masks(void)
 #endif
 }
 
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+	display_cacheinfo(c);
+#else
+	/* Not much we can do here... */
+	/* Check if at least it has cpuid */
+	if (c->cpuid_level == -1) {
+		/* No cpuid. It must be an ancient CPU */
+		if (c->x86 == 4)
+			strcpy(c->x86_model_id, "486");
+		else if (c->x86 == 3)
+			strcpy(c->x86_model_id, "386");
+	}
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+	.c_init		= default_init,
+	.c_vendor	= "Unknown",
+	.c_x86_vendor	= X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
 
 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
@@ -116,7 +140,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	/* data */
 	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
 
-	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x00000000, 0x00c09200 } } },
+	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x0000ffff, 0x00cf9200 } } },
 #endif
 	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
 	GDT_STACK_CANARY_INIT
@@ -312,7 +336,8 @@ static const char *__cpuinit table_looku
 	return NULL;		/* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -361,29 +386,6 @@ void switch_to_new_gdt(int cpu)
 
 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
 
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
-	display_cacheinfo(c);
-#else
-	/* Not much we can do here... */
-	/* Check if at least it has cpuid */
-	if (c->cpuid_level == -1) {
-		/* No cpuid. It must be an ancient CPU */
-		if (c->x86 == 4)
-			strcpy(c->x86_model_id, "486");
-		else if (c->x86 == 3)
-			strcpy(c->x86_model_id, "386");
-	}
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
-	.c_init	= default_init,
-	.c_vendor = "Unknown",
-	.c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
 static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
 {
 	unsigned int *v;
@@ -516,7 +518,6 @@ out:
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
-	static int printed;
 	int i;
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -533,13 +534,9 @@ static void __cpuinit get_cpu_vendor(str
 		}
 	}
 
-	if (!printed) {
-		printed++;
-		printk(KERN_ERR
-		    "CPU: vendor_id '%s' unknown, using generic init.\n", v);
-
-		printk(KERN_ERR "CPU: Your system may be unstable.\n");
-	}
+	printk_once(KERN_ERR
+			"CPU: vendor_id '%s' unknown, using generic init.\n" \
+			"CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
@@ -809,6 +806,12 @@ static void __cpuinit identify_cpu(struc
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
+	/* Clear/Set all flags overriden by options, after probe */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
@@ -854,6 +857,16 @@ static void __cpuinit identify_cpu(struc
 #endif
 
 	init_hypervisor(c);
+
+	/*
+	 * Clear/Set all flags overriden by options, need do it
+	 * before following smp all cpus cap AND.
+	 */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -866,10 +879,6 @@ static void __cpuinit identify_cpu(struc
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
@@ -902,6 +911,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
--- 12.2.orig/arch/x86/kernel/cpu/mcheck/mce.c	2012-06-06 13:45:25.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/mcheck/mce.c	2012-06-06 14:02:38.000000000 +0200
@@ -118,8 +118,10 @@ void mce_setup(struct mce *m)
 	m->time = get_seconds();
 	m->cpuvendor = boot_cpu_data.x86_vendor;
 	m->cpuid = cpuid_eax(1);
+#ifndef CONFIG_XEN
 	m->socketid = cpu_data(m->extcpu).phys_proc_id;
 	m->apicid = cpu_data(m->extcpu).initial_apicid;
+#endif
 	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
 }
 
@@ -2310,7 +2312,7 @@ static __init int mcheck_init_device(voi
 #ifdef CONFIG_X86_XEN_MCE
 	if (is_initial_xendomain()) {
 		/* Register vIRQ handler for MCE LOG processing */
-		extern void bind_virq_for_mce(void);
+		extern int bind_virq_for_mce(void);
 
 		printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n");
 		bind_virq_for_mce();
--- 12.2.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c	2011-08-15 10:44:51.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/mcheck/mce_dom0.c	2011-08-15 11:03:49.000000000 +0200
@@ -7,12 +7,17 @@
 #include <asm/hypercall.h>
 #include <asm/mce.h>
 
+static xen_mc_logical_cpu_t *g_physinfo;
+static unsigned int ncpus;
+
 static int convert_log(struct mc_info *mi)
 {
 	struct mcinfo_common *mic = NULL;
 	struct mcinfo_global *mc_global;
 	struct mcinfo_bank *mc_bank;
 	struct mce m;
+	unsigned int i;
+	bool found = false;
 
 	x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
 	if (mic == NULL)
@@ -21,9 +26,21 @@ static int convert_log(struct mc_info *m
 		return -1;
 	}
 
+	mce_setup(&m);
 	mc_global = (struct mcinfo_global*)mic;
 	m.mcgstatus = mc_global->mc_gstatus;
-	m.cpu = mc_global->mc_coreid;/*for test*/
+	m.apicid = mc_global->mc_apicid;
+
+	for (i = 0; i < ncpus; i++)
+		if (g_physinfo[i].mc_apicid == m.apicid) {
+			found = true;
+			break;
+		}
+	WARN_ON_ONCE(!found);
+	m.socketid = mc_global->mc_socketid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+
 	x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
 	do
 	{
@@ -36,7 +53,6 @@ static int convert_log(struct mc_info *m
 			m.status = mc_bank->mc_status;
 			m.addr = mc_bank->mc_addr;
 			m.tsc = mc_bank->mc_tsc;
-			m.res1 = mc_bank->mc_ctrl2;
 			m.bank = mc_bank->mc_bank;
 			printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n", 
 						m.bank, m.cpu, m.addr, m.status);
@@ -115,18 +131,54 @@ end:
 	return IRQ_HANDLED;
 }
 
-void bind_virq_for_mce(void)
+int __init bind_virq_for_mce(void)
 {
 	int ret;
+	xen_mc_t mc_op;
+
+	g_mi = kmalloc(sizeof(*g_mi), GFP_KERNEL);
+	if (!g_mi)
+		return -ENOMEM;
+
+	/* fetch physical CPU count */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, NULL);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		pr_err("MCE: Failed to get physical CPU count\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* fetch CPU physical info for later reference */
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(*g_physinfo) * ncpus, GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		pr_err("MCE: Failed to get physical CPUs' info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
 
 	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0, 
 		mce_dom0_interrupt, 0, "mce", NULL);
 
-	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
-	if (ret < 0)
-		pr_err("MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+	if (ret < 0) {
+		pr_err("MCE: Failed to bind vIRQ for Dom0\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
 
 	/* Log the machine checks left over from the previous reset. */
 	mce_dom0_interrupt(VIRQ_MCA, NULL);
+
+	return 0;
 }
 
--- 12.2.orig/arch/x86/kernel/e820-xen.c	2011-09-23 15:54:55.000000000 +0200
+++ 12.2/arch/x86/kernel/e820-xen.c	2011-09-23 15:55:04.000000000 +0200
@@ -667,7 +667,7 @@ __init int e820_search_gap(unsigned long
  */
 __init void e820_setup_gap(void)
 {
-	unsigned long gapstart, gapsize, round;
+	unsigned long gapstart, gapsize;
 	int found;
 
 	gapstart = 0x10000000;
@@ -676,24 +676,18 @@ __init void e820_setup_gap(void)
 
 #ifdef CONFIG_X86_64
 	if (!found) {
-		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
-		       "address range\n"
-		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
-		       "registers may break!\n");
+		printk(KERN_ERR
+	"PCI: Warning: Cannot find a gap in the 32bit address range\n"
+	"PCI: Unassigned devices with 32bit resource registers may break!\n");
 		found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0);
 		WARN_ON(!found);
 	}
 #endif
 
 	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
+	 * e820_reserve_resources_late protect stolen RAM already
 	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
+	pci_mem_start = gapstart;
 
 	printk(KERN_INFO
 	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1499,6 +1493,25 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+	unsigned long mb = pos >> 20;
+
+	/* To 64kB in the first megabyte */
+	if (!mb)
+		return 64*1024;
+
+	/* To 1MB in the first 16MB */
+	if (mb < 16)
+		return 1024*1024;
+
+	/* To 32MB for anything above that */
+	return 32*1024*1024;
+}
+
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1510,6 +1523,26 @@ void __init e820_reserve_resources_late(
 			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
+
+	/*
+	 * Try to bump up RAM regions to reasonable boundaries to
+	 * avoid stolen RAM:
+	 */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *entry = &e820.map[i];
+		u64 start, end;
+
+		if (entry->type != E820_RAM)
+			continue;
+		start = entry->addr + entry->size;
+		end = round_up(start, ram_alignment(start)) - 1;
+		if (end > MAX_RESOURCE_SIZE)
+			end = MAX_RESOURCE_SIZE;
+		if (start >= end)
+			continue;
+		reserve_region_with_split(&iomem_resource, start, end,
+					  "RAM buffer");
+	}
 }
 
 #undef e820
--- 12.2.orig/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:15:06.000000000 +0100
+++ 12.2/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:15:32.000000000 +0100
@@ -48,7 +48,6 @@
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page_types.h>
-#include <asm/desc.h>
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include <asm/processor-flags.h>
@@ -88,7 +87,7 @@ NMI_MASK	= 0x80000000
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 #define preempt_stop(clobbers)
-#define resume_kernel		restore_nocheck
+#define resume_kernel		restore_all
 #endif
 
 .macro TRACE_IRQS_IRET
@@ -376,7 +375,7 @@ END(ret_from_exception)
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
-	jnz restore_nocheck
+	jnz restore_all
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
@@ -570,6 +569,8 @@ syscall_exit:
 	jne syscall_exit_work
 
 restore_all:
+	TRACE_IRQS_IRET
+restore_all_notrace:
 #ifndef CONFIG_XEN
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
 	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -595,8 +596,6 @@ restore_nocheck:
 	CFI_REMEMBER_STATE
 	jnz restore_all_enable_events	#        != 0 => enable event delivery
 #endif
-	TRACE_IRQS_IRET
-restore_nocheck_notrace:
 	RESTORE_REGS 4			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 irq_return:
@@ -633,22 +632,34 @@ ldt_ss:
 	jne restore_nocheck
 #endif
 
-	/* If returning to userspace with 16bit stack,
-	 * try to fix the higher word of ESP, as the CPU
-	 * won't restore it.
-	 * This is an "official" bug of all the x86-compatible
-	 * CPUs, which we can try to work around to make
-	 * dosemu and wine happy. */
-	movl PT_OLDESP(%esp), %eax
-	movl %esp, %edx
-	call patch_espfix_desc
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+	mov %esp, %edx			/* load kernel esp */
+	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
+	mov %dx, %ax			/* eax: new kernel esp */
+	sub %eax, %edx			/* offset (low word is 0) */
+	PER_CPU(gdt_page, %ebx)
+	shr $16, %edx
+	mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
+	mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
 	pushl $__ESPFIX_SS
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl %eax
+	push %eax			/* new kernel esp */
 	CFI_ADJUST_CFA_OFFSET 4
+	/* Disable interrupts, but do not irqtrace this section: we
+	 * will soon execute iret and the tracer was already set to
+	 * the irqstate after the iret */
 	DISABLE_INTERRUPTS(CLBR_EAX)
-	TRACE_IRQS_OFF
-	lss (%esp), %esp
+	lss (%esp), %esp		/* switch to espfix segment */
 	CFI_ADJUST_CFA_OFFSET -8
 	jmp restore_nocheck
 #else
@@ -787,15 +798,24 @@ PTREGSCALL(vm86old)
 
 #ifndef CONFIG_XEN
 .macro FIXUP_ESPFIX_STACK
-	/* since we are on a wrong stack, we cant make it a C code :( */
+/*
+ * Switch back for ESPFIX stack to the normal zerobased stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+	/* fixup the stack */
 	PER_CPU(gdt_page, %ebx)
-	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
-	addl %esp, %eax
+	mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
+	mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+	shl $16, %eax
+	addl %esp, %eax			/* the adjusted stack pointer */
 	pushl $__KERNEL_DS
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	lss (%esp), %esp
+	lss (%esp), %esp		/* switch to the normal stack segment */
 	CFI_ADJUST_CFA_OFFSET -8
 .endm
 .macro UNWIND_ESPFIX_STACK
@@ -1286,6 +1306,7 @@ ENTRY(ftrace_graph_caller)
 	pushl %edx
 	movl 0xc(%esp), %edx
 	lea 0x4(%ebp), %eax
+	movl (%ebp), %ecx
 	subl $MCOUNT_INSN_SIZE, %edx
 	call prepare_ftrace_return
 	popl %edx
@@ -1300,6 +1321,7 @@ return_to_handler:
 	pushl %eax
 	pushl %ecx
 	pushl %edx
+	movl %ebp, %eax
 	call ftrace_return_to_handler
 	movl %eax, 0xc(%esp)
 	popl %edx
@@ -1599,7 +1621,7 @@ nmi_stack_correct:
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
-	jmp restore_nocheck_notrace
+	jmp restore_all_notrace
 	CFI_ENDPROC
 
 nmi_stack_fixup:
--- 12.2.orig/arch/x86/kernel/entry_64.S	2012-04-10 16:47:12.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_64.S	2012-04-10 17:00:26.000000000 +0200
@@ -1409,7 +1409,7 @@ apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
 zeroentry xen_debug do_debug
 zeroentry xen_int3 do_int3
 errorentry xen_stack_segment do_stack_segment
--- 12.2.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:38:41.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:39:17.000000000 +0200
@@ -139,6 +139,7 @@ ENTRY(ftrace_graph_caller)
 
 	leaq 8(%rbp), %rdi
 	movq 0x38(%rsp), %rsi
+	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
@@ -151,27 +152,15 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
@@ -872,6 +861,8 @@ END(\sym)
 #ifdef CONFIG_SMP
 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+	reboot_interrupt smp_reboot_interrupt
 #endif
 
 #ifdef CONFIG_X86_UV
@@ -903,10 +894,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_STAR
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
-	threshold_interrupt mce_threshold_interrupt
+	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
 
+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+	mce_self_interrupt smp_mce_self_interrupt
+#endif
+
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
 	call_function_single_interrupt smp_call_function_single_interrupt
@@ -920,6 +916,11 @@ apicinterrupt ERROR_APIC_VECTOR \
 	error_interrupt smp_error_interrupt
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
+
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PENDING_VECTOR \
+	perf_pending_interrupt smp_perf_pending_interrupt
+#endif
 #endif /* !CONFIG_XEN */
 
 /*
@@ -1222,7 +1223,7 @@ paranoiderrorentry stack_segment do_stac
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
 
 #ifndef CONFIG_XEN
--- 12.2.orig/arch/x86/kernel/head_32-xen.S	2011-08-09 11:00:03.000000000 +0200
+++ 12.2/arch/x86/kernel/head_32-xen.S	2011-08-09 11:03:22.000000000 +0200
@@ -119,12 +119,6 @@ ENTRY(hypercall_page)
 	CFI_ENDPROC
 
 /*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
-/*
  * BSS section
  */
 .section ".bss.page_aligned","wa"
--- 12.2.orig/arch/x86/kernel/head_64-xen.S	2011-08-09 11:00:01.000000000 +0200
+++ 12.2/arch/x86/kernel/head_64-xen.S	2011-08-09 11:03:31.000000000 +0200
@@ -15,7 +15,6 @@
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/elfnote.h>
-#include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/msr.h>
--- 12.2.orig/arch/x86/kernel/init_task.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/kernel/init_task.c	2011-04-13 13:55:08.000000000 +0200
@@ -31,6 +31,7 @@ union thread_union init_thread_union __i
 struct task_struct init_task = INIT_TASK(init_task);
 EXPORT_SYMBOL(init_task);
 
+#ifndef CONFIG_X86_NO_TSS
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -39,4 +40,4 @@ EXPORT_SYMBOL(init_task);
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
+#endif
--- 12.2.orig/arch/x86/kernel/irq-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/kernel/irq-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -12,6 +12,8 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
@@ -26,9 +28,10 @@ void (*generic_interrupt_extension)(void
  */
 void ack_bad_irq(unsigned int irq)
 {
-	printk(KERN_ERR "unexpected IRQ trap at irq %02x\n", irq);
+	if (printk_ratelimit())
+		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
 	/*
 	 * Currently unexpected vectors happen only on SMP and APIC.
 	 * We _must_ ack these because every local APIC has only N
@@ -38,8 +41,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
+	ack_APIC_irq();
 #endif
 }
 
@@ -65,6 +67,14 @@ static int show_other_interrupts(struct 
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "%*s: ", prec, "CNT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "%*s: ", prec, "PND");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+	seq_printf(p, "  Performance pending work\n");
 #endif
 #ifndef CONFIG_XEN
 	if (generic_interrupt_extension) {
@@ -95,17 +105,27 @@ static int show_other_interrupts(struct 
 	seq_printf(p, "  Spinlock wakeups\n");
 #endif
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	seq_printf(p, "%*s: ", prec, "TRM");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
-# endif
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+	seq_printf(p, "%*s: ", prec, "MCE");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+	seq_printf(p, "  Machine check exceptions\n");
+	seq_printf(p, "%*s: ", prec, "MCP");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+	seq_printf(p, "  Machine check polls\n");
 #endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
@@ -177,6 +197,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
+	sum += irq_stats(cpu)->apic_perf_irqs;
+	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 #ifndef CONFIG_XEN
 	if (generic_interrupt_extension)
@@ -191,11 +213,15 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_lock_count;
 #endif
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
+#ifdef CONFIG_X86_NEW_MCE
+	sum += per_cpu(mce_exception_count, cpu);
+	sum += per_cpu(mce_poll_count, cpu);
 #endif
 	return sum;
 }
@@ -231,14 +257,11 @@ unsigned int __irq_entry do_IRQ(struct p
 	irq = __get_cpu_var(vector_irq)[vector];
 
 	if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-		if (!disable_apic)
-			ack_APIC_irq();
-#endif
+		ack_APIC_irq();
 
 		if (printk_ratelimit())
-			printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
-			       __func__, smp_processor_id(), vector, irq);
+			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+				__func__, smp_processor_id(), vector, irq);
 	}
 
 	irq_exit();
--- 12.2.orig/arch/x86/kernel/microcode_core-xen.c	2011-12-01 15:03:30.000000000 +0100
+++ 12.2/arch/x86/kernel/microcode_core-xen.c	2011-12-01 15:33:41.000000000 +0100
@@ -22,27 +22,21 @@
  *	2 of the License, or (at your option) any later version.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -53,7 +47,18 @@ module_param(verbose, int, 0644);
 
 #define MICROCODE_VERSION	"2.00-xen"
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -90,18 +95,16 @@ static int microcode_open(struct inode *
 static ssize_t microcode_write(struct file *file, const char __user *buf,
 			       size_t len, loff_t *ppos)
 {
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-		       num_physpages);
- 		return -EINVAL;
+		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+		return ret;
  	}
 
 	mutex_lock(&microcode_mutex);
 
-	ret = do_microcode_update(buf, len);
-	if (!ret)
+	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
 	mutex_unlock(&microcode_mutex);
@@ -110,15 +113,16 @@ static ssize_t microcode_write(struct fi
 }
 
 static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
+	.owner			= THIS_MODULE,
+	.write			= microcode_write,
+	.open			= microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
+	.minor			= MICROCODE_MINOR,
+	.name			= "microcode",
+	.devnode		= "cpu/microcode",
+	.fops			= &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -130,16 +134,14 @@ static int __init microcode_dev_init(voi
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
+		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
 	return 0;
 }
 
-static void microcode_dev_exit(void)
+static void __exit microcode_dev_exit(void)
 {
 	misc_deregister(&microcode_dev);
 }
@@ -191,38 +193,36 @@ static int __init microcode_init(void)
 	else if (c->x86_vendor == X86_VENDOR_AMD)
 		fw_name = "amd-ucode/microcode_amd.bin";
 	else {
-		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		pr_err("microcode: no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
-	error = microcode_dev_init();
-	if (error)
-		return error;
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
-	if (IS_ERR(microcode_pdev)) {
-		microcode_dev_exit();
+	if (IS_ERR(microcode_pdev))
 		return PTR_ERR(microcode_pdev);
-	}
 
 	request_microcode(fw_name);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION
+	error = microcode_dev_init();
+	if (error) {
+		platform_device_unregister(microcode_pdev);
+		return error;
+	}
+
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 	       " <tigran@aivazian.fsnet.co.uk>,"
 	       " Peter Oruba\n");
 
 	return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
 	microcode_dev_exit();
 	platform_device_unregister(microcode_pdev);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
--- 12.2.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/pci.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
@@ -904,24 +905,17 @@ static
 inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
-		      int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	if (!mpc_new_phys) {
-		pr_info("No spare slots, try to append...take your risk, "
-			"new mpc_length %x\n", count);
-	} else {
-		if (count <= mpc_new_length)
-			pr_info("No spare slots, try to append..., "
-				"new mpc_length %x\n", count);
-		else {
-			pr_err("mpc_new_length %lx is too small\n",
-				mpc_new_length);
-			return -1;
-		}
+	int ret = 0;
+
+	if (!mpc_new_phys || count <= mpc_new_length) {
+		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+		return -1;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -980,7 +974,7 @@ static int  __init replace_intsrc_all(st
 		} else {
 			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 			count += sizeof(struct mpc_intsrc);
-			if (!check_slot(mpc_new_phys, mpc_new_length, count))
+			if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
 				goto out;
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
 			mpc->length = count;
@@ -997,11 +991,14 @@ out:
 	return 0;
 }
 
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
 
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	return 0;
 }
 early_param("update_mptable", update_mptable_setup);
@@ -1014,6 +1011,9 @@ static int __initdata alloc_mptable;
 static int __init parse_alloc_mptable_opt(char *p)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	alloc_mptable = 1;
 	if (!p)
 		return 0;
--- 12.2.orig/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:08:49.000000000 +0200
+++ 12.2/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:30:36.000000000 +0200
@@ -28,6 +28,8 @@ int no_iommu __initdata;
 #ifndef CONFIG_XEN
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
+
+int iommu_pass_through;
 #endif
 
 dma_addr_t bad_dma_address __read_mostly = 0;
@@ -262,8 +264,12 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
 #endif
-
 #ifndef CONFIG_XEN
+		if (!strncmp(p, "pt", 2)) {
+			iommu_pass_through = 1;
+			return 1;
+		}
+
 		gart_parse_options(p);
 #endif
 
@@ -371,6 +377,8 @@ static int __init pci_iommu_init(void)
 void pci_iommu_shutdown(void)
 {
 	gart_iommu_shutdown();
+
+	amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
 fs_initcall(pci_iommu_init);
--- 12.2.orig/arch/x86/kernel/process-xen.c	2011-03-03 16:06:40.000000000 +0100
+++ 12.2/arch/x86/kernel/process-xen.c	2011-03-03 16:07:25.000000000 +0100
@@ -8,12 +8,15 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/random.h>
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 #include <xen/evtchn.h>
 
 unsigned long idle_halt;
@@ -46,6 +49,8 @@ void free_thread_xstate(struct task_stru
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+
+	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -59,7 +64,7 @@ void arch_task_cache_init(void)
         task_xstate_cachep =
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
 /*
@@ -85,8 +90,6 @@ void exit_thread(void)
 		t->io_bitmap_max = 0;
 		kfree(bp);
 	}
-
-	ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -471,16 +474,12 @@ static void c1e_idle(void)
 		if (!cpumask_test_cpu(cpu, c1e_mask)) {
 			cpumask_set_cpu(cpu, c1e_mask);
 			/*
-			 * Force broadcast so ACPI can not interfere. Needs
-			 * to run with interrupts enabled as it uses
-			 * smp_function_call.
+			 * Force broadcast so ACPI can not interfere.
 			 */
-			local_irq_enable();
 			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
 					   &cpu);
 			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
 			       cpu);
-			local_irq_disable();
 		}
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
@@ -575,3 +574,16 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() % 8192;
+	return sp & ~0xf;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
--- 12.2.orig/arch/x86/kernel/process_32-xen.c	2012-02-29 14:15:10.000000000 +0100
+++ 12.2/arch/x86/kernel/process_32-xen.c	2012-02-29 14:15:29.000000000 +0100
@@ -9,8 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -33,7 +31,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
 #include <linux/percpu.h>
@@ -299,7 +296,8 @@ int copy_thread(unsigned long clone_flag
 		p->thread.io_bitmap_max = 0;
 	}
 
-	ds_copy_thread(p, current);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -470,7 +468,7 @@ __switch_to(struct task_struct *prev_p, 
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
@@ -560,15 +558,3 @@ unsigned long get_wchan(struct task_stru
 	return 0;
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- 12.2.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:37:17.000000000 +0100
+++ 12.2/arch/x86/kernel/process_64-xen.c	2011-02-02 08:37:47.000000000 +0100
@@ -17,8 +17,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -35,7 +33,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -344,7 +341,8 @@ int copy_thread(unsigned long clone_flag
 	}
         p->thread.iopl = current->thread.iopl;
 
-	ds_copy_thread(p, me);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -506,7 +504,7 @@ __switch_to(struct task_struct *prev_p, 
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
@@ -723,15 +721,3 @@ long sys_arch_prctl(int code, unsigned l
 	return do_arch_prctl(current, code, addr);
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- 12.2.orig/arch/x86/kernel/setup-xen.c	2012-06-06 14:01:47.000000000 +0200
+++ 12.2/arch/x86/kernel/setup-xen.c	2012-06-06 14:02:35.000000000 +0200
@@ -142,6 +142,14 @@ EXPORT_SYMBOL(xen_start_info);
 #define ARCH_SETUP
 #endif
 
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
 RESERVE_BRK(dmi_alloc, 65536);
 
 unsigned int boot_cpu_id __read_mostly;
@@ -247,8 +255,8 @@ unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features = X86_CR4_PAE;
 #endif
 
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
 
 /*
  * Setup options
@@ -316,6 +324,20 @@ void * __init extend_brk(size_t size, si
 	return ret;
 }
 
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+static void __init init_gbpages(void)
+{
+	if (direct_gbpages && cpu_has_gbpages)
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+	else
+		direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
@@ -328,15 +350,13 @@ static void __init reserve_brk(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
-
+#ifndef CONFIG_XEN
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
@@ -391,8 +411,14 @@ static void __init relocate_initrd(void)
 		" %08llx - %08llx\n",
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
-}
+#else
+	printk(KERN_ERR "initrd extends beyond end of memory "
+	       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+	       __pa(xen_start_info->mod_start) + xen_start_info->mod_len,
+	       max_low_pfn_mapped << PAGE_SHIFT);
+	initrd_start = 0;
 #endif
+}
 
 static void __init reserve_initrd(void)
 {
@@ -400,7 +426,7 @@ static void __init reserve_initrd(void)
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -409,7 +435,7 @@ static void __init reserve_initrd(void)
 	unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
 	unsigned long ramdisk_size  = xen_start_info->mod_len;
 	unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
-	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	unsigned long end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!xen_start_info->mod_start || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
@@ -442,14 +468,8 @@ static void __init reserve_initrd(void)
 		return;
 	}
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 	relocate_initrd();
-#else
-	printk(KERN_ERR "initrd extends beyond end of memory "
-	       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-	       ramdisk_end, end_of_lowmem);
-	initrd_start = 0;
-#endif
+
 	free_early(ramdisk_image, ramdisk_end);
 }
 #else
@@ -721,6 +741,19 @@ static struct dmi_system_id __initdata b
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
 		},
 	},
+	{
+	/*
+	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
+	 * It has different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
+	 * match only DMI_BOARD_NAME and see if there is more bad products
+	 * with this vendor.
+	 */
+		.callback = dmi_low_memory_corruption,
+		.ident = "AMI BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
+		},
+	},
 #endif
 	{}
 };
@@ -788,6 +821,12 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	saved_video_mode = boot_params.hdr.vid_mode;
 	bootloader_type = boot_params.hdr.type_of_loader;
+	if ((bootloader_type >> 4) == 0xe) {
+		bootloader_type &= 0xf;
+		bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+	}
+	bootloader_version  = bootloader_type & 0xf;
+	bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
 
 #ifdef CONFIG_BLK_DEV_RAM
 	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
@@ -970,14 +1009,22 @@ void __init setup_arch(char **cmdline_p)
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+#ifndef CONFIG_XEN
+	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
+#endif
 #endif
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 	setup_bios_corruption_check();
 #endif
 
+	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+			max_pfn_mapped<<PAGE_SHIFT);
+
 	reserve_brk();
 
+	init_gbpages();
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
@@ -1209,24 +1256,6 @@ void __init setup_arch(char **cmdline_p)
 #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 
 /**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-
-/**
  * x86_quirk_intr_init - post gate setup interrupt initialisation
  *
  * Description:
--- 12.2.orig/arch/x86/kernel/smp-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/kernel/smp-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -135,11 +135,36 @@ void xen_send_call_func_ipi(const struct
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
+irqreturn_t smp_reboot_interrupt(int irq, void *dev_id)
+{
+	stop_this_cpu(NULL);
+
+	return IRQ_HANDLED;
+}
+
 void xen_smp_send_stop(void)
 {
 	unsigned long flags;
+	unsigned long wait;
+
+	/*
+	 * Use an own vector here because smp_call_function
+	 * does lots of things not suitable in a panic situation.
+	 * On most systems we could also use an NMI here,
+	 * but there are a few systems around where NMI
+	 * is problematic so stay with a non-NMI for now
+	 * (this implies we cannot stop CPUs spinning with irq off
+	 * currently)
+	 */
+	if (num_online_cpus() > 1) {
+		xen_send_IPI_allbutself(REBOOT_VECTOR);
+
+		/* Don't wait longer than a second */
+		wait = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && wait--)
+			udelay(1);
+	}
 
-	smp_call_function(stop_this_cpu, NULL, 0);
 	local_irq_save(flags);
 	disable_all_local_evtchn();
 	local_irq_restore(flags);
--- 12.2.orig/arch/x86/kernel/traps-xen.c	2012-06-20 12:15:58.000000000 +0200
+++ 12.2/arch/x86/kernel/traps-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -45,6 +45,7 @@
 #include <linux/edac.h>
 #endif
 
+#include <asm/kmemcheck.h>
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
 #include <asm/debugreg.h>
@@ -53,6 +54,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
+#include <asm/mce.h>
 
 #include <asm/mach_traps.h>
 
@@ -64,8 +66,6 @@
 #include <asm/setup.h>
 #include <asm/traps.h>
 
-#include "cpu/mcheck/mce.h"
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -347,6 +347,9 @@ io_check_error(unsigned char reason, str
 	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
 	show_registers(regs);
 
+	if (panic_on_io_nmi)
+		panic("NMI IOCK error: Not continuing");
+
 	/* Re-enable the IOCK line, wait for a few seconds */
 	clear_io_check_error(reason);
 }
@@ -527,6 +530,10 @@ dotraplinkage void __kprobes do_debug(st
 
 	get_debugreg(condition, 6);
 
+	/* Catch kmemcheck conditions first of all! */
+	if (condition & DR_STEP && kmemcheck_trap(regs))
+		return;
+
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
@@ -792,15 +799,15 @@ unsigned long patch_espfix_desc(unsigned
 
 	return new_kesp;
 }
-#else
+#endif
+
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
 
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
-#endif
 #endif /* CONFIG_XEN */
 
 /*
@@ -834,9 +841,6 @@ asmlinkage void math_state_restore(void)
 	}
 
 	/* NB. 'clts' is done for us by Xen during virtual trap. */
-#ifdef CONFIG_X86_32
-	restore_fpu(tsk);
-#else
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
@@ -845,7 +849,7 @@ asmlinkage void math_state_restore(void)
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
-#endif
+
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
 }
--- 12.2.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:42:26.000000000 +0100
+++ 12.2/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeo
 			return;
 		}
 
-		/*
-		 * Surround the RDTSC by barriers, to make sure it's not
-		 * speculated to outside the seqlock critical section and
-		 * does not cause time warps:
-		 */
-		rdtsc_barrier();
 		now = vread();
-		rdtsc_barrier();
-
 		base = __vsyscall_gtod_data.clock.cycle_last;
 		mask = __vsyscall_gtod_data.clock.mask;
 		mult = __vsyscall_gtod_data.clock.mult;
--- 12.2.orig/arch/x86/mm/dump_pagetables-xen.c	2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/mm/dump_pagetables-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -173,13 +173,14 @@ static void note_page(struct seq_file *m
 		   st->current_address >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
+		int width = sizeof(unsigned long) * 2;
 
 		/*
 		 * Now print the actual finished series
 		 */
-		seq_printf(m, "0x%p-0x%p   ",
-			   (void *)st->start_address,
-			   (void *)st->current_address);
+		seq_printf(m, "0x%0*lx-0x%0*lx   ",
+			   width, st->start_address,
+			   width, st->current_address);
 
 		delta = (st->current_address - st->start_address) >> 10;
 		while (!(delta & 1023) && unit[1]) {
--- 12.2.orig/arch/x86/mm/fault-xen.c	2011-08-15 11:03:30.000000000 +0200
+++ 12.2/arch/x86/mm/fault-xen.c	2011-08-15 11:03:45.000000000 +0200
@@ -3,40 +3,18 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h>		/* STACK_END_MAGIC		*/
+#include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
+#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+
+#include <asm/traps.h>			/* dotraplinkage, ...		*/
+#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
+#include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 
 /*
  * Page fault error code bits:
@@ -229,10 +207,7 @@ static inline pmd_t *vmalloc_sync_one(pg
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
-		bool lazy = percpu_read(xen_lazy_mmu);
-
-		percpu_write(xen_lazy_mmu, false);
+	if (!pmd_present(*pmd))
 #if CONFIG_XEN_COMPAT > 0x030002
 		set_pmd(pmd, *pmd_k);
 #else
@@ -242,10 +217,8 @@ static inline pmd_t *vmalloc_sync_one(pg
 		 */
 		set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
 #endif
-		percpu_write(xen_lazy_mmu, lazy);
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
@@ -475,10 +448,11 @@ static noinline int vmalloc_fault(unsign
 }
 
 static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -563,8 +537,6 @@ bad:
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	static int once;
-
 	if (address != regs->ip)
 		return 0;
 
@@ -574,10 +546,7 @@ static int is_errata93(struct pt_regs *r
 	address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!once) {
-			printk(errata93_warning);
-			once = 1;
-		}
+		printk_once(errata93_warning);
 		regs->ip = address;
 		return 1;
 	}
@@ -751,7 +720,7 @@ show_signal_msg(struct pt_regs *regs, un
 	if (!printk_ratelimit())
 		return;
 
-	printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), address,
 		(void *)regs->ip, (void *)regs->sp, error_code);
@@ -1013,11 +982,17 @@ do_page_fault(struct pt_regs *regs, unsi
 	tsk = current;
 	mm = tsk->mm;
 
-	prefetchw(&mm->mmap_sem);
-
 	/* Get the faulting address: */
 	address = read_cr2();
 
+	/*
+	 * Detect and handle instructions that would cause a page fault for
+	 * both a tracked kernel page and a userspace page.
+	 */
+	if (kmemcheck_active(regs))
+		kmemcheck_hide(regs);
+	prefetchw(&mm->mmap_sem);
+
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
@@ -1046,9 +1021,13 @@ do_page_fault(struct pt_regs *regs, unsi
 			return;
 		}
 
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		    vmalloc_fault(address) >= 0)
-			return;
+		if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+			if (vmalloc_fault(address) >= 0)
+				return;
+
+			if (kmemcheck_fault(regs, address, error_code))
+				return;
+		}
 
 		/* Can handle a stale RO->RW TLB: */
 		if (spurious_fault(error_code, address))
@@ -1087,6 +1066,8 @@ do_page_fault(struct pt_regs *regs, unsi
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1173,17 +1154,22 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault:
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 
 	check_v8086_mode(regs, address, tsk);
 
--- 12.2.orig/arch/x86/mm/highmem_32-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/mm/highmem_32-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
-	/*arch_flush_lazy_mmu_mode();*/
 
 	return (void *)vaddr;
 }
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km
 #endif
 	}
 
-	/*arch_flush_lazy_mmu_mode();*/
 	pagefault_enable();
 }
 
@@ -150,6 +148,7 @@ EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_prot);
 #ifdef CONFIG_HIGHPTE
 EXPORT_SYMBOL(kmap_atomic_to_page);
 #endif
--- 12.2.orig/arch/x86/mm/hypervisor.c	2012-05-31 14:44:38.000000000 +0200
+++ 12.2/arch/x86/mm/hypervisor.c	2011-03-23 09:59:29.000000000 +0100
@@ -116,8 +116,8 @@ static int _xen_multicall_flush(bool ret
 	return 0;
 }
 
-void xen_multicall_flush(bool force) {
-	if (force || use_lazy_mmu_mode())
+void xen_multicall_flush(void) {
+	if (use_lazy_mmu_mode())
 		_xen_multicall_flush(false);
 }
 
--- 12.2.orig/arch/x86/mm/init-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/mm/init-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
@@ -11,6 +12,10 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/proto.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __meminitdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
@@ -31,6 +36,69 @@ extern unsigned long extend_init_mapping
 extern void xen_finish_init_mapping(void);
 #endif
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
@@ -127,20 +195,6 @@ static int __meminit save_mr(struct map_
 	return nr_range;
 }
 
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-static void __init init_gbpages(void)
-{
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -160,10 +214,7 @@ unsigned long __init_refok init_memory_m
 
 	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
 
-	if (!after_bootmem)
-		init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
@@ -175,12 +226,9 @@ unsigned long __init_refok init_memory_m
 	use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
@@ -191,7 +239,6 @@ unsigned long __init_refok init_memory_m
 		set_in_cr4(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
-#endif
 
 	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
--- 12.2.orig/arch/x86/mm/init_32-xen.c	2011-11-03 12:27:10.000000000 +0100
+++ 12.2/arch/x86/mm/init_32-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -52,12 +52,9 @@
 #include <asm/swiotlb.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
@@ -122,7 +119,7 @@ static pte_t * __init one_page_table_ini
 		pte_t *page_table = NULL;
 
 		if (after_bootmem) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
 			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
 			if (!page_table)
@@ -569,7 +566,7 @@ static inline void save_pg_dir(void)
 }
 #endif /* !CONFIG_ACPI_SLEEP */
 
-void zap_low_mappings(void)
+void zap_low_mappings(bool early)
 {
 	int i;
 
@@ -586,64 +583,16 @@ void zap_low_mappings(void)
 		set_pgd(swapper_pg_dir+i, __pgd(0));
 #endif
 	}
-	flush_tlb_all();
-}
 
-int nx_enabled;
+	if (early)
+		__flush_tlb();
+	else
+		flush_tlb_all();
+}
 
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str || !strcmp(str, "on")) {
-		if (cpu_has_nx) {
-			__supported_pte_mask |= _PAGE_NX;
-			disable_nx = 0;
-		}
-	} else {
-		if (!strcmp(str, "off")) {
-			disable_nx = 1;
-			__supported_pte_mask &= ~_PAGE_NX;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
@@ -763,15 +712,15 @@ void __init initmem_init(unsigned long s
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memory_present(0, 0, highend_pfn);
 	e820_register_active_regions(0, 0, highend_pfn);
+	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memory_present(0, 0, max_low_pfn);
 	e820_register_active_regions(0, 0, max_low_pfn);
+	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
@@ -1074,7 +1023,7 @@ void __init mem_init(void)
 		test_wp_bit();
 
 	save_pg_dir();
-	zap_low_mappings();
+	zap_low_mappings(true);
 
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
--- 12.2.orig/arch/x86/mm/init_64-xen.c	2011-11-03 12:27:12.000000000 +0100
+++ 12.2/arch/x86/mm/init_64-xen.c	2011-06-30 17:04:34.000000000 +0200
@@ -56,21 +56,11 @@
 
 #include <xen/features.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 #if CONFIG_XEN_COMPAT <= 0x030002
 unsigned int __kernel_page_user;
 EXPORT_SYMBOL(__kernel_page_user);
 #endif
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
 extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
 
@@ -151,39 +141,6 @@ early_param("gbpages", parse_direct_gbpa
 pteval_t __supported_pte_mask __read_mostly = ~0UL;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on	Enable (default)
- * off	Disable
- */
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
@@ -213,7 +170,7 @@ static __ref void *spp_getpage(void)
 	void *ptr;
 
 	if (after_bootmem)
-		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
+		ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
 	else if (e820_table_end < e820_table_top) {
 		ptr = __va(e820_table_end << PAGE_SHIFT);
 		e820_table_end++;
@@ -399,7 +356,7 @@ static __ref void *alloc_low_page(unsign
 	void *adr;
 
 	if (after_bootmem) {
-		adr = (void *)get_zeroed_page(GFP_ATOMIC);
+		adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
 		*phys = __pa(adr);
 
 		return adr;
@@ -810,7 +767,7 @@ void __init xen_finish_init_mapping(void
 		e820_table_top = e820_table_end;
 }
 
-unsigned long __init
+unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
@@ -879,6 +836,7 @@ void __init initmem_init(unsigned long s
 	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -889,13 +847,21 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-	memory_present(0, 0, max_pfn);
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+
+	/*
+	 * clear the default setting with node 0
+	 * note: don't use nodes_clear here, that is really clearing when
+	 *	 numa support is not compiled in, and later node_set_state
+	 *	 will not set it back.
+	 */
+	node_clear_state(0, N_NORMAL_MEMORY);
+
 	free_area_init_nodes(max_zone_pfns);
 
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
-#endif
 
 /*
  * Memory hotplug specific functions
@@ -1090,7 +1056,7 @@ int __init reserve_bootmem_generic(unsig
 		return ret;
 
 #else
-	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+	reserve_bootmem(phys, len, flags);
 #endif
 
 #ifndef CONFIG_XEN
--- 12.2.orig/arch/x86/mm/iomap_32-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/mm/iomap_32-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -84,7 +84,6 @@ iounmap_atomic(void *kvaddr, enum km_typ
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	/*arch_flush_lazy_mmu_mode();*/
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
--- 12.2.orig/arch/x86/mm/pageattr-xen.c	2011-03-23 09:58:23.000000000 +0100
+++ 12.2/arch/x86/mm/pageattr-xen.c	2011-03-23 09:59:25.000000000 +0100
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/pfn.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -486,7 +487,7 @@ static int split_large_page(pte_t *kpte,
 
 	if (!debug_pagealloc)
 		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL, 0);
+	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
 	if (!debug_pagealloc)
 		spin_lock(&cpa_lock);
 	if (!base)
@@ -610,9 +611,12 @@ static int __change_page_attr(struct cpa
 	unsigned int level;
 	pte_t *kpte, old_pte;
 
-	if (cpa->flags & CPA_PAGES_ARRAY)
-		address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-	else if (cpa->flags & CPA_ARRAY)
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		address = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
 		address = cpa->vaddr[cpa->curpage];
 	else
 		address = *cpa->vaddr;
@@ -724,8 +728,9 @@ static int __change_page_attr_set_clr(st
 static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
-	int ret = 0;
-	unsigned long temp_cpa_vaddr, vaddr;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr, remapped;
+	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
@@ -738,9 +743,12 @@ static int cpa_process_alias(struct cpa_
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (cpa->flags & CPA_PAGES_ARRAY)
-		vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-	else if (cpa->flags & CPA_ARRAY)
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		vaddr = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
 		vaddr = cpa->vaddr[cpa->curpage];
 	else
 		vaddr = *cpa->vaddr;
@@ -749,42 +757,55 @@ static int cpa_process_alias(struct cpa_
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
 
 		alias_cpa = *cpa;
-		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
-		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.vaddr = &laddr;
 		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
 	}
 
 #ifdef CONFIG_X86_64
-	if (ret)
-		return ret;
-	/*
-	 * No need to redo, when the primary call touched the high
-	 * mapping already:
-	 */
-	if (within(vaddr, (unsigned long) _text, _brk_end))
-		return 0;
-
 	/*
-	 * If the physical address is inside the kernel map, we need
+	 * If the primary call didn't touch the high mapping already
+	 * and the physical address is inside the kernel map, we need
 	 * to touch the high mapped kernel as well:
 	 */
-	if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
-		return 0;
+	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+	    within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+					       __START_KERNEL_map;
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-	alias_cpa = *cpa;
-	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
-	alias_cpa.vaddr = &temp_cpa_vaddr;
-	alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		/*
+		 * The high mapping range is imprecise, so ignore the
+		 * return value.
+		 */
+		__change_page_attr_set_clr(&alias_cpa, 0);
+	}
+#endif
 
 	/*
-	 * The high mapping range is imprecise, so ignore the return value.
-	 */
-	__change_page_attr_set_clr(&alias_cpa, 0);
-#endif
-	return ret;
+	 * If the PMD page was partially used for per-cpu remapping,
+	 * the recycled area needs to be split and modified.  Because
+	 * the area is always proper subset of a PMD page
+	 * cpa->numpages is guaranteed to be 1 for these areas, so
+	 * there's no need to loop over and check for further remaps.
+	 */
+	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
+	if (remapped) {
+		WARN_ON(cpa->numpages > 1);
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &remapped;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
@@ -882,15 +903,6 @@ static int change_page_attr_set_clr(unsi
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 *
-	arch_flush_lazy_mmu_mode();*/
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(true);
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -935,14 +947,6 @@ static int change_page_attr_set_clr(unsi
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 *
-	arch_flush_lazy_mmu_mode();*/
-	WARN_ON_ONCE(arch_use_lazy_mmu_mode() && !irq_count());
-
 out:
 	return ret;
 }
@@ -1087,12 +1091,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
 int _set_memory_wc(unsigned long addr, int numpages)
 {
 	int ret;
+	unsigned long addr_copy = addr;
+
 	ret = change_page_attr_set(&addr, numpages,
 				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
 	if (!ret) {
-		ret = change_page_attr_set(&addr, numpages,
-				    __pgprot(_PAGE_CACHE_WC), 0);
+		ret = change_page_attr_set_clr(&addr_copy, numpages,
+					       __pgprot(_PAGE_CACHE_WC),
+					       __pgprot(_PAGE_CACHE_MASK),
+					       0, 0, NULL);
 	}
 	return ret;
 }
@@ -1209,7 +1216,9 @@ int set_pages_array_uc(struct page **pag
 	int free_idx;
 
 	for (i = 0; i < addrinarray; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
 			goto err_out;
@@ -1222,7 +1231,9 @@ int set_pages_array_uc(struct page **pag
 err_out:
 	free_idx = i;
 	for (i = 0; i < free_idx; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		free_memtype(start, end);
 	}
@@ -1251,7 +1262,9 @@ int set_pages_array_wb(struct page **pag
 		return retval;
 
 	for (i = 0; i < addrinarray; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		free_memtype(start, end);
 	}
--- 12.2.orig/arch/x86/mm/pat-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/mm/pat-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -639,7 +639,8 @@ static int reserve_pfn_range(u64 paddr, 
 		return ret;
 
 	if (flags != want_flags) {
-		if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+		if (strict_prot ||
+		    !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
 			free_memtype(paddr, paddr + size);
 			printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
 				" for %Lx-%Lx, got %s\n",
--- 12.2.orig/arch/x86/mm/pgtable-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/mm/pgtable-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -8,9 +8,11 @@
 #include <asm/hypervisor.h>
 #include <asm/mmu_context.h>
 
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(PGALLOC_GFP);
 	if (pte)
 		make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
 	return pte;
@@ -27,9 +29,9 @@ pgtable_t pte_alloc_one(struct mm_struct
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(PGALLOC_GFP, 0);
 #endif
 	if (pte) {
 		pgtable_page_ctor(pte);
@@ -65,7 +67,7 @@ void __pte_free(pgtable_t pte)
 	__free_page(pte);
 }
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
@@ -83,7 +85,7 @@ pmd_t *pmd_alloc_one(struct mm_struct *m
 {
 	struct page *pmd;
 
-	pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pmd = alloc_pages(PGALLOC_GFP, 0);
 	if (!pmd)
 		return NULL;
 	SetPageForeign(pmd, _pmd_free);
@@ -107,14 +109,14 @@ void __pmd_free(pgtable_t pmd)
 	__free_page(pmd);
 }
 
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
 #if PAGETABLE_LEVELS > 3
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pud));
@@ -609,7 +611,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	pmd_t *pmds[PREALLOCATED_PMDS];
 	unsigned long flags;
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
+	pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
 
 	if (pgd == NULL)
 		goto out;
--- 12.2.orig/arch/x86/pci/i386.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/pci/i386.c	2012-04-10 17:00:16.000000000 +0200
@@ -323,12 +323,14 @@ void __init pcibios_resource_survey(void
 	pcibios_allocate_resources(1);
 
 	e820_reserve_resources_late();
+#ifndef CONFIG_XEN
 	/*
 	 * Insert the IO APIC resources after PCI initialization has
 	 * occurred to handle IO APICS that are mapped in on a BAR in
 	 * PCI space, but before trying to assign unassigned pci res.
 	 */
 	ioapic_insert_resources();
+#endif
 }
 
 /**
--- 12.2.orig/arch/x86/pci/pcifront.c	2011-02-01 14:42:26.000000000 +0100
+++ 12.2/arch/x86/pci/pcifront.c	2011-02-01 14:50:44.000000000 +0100
@@ -6,6 +6,7 @@
  */
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/pci.h>
 #include <asm/acpi.h>
 #include <asm/pci_x86.h>
@@ -15,6 +16,8 @@ static int pcifront_enable_irq(struct pc
 {
 	u8 irq;
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+	if (!irq_to_desc_alloc_node(irq, numa_node_id()))
+		return -ENOMEM;
 	evtchn_register_pirq(irq);
 	dev->irq = irq;
 
--- 12.2.orig/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:12:24.000000000 +0100
+++ 12.2/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:15:36.000000000 +0100
@@ -386,6 +386,8 @@ int arch_setup_additional_pages(struct l
 		}
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	if (compat_uses_vma || !compat) {
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -406,11 +408,13 @@ int arch_setup_additional_pages(struct l
 			goto up_fail;
 	}
 
-	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
   up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
 	up_write(&mm->mmap_sem);
 
 	return ret;
--- 12.2.orig/drivers/acpi/processor_driver.c	2012-02-08 12:02:58.000000000 +0100
+++ 12.2/drivers/acpi/processor_driver.c	2012-02-08 12:13:33.000000000 +0100
@@ -338,7 +338,14 @@ static int acpi_processor_get_info(struc
 	 * generated as the following format:
 	 * CPU+CPU ID.
 	 */
-	sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+	if (pr->id != -1)
+		sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+	else
+		snprintf(acpi_device_bid(device),
+			 ARRAY_SIZE(acpi_device_bid(device)),
+			 "#%0*X",
+			 (int)ARRAY_SIZE(acpi_device_bid(device)) - 2,
+			 pr->acpi_id);
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
 			  pr->acpi_id));
 
--- 12.2.orig/drivers/char/agp/intel-gtt.c	2012-04-10 15:43:16.000000000 +0200
+++ 12.2/drivers/char/agp/intel-gtt.c	2012-04-10 17:00:06.000000000 +0200
@@ -281,7 +281,11 @@ static struct agp_memory *alloc_agpphysm
 	new->page_count = pg_count;
 	new->num_scratch_pages = pg_count;
 	new->type = AGP_PHYS_MEMORY;
+#ifndef CONFIG_XEN
 	new->physical = page_to_phys(new->pages[0]);
+#else
+	new->physical = page_to_pseudophys(new->pages[0]);
+#endif
 	return new;
 }
 
--- 12.2.orig/drivers/edac/Kconfig	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/edac/Kconfig	2011-11-17 11:40:04.000000000 +0100
@@ -74,6 +74,7 @@ config EDAC_MM_EDAC
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64) K8, F10h"
 	depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
+	depends on !XEN
 	help
 	  Support for error detection and correction of DRAM ECC errors on
 	  the AMD64 families of memory controllers (K8 and F10h)
--- 12.2.orig/drivers/gpu/drm/ttm/ttm_bo.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/gpu/drm/ttm/ttm_bo.c	2012-06-20 12:16:17.000000000 +0200
@@ -1460,6 +1460,14 @@ int ttm_bo_global_init(struct drm_global
 		ret = -ENOMEM;
 		goto out_no_drp;
 	}
+#ifdef CONFIG_XEN
+	ret = xen_limit_pages_to_max_mfn(glob->dummy_read_page, 0, 32);
+	if (!ret)
+		clear_page(page_address(glob->dummy_read_page));
+	else
+		printk(KERN_WARNING
+		       "Error restricting dummy read page: %d\n", ret);
+#endif
 
 	INIT_LIST_HEAD(&glob->swap_lru);
 	INIT_LIST_HEAD(&glob->device_list);
--- 12.2.orig/drivers/gpu/drm/ttm/ttm_bo_vm.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/gpu/drm/ttm/ttm_bo_vm.c	2012-04-10 16:59:52.000000000 +0200
@@ -171,7 +171,13 @@ static int ttm_bo_vm_fault(struct vm_are
 	if (bo->mem.bus.is_iomem) {
 		vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
 						vma->vm_page_prot);
+#if defined(CONFIG_XEN) && defined(_PAGE_IOMAP)
+		pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP;
+#endif
 	} else {
+#if defined(CONFIG_XEN) && defined(_PAGE_IOMAP)
+		pgprot_val(vma->vm_page_prot) &= ~_PAGE_IOMAP;
+#endif
 		ttm = bo->ttm;
 		vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
 		    vm_get_page_prot(vma->vm_flags) :
--- 12.2.orig/drivers/pci/msi-xen.c	2012-04-04 11:02:16.000000000 +0200
+++ 12.2/drivers/pci/msi-xen.c	2012-04-04 11:03:28.000000000 +0200
@@ -58,22 +58,17 @@ int arch_msi_check_device(struct pci_dev
 }
 #endif
 
-static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
+static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
 {
 	u16 control;
 
-	if (pos) {
-		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
-		control &= ~PCI_MSI_FLAGS_ENABLE;
-		if (enable)
-			control |= PCI_MSI_FLAGS_ENABLE;
-		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
-	}
-}
+	BUG_ON(!pos);
 
-static void msi_set_enable(struct pci_dev *dev, int enable)
-{
-	__msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+	control &= ~PCI_MSI_FLAGS_ENABLE;
+	if (enable)
+		control |= PCI_MSI_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
 }
 
 static void msix_set_enable(struct pci_dev *dev, int enable)
@@ -309,8 +304,11 @@ void pci_restore_msi_state(struct pci_de
 		return;
 
 	pci_intx_for_msi(dev, 0);
-	if (dev->msi_enabled)
-		msi_set_enable(dev, 0);
+	if (dev->msi_enabled) {
+		int pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+
+		msi_set_enable(dev, pos, 0);
+	}
 	if (dev->msix_enabled)
 		msix_set_enable(dev, 0);
 
@@ -355,9 +353,9 @@ static int msi_capability_init(struct pc
 	int pos, pirq;
 	u16 control;
 
-	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */
-
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */
+
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 
 	pirq = msi_map_vector(dev, 0, 0);
@@ -366,7 +364,7 @@ static int msi_capability_init(struct pc
 
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
-	msi_set_enable(dev, 1);
+	msi_set_enable(dev, pos, 1);
 	dev->msi_enabled = 1;
 
 	dev->irq = pirq;
@@ -388,6 +386,7 @@ static int msix_capability_init(struct p
 {
 	u64 table_base;
 	int pirq, i, j, mapped, pos;
+	u16 control;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 	struct msi_pirq_entry *pirq_entry;
 
@@ -397,11 +396,24 @@ static int msix_capability_init(struct p
 	msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
+
+	/* Ensure MSI-X is disabled while it is set up */
+	control &= ~PCI_MSIX_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	table_base = find_table_base(dev, pos);
 	if (!table_base)
 		return -ENODEV;
 
-	/* MSI-X Table Initialization */
+	/*
+	 * Some devices require MSI-X to be enabled before we can touch the
+	 * MSI-X registers.  We need to mask all the vectors to prevent
+	 * interrupts coming in before they're fully set up.
+	 */
+	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	for (i = 0; i < nvec; i++) {
 		mapped = 0;
 		list_for_each_entry(pirq_entry, &dev->msi_list, list) {
@@ -438,10 +450,13 @@ static int msix_capability_init(struct p
 		return avail;
 	}
 
+	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
-	msix_set_enable(dev, 1);
 	dev->msix_enabled = 1;
 
+	control &= ~PCI_MSIX_FLAGS_MASKALL;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	return 0;
 }
 
@@ -563,7 +578,7 @@ EXPORT_SYMBOL(pci_enable_msi_block);
 extern void pci_frontend_disable_msi(struct pci_dev* dev);
 void pci_msi_shutdown(struct pci_dev *dev)
 {
-	int pirq;
+	int pirq, pos;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
 	if (!pci_msi_enable || !dev || !dev->msi_enabled)
@@ -585,7 +600,8 @@ void pci_msi_shutdown(struct pci_dev *de
 	msi_unmap_pirq(dev, pirq);
 
 	/* Disable MSI mode */
-	msi_set_enable(dev, 0);
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_set_enable(dev, pos, 0);
 	pci_intx_for_msi(dev, 1);
 	dev->msi_enabled = 0;
 }
@@ -625,8 +641,8 @@ int pci_msix_table_size(struct pci_dev *
  * indicates the successful configuration of MSI-X capability structure
  * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
  * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs available. Driver should use the returned value to re-send
- * its request.
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
  **/
 extern int pci_frontend_enable_msix(struct pci_dev *dev,
 		struct msix_entry *entries, int nvec);
@@ -684,7 +700,7 @@ int pci_enable_msix(struct pci_dev* dev,
 
 	nr_entries = pci_msix_table_size(dev);
 	if (nvec > nr_entries)
-		return -EINVAL;
+		return nr_entries;
 
 	/* Check for any invalid entries */
 	for (i = 0; i < nvec; i++) {
--- 12.2.orig/drivers/xen/Kconfig	2011-11-03 12:26:46.000000000 +0100
+++ 12.2/drivers/xen/Kconfig	2012-04-03 13:15:22.000000000 +0200
@@ -305,14 +305,6 @@ config XEN_DISABLE_SERIAL
 	  Disable serial port drivers, allowing the Xen console driver
 	  to provide a serial console at ttyS0.
 
-config XEN_SYSFS
-	tristate "Export Xen attributes in sysfs"
-	depends on SYSFS
-	select SYS_HYPERVISOR
-	default y
-	help
-	  Xen hypervisor attributes will show up under /sys/hypervisor/.
-
 choice
 	prompt "Xen version compatibility"
 	default XEN_COMPAT_030002_AND_LATER
@@ -446,7 +438,8 @@ config XEN_SCRUB_PAGES
 
 config XEN_DEV_EVTCHN
 	tristate "Xen /dev/xen/evtchn device"
-	default y
+	depends on XEN || PARAVIRT_XEN
+	default PARAVIRT_XEN || XEN_PRIVILEGED_GUEST || m
 	help
 	  The evtchn driver allows a userspace process to triger event
 	  channels and to receive notification of an event channel
--- 12.2.orig/drivers/xen/Makefile	2011-11-03 12:20:37.000000000 +0100
+++ 12.2/drivers/xen/Makefile	2011-11-03 12:27:36.000000000 +0100
@@ -5,7 +5,6 @@ xen-balloon-$(CONFIG_PARAVIRT_XEN) := ba
 xen-balloon-$(CONFIG_XEN)	:= balloon/
 obj-$(CONFIG_XEN)		+= core/
 obj-$(CONFIG_XEN)		+= console/
-obj-$(CONFIG_XEN)		+= evtchn/
 obj-y				+= xenbus/
 obj-$(CONFIG_XEN)		+= char/
 
@@ -15,7 +14,9 @@ obj-$(CONFIG_XEN)			+= features.o $(xen-
 obj-$(CONFIG_HOTPLUG_CPU)		+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= $(xen-balloon-y)
+obj-$(CONFIG_XEN_DEV_EVTCHN)		+= evtchn.o
 obj-$(CONFIG_XENFS)			+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
 obj-$(filter m,$(CONFIG_XEN_BLKDEV_TAP2)) += blktap2/ blktap2-new/
--- 12.2.orig/drivers/xen/balloon/balloon.c	2012-01-20 14:53:22.000000000 +0100
+++ 12.2/drivers/xen/balloon/balloon.c	2012-06-06 14:02:30.000000000 +0200
@@ -106,8 +106,8 @@ static DECLARE_WORK(balloon_worker, ball
 
 /* When ballooning out (allocating memory to return to Xen) we don't really 
    want the kernel to try too hard since that can trigger the oom killer. */
-#define GFP_BALLOON \
-	(GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
+#define GFP_BALLOON (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|\
+		     __GFP_NOTRACK|__GFP_COLD)
 
 #define PAGE_TO_LIST(p) (&(p)->lru)
 #define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
@@ -314,7 +314,7 @@ static int increase_reservation(unsigned
 	balloon_unlock(flags);
 
 #ifndef MODULE
-	setup_per_zone_pages_min();
+	setup_per_zone_wmarks();
 	if (rc > 0)
 		kswapd_run(0);
 	if (need_zonelists_rebuild)
@@ -672,7 +672,7 @@ struct page **alloc_empty_pages_and_page
 		}
 		balloon_unlock(flags);
 
-		page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+		page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_NOTRACK|__GFP_COLD);
 		if (page == NULL)
 			goto err;
 
--- 12.2.orig/drivers/xen/blkback/blkback.c	2012-03-26 12:22:21.000000000 +0200
+++ 12.2/drivers/xen/blkback/blkback.c	2012-04-04 10:24:14.000000000 +0200
@@ -511,7 +511,7 @@ static void dispatch_rw_block_io(blkif_t
 
 	for (i = 0; i < nseg; i++) {
 		if (((int)preq.sector_number|(int)seg[i].nsec) &
-		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
 			DPRINTK("Misaligned I/O request from domain %d",
 				blkif->domid);
 			goto fail_put_bio;
--- 12.2.orig/drivers/xen/blkback/vbd.c	2011-06-30 16:48:21.000000000 +0200
+++ 12.2/drivers/xen/blkback/vbd.c	2011-02-01 14:50:44.000000000 +0100
@@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd)
 
 unsigned long vbd_secsize(struct vbd *vbd)
 {
-	return bdev_hardsect_size(vbd->bdev);
+	return bdev_logical_block_size(vbd->bdev);
 }
 
 int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
--- 12.2.orig/drivers/xen/blkback/xenbus.c	2012-04-04 10:19:57.000000000 +0200
+++ 12.2/drivers/xen/blkback/xenbus.c	2012-03-22 14:09:45.000000000 +0100
@@ -108,7 +108,7 @@ static void update_blkif_status(blkif_t 
 		if (!get_device(_dev))					\
 			return ret;					\
 		dev = to_xenbus_device(_dev);				\
-		if ((be = dev->dev.driver_data) != NULL)		\
+		if ((be = dev_get_drvdata(&dev->dev)) != NULL)		\
 			ret = sprintf(buf, format, ##args);		\
 		put_device(_dev);					\
 		return ret;						\
@@ -173,7 +173,7 @@ void xenvbd_sysfs_delif(struct xenbus_de
 
 static int blkback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("");
 
@@ -194,7 +194,7 @@ static int blkback_remove(struct xenbus_
 	}
 
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -226,7 +226,7 @@ static int blkback_probe(struct xenbus_d
 		return -ENOMEM;
 	}
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	be->blkif = blkif_alloc(dev->otherend_id);
 	if (IS_ERR(be->blkif)) {
@@ -348,7 +348,7 @@ static void backend_changed(struct xenbu
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("%s", xenbus_strstate(frontend_state));
--- 12.2.orig/drivers/xen/blkfront/blkfront.c	2012-06-12 15:18:11.000000000 +0200
+++ 12.2/drivers/xen/blkfront/blkfront.c	2012-06-12 15:19:18.000000000 +0200
@@ -120,12 +120,12 @@ static int blkfront_probe(struct xenbus_
 
 	/* Front end dir is a number, which is used as the id. */
 	info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = talk_to_backend(dev, info);
 	if (err) {
 		kfree(info);
-		dev->dev.driver_data = NULL;
+		dev_set_drvdata(&dev->dev, NULL);
 		return err;
 	}
 
@@ -141,7 +141,7 @@ static int blkfront_probe(struct xenbus_
  */
 static int blkfront_resume(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("blkfront_resume: %s\n", dev->nodename);
@@ -266,7 +266,7 @@ fail:
 static void backend_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	struct block_device *bd;
 
 	DPRINTK("blkfront:backend_changed.\n");
@@ -439,7 +439,7 @@ static void blkfront_closing(struct blkf
 
 static int blkfront_remove(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	struct block_device *bd;
 	struct gendisk *disk;
 
@@ -773,7 +773,7 @@ static int blkif_queue_request(struct re
 	info->shadow[id].request = (unsigned long)req;
 
 	ring_req->id = id;
-	ring_req->sector_number = (blkif_sector_t)req->sector;
+	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
@@ -829,31 +829,27 @@ void do_blkif_request(struct request_que
 
 	queued = 0;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
-		if (!blk_fs_request(req)) {
-			if (blk_pc_request(req)) {
-				req->errors = (DID_ERROR << 16)
-					      | (DRIVER_INVALID << 24);
-				req->hard_cur_sectors = (req->data_len
-							 + 511) >> 9;
-			}
-			end_request(req, 0);
-			continue;
-		}
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
-		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
-			"(%u/%li) buffer:%p [%s]\n",
-			req, req->cmd, (long long)req->sector,
-			req->current_nr_sectors,
-			req->nr_sectors, req->buffer,
-			rq_data_dir(req) ? "write" : "read");
+		blk_start_request(req);
 
+		if (!blk_fs_request(req)) {
+			req->errors = (DID_ERROR << 16) |
+				      (DRIVER_INVALID << 24);
+			__blk_end_request_all(req, -EIO);
+			continue;
+		}
+
+		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
+			"(%u/%u) buffer:%p [%s]\n",
+			req, req->cmd, (long long)blk_rq_pos(req),
+			blk_rq_cur_sectors(req), blk_rq_sectors(req),
+			req->buffer, rq_data_dir(req) ? "write" : "read");
 
-		blkdev_dequeue_request(req);
 		if (blkif_queue_request(req)) {
 			blk_requeue_request(rq, req);
 		wait:
@@ -938,8 +934,7 @@ static irqreturn_t blkif_int(int irq, vo
 					op_name(bret->operation),
 					bret->status);
 
-			ret = __blk_end_request(req, ret, blk_rq_bytes(req));
-			BUG_ON(ret);
+			__blk_end_request_all(req, ret);
 			break;
 		default:
 			BUG();
@@ -1069,7 +1064,7 @@ static int blkif_recover(struct blkfront
 
 int blkfront_is_ready(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
 	return info->is_ready && info->xbdev;
 }
--- 12.2.orig/drivers/xen/blkfront/vbd.c	2012-03-12 16:14:02.000000000 +0100
+++ 12.2/drivers/xen/blkfront/vbd.c	2012-03-12 16:15:59.000000000 +0100
@@ -368,7 +368,7 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 #endif
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, sector_size);
+	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
@@ -551,7 +551,7 @@ static ssize_t show_media(struct device 
 		                  struct device_attribute *attr, char *buf)
 {
 	struct xenbus_device *xendev = to_xenbus_device(dev);
-	struct blkfront_info *info = xendev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&xendev->dev);
 
 	if (info->gd->flags & GENHD_FL_CD)
 		return sprintf(buf, "cdrom\n");
--- 12.2.orig/drivers/xen/blktap/blktap.c	2012-05-23 13:33:28.000000000 +0200
+++ 12.2/drivers/xen/blktap/blktap.c	2012-05-23 13:34:28.000000000 +0200
@@ -276,6 +276,15 @@ static inline unsigned int OFFSET_TO_SEG
     } while(0)
 
 
+static char *blktap_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "xen/blktap%u", MINOR(dev->devt));
+}
+
+static struct device_type blktap_type = {
+	.nodename = blktap_nodename
+};
+
 /******************************************************************
  * BLKTAP VM OPS
  */
@@ -442,7 +451,6 @@ static const struct file_operations blkt
 
 static tap_blkif_t *get_next_free_dev(void)
 {
-	struct class *class;
 	tap_blkif_t *info;
 	int minor;
 
@@ -506,9 +514,9 @@ found:
 		wmb();
 		tapfds[minor] = info;
 
-		if ((class = get_xen_class()) != NULL)
-			device_create(class, NULL, MKDEV(blktap_major, minor),
-				      NULL, "blktap%d", minor);
+		xen_class_device_create(&blktap_type, NULL,
+					MKDEV(blktap_major, minor),
+					NULL, "blktap%d", minor);
 	}
 
 out:
@@ -551,7 +559,8 @@ void signal_tapdisk(int idx) 
 		return;
 
 	if (info->pid > 0) {
-		ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
+		ptask = pid_task(find_pid_ns(info->pid, info->pid_ns),
+				 PIDTYPE_PID);
 		if (ptask)
 			info->status = CLEANSHUTDOWN;
 	}
@@ -1701,7 +1710,6 @@ static void make_response(blkif_t *blkif
 static int __init blkif_init(void)
 {
 	int i, ret;
-	struct class *class;
 
 	if (!is_running_on_xen())
 		return -ENODEV;
@@ -1737,7 +1745,7 @@ static int __init blkif_init(void)
 	DPRINTK("Created misc_dev %d:0 [/dev/xen/blktap0]\n", ret);
 
 	/* Make sure the xen class exists */
-	if ((class = get_xen_class()) != NULL) {
+	if (get_xen_class()) {
 		/*
 		 * This will allow udev to create the blktap ctrl device.
 		 * We only want to create blktap0 first.  We don't want
@@ -1745,8 +1753,9 @@ static int __init blkif_init(void)
 		 * We only create the device when a request of a new device is
 		 * made.
 		 */
-		device_create(class, NULL, MKDEV(blktap_major, 0), NULL,
-			      "blktap0");
+		xen_class_device_create(&blktap_type, NULL,
+					MKDEV(blktap_major, 0), NULL,
+					"blktap0");
 	} else {
 		/* this is bad, but not fatal */
 		WPRINTK("sysfs xen_class not created\n");
--- 12.2.orig/drivers/xen/blktap/xenbus.c	2012-04-04 10:19:53.000000000 +0200
+++ 12.2/drivers/xen/blktap/xenbus.c	2012-02-16 13:29:26.000000000 +0100
@@ -126,7 +126,7 @@ static int blktap_name(blkif_t *blkif, c
 		if (!get_device(_dev))					\
 			return ret;					\
 		dev = to_xenbus_device(_dev);				\
-		if ((be = dev->dev.driver_data) != NULL)		\
+		if ((be = dev_get_drvdata(&dev->dev)) != NULL)		\
 			ret = sprintf(buf, format, ##args);		\
 		put_device(_dev);					\
 		return ret;						\
@@ -156,7 +156,7 @@ static const struct attribute_group taps
 int xentap_sysfs_addif(struct xenbus_device *dev)
 {
 	int err;
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
 	if (!err)
 		be->group_added = 1;
@@ -165,14 +165,14 @@ int xentap_sysfs_addif(struct xenbus_dev
 
 void xentap_sysfs_delif(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
 	be->group_added = 0;
 }
 
 static int blktap_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (be->group_added)
 		xentap_sysfs_delif(be->dev);
@@ -190,7 +190,7 @@ static int blktap_remove(struct xenbus_d
 		be->blkif = NULL;
 	}
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -259,7 +259,7 @@ static int blktap_probe(struct xenbus_de
 	}
 
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 	be->xenbus_id = get_id(dev->nodename);
 
 	be->blkif = tap_alloc_blkif(dev->otherend_id);
@@ -346,7 +346,7 @@ static void blkif_disconnect(blkif_t *bl
 static void tap_frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("fe_changed(%s,%d)\n", dev->nodename, frontend_state);
--- 12.2.orig/drivers/xen/blktap2/blktap.h	2012-06-06 13:58:26.000000000 +0200
+++ 12.2/drivers/xen/blktap2/blktap.h	2012-06-06 14:02:20.000000000 +0200
@@ -26,6 +26,8 @@ extern int blktap_debug_level;
 #define BTWARN(_f, _a...)            BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
 #define BTERR(_f, _a...)             BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
 
+#define BLKTAP2_DEV_DIR "xen/blktap-2/"
+
 #define MAX_BLKTAP_DEVICE            256
 
 #define BLKTAP_CONTROL               1
--- 12.2.orig/drivers/xen/blktap2/control.c	2011-01-31 17:56:27.000000000 +0100
+++ 12.2/drivers/xen/blktap2/control.c	2011-02-01 14:50:44.000000000 +0100
@@ -154,6 +154,7 @@ static const struct file_operations blkt
 static struct miscdevice blktap_misc = {
 	.minor    = MISC_DYNAMIC_MINOR,
 	.name     = "blktap-control",
+	.devnode  = BLKTAP2_DEV_DIR "control",
 	.fops     = &blktap_control_file_operations,
 };
 
--- 12.2.orig/drivers/xen/blktap2/device.c	2012-02-16 12:35:01.000000000 +0100
+++ 12.2/drivers/xen/blktap2/device.c	2012-02-16 13:29:12.000000000 +0100
@@ -184,13 +184,6 @@ flush_tlb_kernel_page(unsigned long kvad
 #endif
 }
 
-static void
-blktap_device_end_dequeued_request(struct request *req, int ret)
-{
-	if (blk_end_request(req, ret, blk_rq_bytes(req)))
-		BUG();
-}
-
 /*
  * tap->tap_sem held on entry
  */
@@ -356,7 +349,7 @@ blktap_device_fail_pending_requests(stru
 
 		blktap_unmap(tap, request);
 		req = (struct request *)(unsigned long)request->id;
-		blktap_device_end_dequeued_request(req, -ENODEV);
+		blk_end_request_all(req, -ENODEV);
 		blktap_request_free(tap, request);
 	}
 
@@ -395,7 +388,7 @@ blktap_device_finish_request(struct blkt
 		if (unlikely(res->status != BLKIF_RSP_OKAY))
 			BTERR("Bad return from device data "
 				"request: %x\n", res->status);
-		blktap_device_end_dequeued_request(req,
+		blk_end_request_all(req,
 			res->status == BLKIF_RSP_OKAY ? 0 : -EIO);
 		break;
 	default:
@@ -625,7 +618,7 @@ blktap_device_process_request(struct blk
 	ring    = &tap->ring;
 	usr_idx = request->usr_idx;
 	blkif_req.id = usr_idx;
-	blkif_req.sector_number = (blkif_sector_t)req->sector;
+	blkif_req.sector_number = (blkif_sector_t)blk_rq_pos(req);
 	blkif_req.handle = 0;
 	blkif_req.operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -822,26 +815,24 @@ blktap_device_run_queue(struct blktap *t
 
 	BTDBG("running queue for %d\n", tap->minor);
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
-			if (blk_pc_request(req)) {
-				req->errors = (DID_ERROR << 16)
-					      | (DRIVER_INVALID << 24);
-				req->hard_cur_sectors = (req->data_len
-							 + 511) >> 9;
-			}
-			end_request(req, 0);
+			blk_start_request(req);
+			req->errors = (DID_ERROR << 16) |
+				      (DRIVER_INVALID << 24);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 
 		if (blk_barrier_rq(req)) {
-			end_request(req, 0);
+			blk_start_request(req);
+			__blk_end_request_all(req, -EOPNOTSUPP);
 			continue;
 		}
 
 #ifdef ENABLE_PASSTHROUGH
 		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			blktap_device_forward_request(tap, req);
 			continue;
 		}
@@ -861,13 +852,13 @@ blktap_device_run_queue(struct blktap *t
 			goto wait;
 		}
 
-		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
+		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%x) "
 		      "buffer:%p [%s], pending: %p\n", req, tap->minor,
-		      req->cmd, (unsigned long long)req->sector,
-		      req->current_nr_sectors, req->nr_sectors, req->buffer,
+		      req->cmd, (unsigned long long)blk_rq_pos(req),
+		      blk_rq_cur_sectors(req), blk_rq_sectors(req), req->buffer,
 		      rq_data_dir(req) ? "write" : "read", request);
 
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 
 		spin_unlock_irq(&dev->lock);
 		down_write(&tap->tap_sem);
@@ -876,7 +867,7 @@ blktap_device_run_queue(struct blktap *t
 		if (!err)
 			queued++;
 		else {
-			blktap_device_end_dequeued_request(req, err);
+			blk_end_request_all(req, err);
 			blktap_request_free(tap, request);
 		}
 
@@ -916,18 +907,16 @@ blktap_device_do_request(struct request_
 	return;
 
 fail:
-	while ((req = elv_next_request(rq))) {
+	while ((req = blk_fetch_request(rq))) {
 		if (blk_fs_request(req)) {
-			unsigned long long sec = req->sector;
+			unsigned long long sec = blk_rq_pos(req);
 
 			BTERR("device closed: failing secs %#Lx-%#Lx\n",
-			      sec, sec + req->nr_sectors - 1);
-		} else if (blk_pc_request(req)) {
+			      sec, sec + blk_rq_sectors(req) - 1);
+		} else
 			req->errors = (DID_ERROR << 16)
 				      | (DRIVER_INVALID << 24);
-			req->hard_cur_sectors = (req->data_len + 511) >> 9;
-		}
-		end_request(req, 0);
+		__blk_end_request_all(req, -EIO);
 	}
 }
 
@@ -982,7 +971,7 @@ blktap_device_configure(struct blktap *t
 	set_capacity(dev->gd, tap->params.capacity);
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, tap->params.sector_size);
+	blk_queue_logical_block_size(rq, tap->params.sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
@@ -1080,6 +1069,12 @@ blktap_device_destroy(struct blktap *tap
 	return 0;
 }
 
+static char *blktap_nodename(struct gendisk *gd)
+{
+	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
+			 gd->first_minor);
+}
+
 int
 blktap_device_create(struct blktap *tap)
 {
@@ -1116,6 +1111,7 @@ blktap_device_create(struct blktap *tap)
 
 	gd->major = blktap_device_major;
 	gd->first_minor = minor;
+	gd->nodename = blktap_nodename;
 	gd->fops = &blktap_device_file_operations;
 	gd->private_data = dev;
 
--- 12.2.orig/drivers/xen/blktap2/sysfs.c	2011-02-01 14:38:38.000000000 +0100
+++ 12.2/drivers/xen/blktap2/sysfs.c	2011-02-01 14:50:44.000000000 +0100
@@ -436,6 +436,12 @@ blktap_sysfs_free(void)
 	class_destroy(class);
 }
 
+static char *blktap_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
+			 MINOR(dev->devt));
+}
+
 int __init
 blktap_sysfs_init(void)
 {
@@ -449,6 +455,8 @@ blktap_sysfs_init(void)
 	if (IS_ERR(cls))
 		return PTR_ERR(cls);
 
+	cls->nodename = blktap_nodename;
+
 	err = class_create_file(cls, &class_attr_verbosity);
 	if (!err) {
 		err = class_create_file(cls, &class_attr_devices);
--- 12.2.orig/drivers/xen/console/console.c	2012-03-22 14:04:26.000000000 +0100
+++ 12.2/drivers/xen/console/console.c	2012-03-22 14:09:37.000000000 +0100
@@ -44,7 +44,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
 #include <linux/sysrq.h>
 #include <linux/screen_info.h>
 #include <linux/vt.h>
@@ -228,7 +227,7 @@ static int __init xen_console_init(void)
 		goto out;
 	}
 
-	wbuf = alloc_bootmem(wbuf_size);
+	wbuf = kmalloc(wbuf_size, GFP_KERNEL);
 
 	register_console(&kcons_info);
 
@@ -624,8 +623,8 @@ static void xencons_close(struct tty_str
 	tty->closing = 1;
 	tty_wait_until_sent(tty, 0);
 	tty_driver_flush_buffer(tty);
-	if (tty->ldisc.ops->flush_buffer != NULL)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty->closing = 0;
 	spin_lock_irqsave(&xencons_lock, flags);
 	xencons_tty = NULL;
--- 12.2.orig/drivers/xen/core/Makefile	2011-11-03 12:26:50.000000000 +0100
+++ 12.2/drivers/xen/core/Makefile	2012-02-17 14:28:52.000000000 +0100
@@ -7,9 +7,7 @@ obj-y := evtchn.o gnttab.o reboot.o mach
 obj-$(CONFIG_PCI)		+= pci.o
 obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o
 obj-$(CONFIG_PROC_FS)		+= xen_proc.o
-obj-$(CONFIG_SYS_HYPERVISOR)	+= hypervisor_sysfs.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
-obj-$(CONFIG_XEN_SYSFS)		+= xen_sysfs.o
 obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
 obj-$(CONFIG_SMP)		+= spinlock.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o
--- 12.2.orig/drivers/xen/core/evtchn.c	2012-06-06 14:01:39.000000000 +0200
+++ 12.2/drivers/xen/core/evtchn.c	2012-06-06 14:02:11.000000000 +0200
@@ -35,7 +35,6 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
 #include <linux/ftrace.h>
 #include <linux/version.h>
 #include <asm/atomic.h>
@@ -137,6 +136,12 @@ static inline unsigned int type_from_irq
 	return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
 }
 
+unsigned int irq_from_evtchn(unsigned int port)
+{
+	return evtchn_to_irq[port];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 /* IRQ <-> VIRQ mapping. */
 DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
@@ -331,6 +336,8 @@ asmlinkage void __irq_entry evtchn_do_up
 			}
 
 			do {
+				bool handled = false;
+
 				masked_l2 = l2 & ((~0UL) << l2i);
 				if (masked_l2 == 0)
 					break;
@@ -341,13 +348,12 @@ asmlinkage void __irq_entry evtchn_do_up
 				mask_evtchn(port);
 				if ((irq = evtchn_to_irq[port]) != -1) {
 					clear_evtchn(port);
-					if (!handle_irq(irq, regs)
-					    && printk_ratelimit())
-						pr_emerg("No handler for "
-						         "irq %d (port %u)\n",
-						         irq, port);
-				} else
-					evtchn_device_upcall(port);
+					handled = handle_irq(irq, regs);
+				}
+				if (!handled && printk_ratelimit())
+					pr_emerg("No handler for irq %d"
+						 " (port %u)\n",
+						 irq, port);
 
 				l2i = (l2i + 1) % BITS_PER_LONG;
 
@@ -376,16 +382,26 @@ asmlinkage void __irq_entry evtchn_do_up
 	set_irq_regs(old_regs);
 }
 
-static int find_unbound_irq(unsigned int cpu, struct irq_chip *chip)
+static int find_unbound_irq(unsigned int node, struct irq_chip *chip)
 {
 	static int warned;
 	int irq;
 
 	for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
-		struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);
-		struct irq_cfg *cfg = desc->chip_data;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
 
-		if (!cfg->bindcount) {
+		desc = irq_to_desc(irq);
+		if (!desc)
+			desc = irq_to_desc_alloc_node(irq, node);
+		else if (desc->chip != &no_irq_chip &&
+			 desc->chip != &dynirq_chip)
+			continue;
+		if (!desc)
+			return -ENOMEM;
+
+		cfg = desc->chip_data;
+		if (cfg && !cfg->bindcount) {
 			desc->status |= IRQ_NOPROBE;
 			set_irq_chip_and_handler_name(irq, chip,
 						      handle_fasteoi_irq,
@@ -412,7 +428,7 @@ static int bind_caller_port_to_irq(unsig
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = evtchn_to_irq[caller_port]) == -1) {
-		if ((irq = find_unbound_irq(smp_processor_id(), &dynirq_chip)) < 0)
+		if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0)
 			goto out;
 
 		evtchn_to_irq[caller_port] = irq;
@@ -435,9 +451,8 @@ static int bind_local_port_to_irq(unsign
 
 	BUG_ON(evtchn_to_irq[local_port] != -1);
 
-	if ((irq = find_unbound_irq(smp_processor_id(), &dynirq_chip)) < 0) {
-		struct evtchn_close close = { .port = local_port };
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+	if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0) {
+		if (close_evtchn(local_port))
 			BUG();
 		goto out;
 	}
@@ -488,7 +503,8 @@ static int bind_virq_to_irq(unsigned int
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
-		if ((irq = find_unbound_irq(cpu, &dynirq_chip)) < 0)
+		if ((irq = find_unbound_irq(cpu_to_node(cpu),
+					    &dynirq_chip)) < 0)
 			goto out;
 
 		bind_virq.virq = virq;
@@ -521,7 +537,8 @@ static int bind_ipi_to_irq(unsigned int 
 	spin_lock(&irq_mapping_update_lock);
 
 	if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
-		if ((irq = find_unbound_irq(cpu, &dynirq_chip)) < 0)
+		if ((irq = find_unbound_irq(cpu_to_node(cpu),
+					    &dynirq_chip)) < 0)
 			goto out;
 
 		bind_ipi.vcpu = cpu;
@@ -547,16 +564,14 @@ static int bind_ipi_to_irq(unsigned int 
 
 static void unbind_from_irq(unsigned int irq)
 {
-	struct evtchn_close close;
 	unsigned int cpu;
 	int evtchn = evtchn_from_irq(irq);
 
 	spin_lock(&irq_mapping_update_lock);
 
 	if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
-		close.port = evtchn;
 		if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
-		    HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+		    close_evtchn(evtchn))
 			BUG();
 
 		switch (type_from_irq(irq)) {
@@ -717,29 +732,25 @@ void unbind_from_irqhandler(unsigned int
 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
 
 #ifdef CONFIG_SMP
-void rebind_evtchn_to_cpu(int port, unsigned int cpu)
+static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
+	unsigned int port = evtchn_from_irq(irq);
+	unsigned int cpu = cpumask_any(dest);
 	struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
-	int masked;
+	bool masked;
+	int rc;
+
+	if (!VALID_EVTCHN(port))
+		return -ENXIO;
 
 	masked = test_and_set_evtchn_mask(port);
-	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0)
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv);
+	if (rc == 0)
 		bind_evtchn_to_cpu(port, cpu);
 	if (!masked)
 		unmask_evtchn(port);
-}
 
-static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		rebind_evtchn_to_cpu(evtchn, tcpu);
-}
-
-static void set_affinity_irq(unsigned int irq, const struct cpumask *dest)
-{
-	rebind_irq_to_cpu(irq, cpumask_any(dest));
+	return rc;
 }
 #endif
 
@@ -913,7 +924,6 @@ static unsigned int startup_pirq(unsigne
 
 static void shutdown_pirq(unsigned int irq)
 {
-	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
 
 	if (!VALID_EVTCHN(evtchn))
@@ -921,8 +931,7 @@ static void shutdown_pirq(unsigned int i
 
 	mask_evtchn(evtchn);
 
-	close.port = evtchn;
-	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+	if (close_evtchn(evtchn))
 		BUG();
 
 	bind_evtchn_to_cpu(evtchn, 0);
@@ -1263,7 +1272,7 @@ int evtchn_map_pirq(int irq, int xen_pir
 	if (irq < 0) {
 #ifdef CONFIG_SPARSE_IRQ
 		spin_lock(&irq_mapping_update_lock);
-		irq = find_unbound_irq(smp_processor_id(), &pirq_chip);
+		irq = find_unbound_irq(numa_node_id(), &pirq_chip);
 		if (irq >= 0) {
 			struct irq_desc *desc;
 			struct irq_cfg *cfg;
@@ -1291,7 +1300,7 @@ int evtchn_map_pirq(int irq, int xen_pir
 
 			if (identity_mapped_irq(irq))
 				continue;
-			desc = irq_to_desc_alloc_cpu(irq, smp_processor_id());
+			desc = irq_to_desc_alloc_node(irq, numa_node_id());
 			cfg = desc->chip_data;
 			if (!index_from_irq(irq)) {
 				BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
@@ -1351,8 +1360,9 @@ void __init xen_init_IRQ(void)
 #else
 	i = nr_pirqs;
 #endif
-	pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long)
-		* BITS_TO_LONGS(ALIGN(i, PAGE_SIZE * 8)));
+	i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(i));
+	pirq_needs_eoi = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i);
+	BUILD_BUG_ON(NR_PIRQS > PAGE_SIZE * 8);
  	eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
 	if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v1, &eoi_gmfn) == 0)
 		pirq_eoi_does_unmask = true;
--- 12.2.orig/drivers/xen/core/smpboot.c	2012-03-22 16:21:28.000000000 +0100
+++ 12.2/drivers/xen/core/smpboot.c	2012-03-22 16:21:46.000000000 +0100
@@ -38,9 +38,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 static DEFINE_PER_CPU(int, resched_irq);
 static DEFINE_PER_CPU(int, callfunc_irq);
 static DEFINE_PER_CPU(int, call1func_irq);
+static DEFINE_PER_CPU(int, reboot_irq);
 static char resched_name[NR_CPUS][15];
 static char callfunc_name[NR_CPUS][15];
 static char call1func_name[NR_CPUS][15];
+static char reboot_name[NR_CPUS][15];
 
 void __init prefill_possible_map(void)
 {
@@ -72,7 +74,7 @@ static int __cpuinit xen_smp_intr_init(u
 	int rc;
 
 	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
-		per_cpu(call1func_irq, cpu) = -1;
+		per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
 
 	sprintf(resched_name[cpu], "resched%u", cpu);
 	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
@@ -107,6 +109,17 @@ static int __cpuinit xen_smp_intr_init(u
 		goto fail;
 	per_cpu(call1func_irq, cpu) = rc;
 
+	sprintf(reboot_name[cpu], "reboot%u", cpu);
+	rc = bind_ipi_to_irqhandler(REBOOT_VECTOR,
+				    cpu,
+				    smp_reboot_interrupt,
+				    IRQF_DISABLED|IRQF_NOBALANCING,
+				    reboot_name[cpu],
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(reboot_irq, cpu) = rc;
+
 	rc = xen_spinlock_init(cpu);
 	if (rc < 0)
 		goto fail;
@@ -123,6 +136,8 @@ static int __cpuinit xen_smp_intr_init(u
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	if (per_cpu(call1func_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+	if (per_cpu(reboot_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
 	xen_spinlock_cleanup(cpu);
 	return rc;
 }
@@ -135,6 +150,7 @@ static void __cpuinit xen_smp_intr_exit(
 	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
 	xen_spinlock_cleanup(cpu);
 }
 
--- 12.2.orig/drivers/xen/evtchn.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/xen/evtchn.c	2011-04-13 13:55:08.000000000 +0200
@@ -48,10 +48,17 @@
 #include <linux/mutex.h>
 #include <linux/cpu.h>
 
+#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/xen.h>
 #include <xen/events.h>
 #include <xen/evtchn.h>
 #include <asm/xen/hypervisor.h>
+#else
+#include <xen/evtchn.h>
+#include <xen/public/evtchn.h>
+#define xen_domain() is_running_on_xen()
+#define bind_evtchn_to_irqhandler bind_caller_port_to_irqhandler
+#endif
 
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
@@ -278,6 +285,9 @@ static void evtchn_unbind_from_user(stru
 	int irq = irq_from_evtchn(port);
 
 	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+#ifdef CONFIG_XEN
+	WARN_ON(close_evtchn(port));
+#endif
 
 	set_port_user(port, NULL);
 }
@@ -450,7 +460,8 @@ static int evtchn_open(struct inode *ino
 	if (u == NULL)
 		return -ENOMEM;
 
-	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+	u->name = kasprintf(GFP_KERNEL, "evtchn:%s[%d]",
+			    current->comm, current->pid);
 	if (u->name == NULL) {
 		kfree(u);
 		return -ENOMEM;
@@ -519,6 +530,7 @@ static const struct file_operations evtc
 static struct miscdevice evtchn_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
 	.name         = "xen/evtchn",
+	.devnode      = "xen/evtchn",
 	.fops         = &evtchn_fops,
 };
 static int __init evtchn_init(void)
@@ -534,10 +546,10 @@ static int __init evtchn_init(void)
 
 	spin_lock_init(&port_user_lock);
 
-	/* Create '/dev/misc/evtchn'. */
+	/* Create '/dev/xen/evtchn'. */
 	err = misc_register(&evtchn_miscdev);
 	if (err != 0) {
-		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		pr_alert("Could not register /dev/xen/evtchn\n");
 		return err;
 	}
 
--- 12.2.orig/drivers/xen/fbfront/xenfb.c	2011-02-17 10:11:23.000000000 +0100
+++ 12.2/drivers/xen/fbfront/xenfb.c	2011-02-17 10:16:12.000000000 +0100
@@ -597,7 +597,7 @@ static int __devinit xenfb_probe(struct 
 		fb_size = XENFB_DEFAULT_FB_LEN;
 	}
 
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->xbdev = dev;
 	info->irq = -1;
 	info->x1 = info->y1 = INT_MAX;
@@ -701,7 +701,7 @@ static int __devinit xenfb_probe(struct 
 
 static int xenfb_resume(struct xenbus_device *dev)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 
 	xenfb_disconnect_backend(info);
 	xenfb_init_shared_page(info, info->fb_info);
@@ -710,7 +710,7 @@ static int xenfb_resume(struct xenbus_de
 
 static int xenfb_remove(struct xenbus_device *dev)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 
 	del_timer(&info->refresh);
 	if (info->kthread)
@@ -819,7 +819,7 @@ static void xenfb_disconnect_backend(str
 static void xenfb_backend_changed(struct xenbus_device *dev,
 				  enum xenbus_state backend_state)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 	int val;
 
 	switch (backend_state) {
--- 12.2.orig/drivers/xen/fbfront/xenkbd.c	2011-05-23 11:10:58.000000000 +0200
+++ 12.2/drivers/xen/fbfront/xenkbd.c	2011-05-23 11:12:20.000000000 +0200
@@ -113,7 +113,7 @@ int __devinit xenkbd_probe(struct xenbus
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
 		return -ENOMEM;
 	}
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->xbdev = dev;
 	snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
 
@@ -199,7 +199,7 @@ int __devinit xenkbd_probe(struct xenbus
 
 static int xenkbd_resume(struct xenbus_device *dev)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 
 	xenkbd_disconnect_backend(info);
 	info->page->in_cons = info->page->in_prod = 0;
@@ -209,7 +209,7 @@ static int xenkbd_resume(struct xenbus_d
 
 static int xenkbd_remove(struct xenbus_device *dev)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 
 	xenkbd_disconnect_backend(info);
 	input_unregister_device(info->kbd);
@@ -275,7 +275,7 @@ static void xenkbd_disconnect_backend(st
 static void xenkbd_backend_changed(struct xenbus_device *dev,
 				   enum xenbus_state backend_state)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 	int ret, val;
 
 	switch (backend_state) {
--- 12.2.orig/drivers/xen/gntdev/gntdev.c	2012-05-23 13:33:01.000000000 +0200
+++ 12.2/drivers/xen/gntdev/gntdev.c	2012-05-23 13:34:31.000000000 +0200
@@ -356,6 +356,7 @@ nomem_out:
 static struct miscdevice gntdev_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
 	.name         = GNTDEV_NAME,
+	.devnode      = "xen/" GNTDEV_NAME,
 	.fops         = &gntdev_fops,
 };
 
--- 12.2.orig/drivers/xen/netback/accel.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/drivers/xen/netback/accel.c	2011-02-01 14:50:44.000000000 +0100
@@ -103,7 +103,7 @@ static int netback_accelerator_probe_bac
 	struct xenbus_device *xendev = to_xenbus_device(dev);
 
 	if (!strcmp("vif", xendev->devicetype)) {
-		struct backend_info *be = xendev->dev.driver_data;
+		struct backend_info *be = dev_get_drvdata(&xendev->dev);
 
 		if (match_accelerator(xendev, be, accelerator) &&
 		    try_module_get(accelerator->hooks->owner)) {
@@ -124,7 +124,7 @@ static int netback_accelerator_remove_ba
 		(struct netback_accelerator *)arg;
 	
 	if (!strcmp("vif", xendev->devicetype)) {
-		struct backend_info *be = xendev->dev.driver_data;
+		struct backend_info *be = dev_get_drvdata(&xendev->dev);
 
 		if (be->accelerator == accelerator) {
 			be->accelerator->hooks->remove(xendev);
--- 12.2.orig/drivers/xen/netback/loopback.c	2011-03-01 11:52:05.000000000 +0100
+++ 12.2/drivers/xen/netback/loopback.c	2011-02-01 14:50:44.000000000 +0100
@@ -139,8 +139,8 @@ static int loopback_start_xmit(struct sk
 		return 0;
 	}
 
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, NULL);
 
 	skb_orphan(skb);
 
--- 12.2.orig/drivers/xen/netback/xenbus.c	2012-01-06 10:44:04.000000000 +0100
+++ 12.2/drivers/xen/netback/xenbus.c	2012-01-03 12:01:46.000000000 +0100
@@ -39,7 +39,7 @@ static void netback_disconnect(struct de
 
 static int netback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	netback_remove_accelerators(be, dev);
 
@@ -50,7 +50,7 @@ static int netback_remove(struct xenbus_
 
 static void netback_disconnect(struct device *xbdev_dev, bool clear)
 {
-	struct backend_info *be = xbdev_dev->driver_data;
+	struct backend_info *be = dev_get_drvdata(xbdev_dev);
 
 	unregister_hotplug_status_watch(be);
 	if (be->netif)
@@ -64,7 +64,7 @@ static void netback_disconnect(struct de
 		be->netif = NULL;
 	}
 	if (clear)
-		xbdev_dev->driver_data = NULL;
+		dev_set_drvdata(xbdev_dev, NULL);
 	up_write(&teardown_sem);
 }
 
@@ -88,7 +88,7 @@ static int netback_probe(struct xenbus_d
 	}
 
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	sg = 1;
 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
@@ -185,7 +185,7 @@ static int netback_uevent(struct xenbus_
 	kfree(val);
 
 	down_read(&teardown_sem);
-	be = xdev->dev.driver_data;
+	be = dev_get_drvdata(&xdev->dev);
 	if (be && be->netif)
 		add_uevent_var(env, "vif=%s", be->netif->dev->name);
 	up_read(&teardown_sem);
@@ -228,7 +228,7 @@ static void backend_create_netif(struct 
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s", xenbus_strstate(frontend_state));
 
--- 12.2.orig/drivers/xen/netfront/netfront.c	2012-06-06 14:01:08.000000000 +0200
+++ 12.2/drivers/xen/netfront/netfront.c	2012-03-12 13:49:33.000000000 +0100
@@ -255,7 +255,7 @@ static int __devinit netfront_probe(stru
 	}
 
 	info = netdev_priv(netdev);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = register_netdev(info->netdev);
 	if (err) {
@@ -276,13 +276,13 @@ static int __devinit netfront_probe(stru
 
  fail:
 	free_netdev(netdev);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return err;
 }
 
 static int __devexit netfront_remove(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s\n", dev->nodename);
 
@@ -304,14 +304,14 @@ static int __devexit netfront_remove(str
 
 static int netfront_suspend(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 	return netfront_accelerator_suspend(info, dev);
 }
 
 
 static int netfront_suspend_cancel(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 	return netfront_accelerator_suspend_cancel(info, dev);
 }
 
@@ -324,7 +324,7 @@ static int netfront_suspend_cancel(struc
  */
 static int netfront_resume(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s\n", dev->nodename);
 
@@ -530,7 +530,7 @@ static int setup_device(struct xenbus_de
 static void backend_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
-	struct netfront_info *np = dev->dev.driver_data;
+	struct netfront_info *np = dev_get_drvdata(&dev->dev);
 	struct net_device *netdev = np->netdev;
 
 	DPRINTK("%s\n", xenbus_strstate(backend_state));
--- 12.2.orig/drivers/xen/pcifront/pci_op.c	2012-04-04 09:50:05.000000000 +0200
+++ 12.2/drivers/xen/pcifront/pci_op.c	2012-04-04 10:24:26.000000000 +0200
@@ -407,7 +407,7 @@ void pci_frontend_disable_msi(struct pci
 #endif /* CONFIG_PCI_MSI */
 
 /* Claim resources for the PCI frontend as-is, backend won't allow changes */
-static void pcifront_claim_resource(struct pci_dev *dev, void *data)
+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 {
 	struct pcifront_device *pdev = data;
 	int i;
@@ -422,6 +422,8 @@ static void pcifront_claim_resource(stru
 			pci_claim_resource(dev, i);
 		}
 	}
+
+	return 0;
 }
 
 int __devinit pcifront_scan_root(struct pcifront_device *pdev,
--- 12.2.orig/drivers/xen/pcifront/xenbus.c	2012-03-12 13:33:18.000000000 +0100
+++ 12.2/drivers/xen/pcifront/xenbus.c	2012-03-12 13:49:37.000000000 +0100
@@ -33,7 +33,7 @@ static struct pcifront_device *alloc_pde
 	/*Flag for registering PV AER handler*/
 	set_bit(_XEN_PCIB_AERHANDLER, (void*)&pdev->sh_info->flags);
 
-	xdev->dev.driver_data = pdev;
+	dev_set_drvdata(&xdev->dev, pdev);
 	pdev->xdev = xdev;
 
 	INIT_LIST_HEAD(&pdev->root_buses);
@@ -74,7 +74,7 @@ static void free_pdev(struct pcifront_de
 	else
 		free_page((unsigned long)pdev->sh_info);
 
-	pdev->xdev->dev.driver_data = NULL;
+	dev_set_drvdata(&pdev->xdev->dev, NULL);
 
 	kfree(pdev);
 }
@@ -393,7 +393,7 @@ static int pcifront_detach_devices(struc
 static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
 						  enum xenbus_state be_state)
 {
-	struct pcifront_device *pdev = xdev->dev.driver_data;
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
 
 	switch (be_state) {
 	case XenbusStateUnknown:
@@ -445,8 +445,8 @@ static int pcifront_xenbus_probe(struct 
 
 static int pcifront_xenbus_remove(struct xenbus_device *xdev)
 {
-	if (xdev->dev.driver_data)
-		free_pdev(xdev->dev.driver_data);
+	if (dev_get_drvdata(&xdev->dev))
+		free_pdev(dev_get_drvdata(&xdev->dev));
 
 	return 0;
 }
--- 12.2.orig/drivers/xen/scsiback/scsiback.c	2012-01-20 14:50:46.000000000 +0100
+++ 12.2/drivers/xen/scsiback/scsiback.c	2011-04-11 15:04:22.000000000 +0200
@@ -226,7 +226,7 @@ static void scsiback_cmd_done(struct req
 	int errors;
 
 	sense_buffer = req->sense;
-	resid        = req->data_len;
+	resid        = blk_rq_bytes(req);
 	errors       = req->errors;
 
 	if (errors != 0) {
@@ -333,21 +333,6 @@ fail_flush:
 	return -ENOMEM;
 }
 
-/* quoted scsi_lib.c/scsi_merge_bio */
-static int scsiback_merge_bio(struct request *rq, struct bio *bio)
-{
-	struct request_queue *q = rq->q;
-
-	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-	if (rq_data_dir(rq) == WRITE)
-		bio->bi_rw |= (1 << BIO_RW);
-
-	blk_queue_bounce(q, &bio);
-
-	return blk_rq_append_bio(q, rq, bio);
-}
-
-
 /* quoted scsi_lib.c/scsi_bi_endio */
 static void scsiback_bi_endio(struct bio *bio, int error)
 {
@@ -357,29 +342,28 @@ static void scsiback_bi_endio(struct bio
 
 
 /* quoted scsi_lib.c/scsi_req_map_sg . */
-static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
+static struct bio *request_map_sg(pending_req_t *pending_req)
 {
-	struct request_queue *q = rq->q;
-	int nr_pages;
-	unsigned int nsegs = count;
-	unsigned int data_len = 0, len, bytes, off;
+	struct request_queue *q = pending_req->sdev->request_queue;
+	unsigned int nsegs = (unsigned int)pending_req->nr_segments;
+	unsigned int i, len, bytes, off, nr_pages, nr_vecs = 0;
 	struct scatterlist *sg;
 	struct page *page;
-	struct bio *bio = NULL;
-	int i, err, nr_vecs = 0;
+	struct bio *bio = NULL, *bio_first = NULL, *bio_last = NULL;
+	int err;
 
 	for_each_sg (pending_req->sgl, sg, nsegs, i) {
 		page = sg_page(sg);
 		off = sg->offset;
 		len = sg->length;
-		data_len += len;
 
 		nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		while (len > 0) {
 			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
 
 			if (!bio) {
-				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+				nr_vecs = min_t(unsigned int, BIO_MAX_PAGES,
+					 	nr_pages);
 				nr_pages -= nr_vecs;
 				bio = bio_alloc(GFP_KERNEL, nr_vecs);
 				if (!bio) {
@@ -387,6 +371,11 @@ static int request_map_sg(struct request
 					goto free_bios;
 				}
 				bio->bi_end_io = scsiback_bi_endio;
+				if (bio_last)
+					bio_last->bi_next = bio;
+				else
+					bio_first = bio;
+				bio_last = bio;
 			}
 
 			if (bio_add_pc_page(q, bio, page, bytes, off) !=
@@ -397,11 +386,9 @@ static int request_map_sg(struct request
 			}
 
 			if (bio->bi_vcnt >= nr_vecs) {
-				err = scsiback_merge_bio(rq, bio);
-				if (err) {
-					bio_endio(bio, 0);
-					goto free_bios;
-				}
+				bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+				if (pending_req->sc_data_direction == WRITE)
+					bio->bi_rw |= (1 << BIO_RW);
 				bio = NULL;
 			}
 
@@ -411,21 +398,15 @@ static int request_map_sg(struct request
 		}
 	}
 
-	rq->buffer   = rq->data = NULL;
-	rq->data_len = data_len;
-
-	return 0;
+	return bio_first;
 
 free_bios:
-	while ((bio = rq->bio) != NULL) {
-		rq->bio = bio->bi_next;
-		/*
-		 * call endio instead of bio_put incase it was bounced
-		 */
-		bio_endio(bio, 0);
+	while ((bio = bio_first) != NULL) {
+		bio_first = bio->bi_next;
+		bio_put(bio);
 	}
 
-	return err;
+	return ERR_PTR(err);
 }
 
 
@@ -433,7 +414,6 @@ void scsiback_cmd_exec(pending_req_t *pe
 {
 	int cmd_len  = (int)pending_req->cmd_len;
 	int data_dir = (int)pending_req->sc_data_direction;
-	unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
 	unsigned int timeout;
 	struct request *rq;
 	int write;
@@ -447,7 +427,30 @@ void scsiback_cmd_exec(pending_req_t *pe
 		timeout = VSCSIIF_TIMEOUT;
 
 	write = (data_dir == DMA_TO_DEVICE);
-	rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
+	if (pending_req->nr_segments) {
+		struct bio *bio = request_map_sg(pending_req);
+
+		if (IS_ERR(bio)) {
+			pr_err("scsiback: SG Request Map Error\n");
+			return;
+		}
+
+		rq = blk_make_request(pending_req->sdev->request_queue, bio,
+				      GFP_KERNEL);
+		if (IS_ERR(rq)) {
+			pr_err("scsiback: Make Request Error\n");
+			return;
+		}
+
+		rq->buffer = NULL;
+	} else {
+		rq = blk_get_request(pending_req->sdev->request_queue, write,
+				     GFP_KERNEL);
+		if (unlikely(!rq)) {
+			pr_err("scsiback: Get Request Error\n");
+			return;
+		}
+	}
 
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_len = cmd_len;
@@ -462,14 +465,6 @@ void scsiback_cmd_exec(pending_req_t *pe
 	rq->timeout   = timeout;
 	rq->end_io_data = pending_req;
 
-	if (nr_segments) {
-
-		if (request_map_sg(rq, pending_req, nr_segments)) {
-			pr_err("scsiback: SG Request Map Error\n");
-			return;
-		}
-	}
-
 	scsiback_get(pending_req->info);
 	blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
 
--- 12.2.orig/drivers/xen/scsiback/xenbus.c	2011-06-30 16:32:56.000000000 +0200
+++ 12.2/drivers/xen/scsiback/xenbus.c	2011-06-30 17:04:59.000000000 +0200
@@ -225,7 +225,7 @@ static void scsiback_do_lun_hotplug(stru
 static void scsiback_frontend_changed(struct xenbus_device *dev,
 					enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
@@ -282,7 +282,7 @@ static void scsiback_frontend_changed(st
 
 static int scsiback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (be->info) {
 		scsiback_disconnect(be->info);
@@ -292,7 +292,7 @@ static int scsiback_remove(struct xenbus
 	}
 
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 
 	return 0;
 }
@@ -315,7 +315,7 @@ static int scsiback_probe(struct xenbus_
 		return -ENOMEM;
 	}
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	be->info = vscsibk_info_alloc(dev->otherend_id);
 	if (IS_ERR(be->info)) {
--- 12.2.orig/drivers/xen/scsifront/xenbus.c	2011-02-08 10:04:06.000000000 +0100
+++ 12.2/drivers/xen/scsifront/xenbus.c	2011-02-08 10:05:30.000000000 +0100
@@ -189,7 +189,7 @@ static int scsifront_probe(struct xenbus
 	info->host = host;
 
 
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->dev  = dev;
 
 	for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
@@ -243,7 +243,7 @@ free_sring:
 
 static int scsifront_remove(struct xenbus_device *dev)
 {
-	struct vscsifrnt_info *info = dev->dev.driver_data;
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
 
@@ -355,7 +355,7 @@ static void scsifront_do_lun_hotplug(str
 static void scsifront_backend_changed(struct xenbus_device *dev,
 				enum xenbus_state backend_state)
 {
-	struct vscsifrnt_info *info = dev->dev.driver_data;
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
 
--- 12.2.orig/drivers/xen/sfc_netback/accel_xenbus.c	2011-01-31 17:29:16.000000000 +0100
+++ 12.2/drivers/xen/sfc_netback/accel_xenbus.c	2011-02-01 14:50:44.000000000 +0100
@@ -36,7 +36,7 @@
 #define NODENAME_PATH_FMT "backend/vif/%d/%d"
 
 #define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
-	((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
+	((struct backend_info *)dev_get_drvdata(&(_dev)->dev))->netback_accel_priv
 
 /* List of all the bends currently in existence. */
 struct netback_accel *bend_list = NULL;
@@ -615,7 +615,7 @@ int netback_accel_probe(struct xenbus_de
 	mutex_lock(&bend->bend_mutex);
 
 	/* ...and store it where we can get at it */
-	binfo = (struct backend_info *) dev->dev.driver_data;
+	binfo = dev_get_drvdata(&dev->dev);
 	binfo->netback_accel_priv = bend;
 	/* And vice-versa */
 	bend->hdev_data = dev;
@@ -729,7 +729,7 @@ int netback_accel_remove(struct xenbus_d
 	struct netback_accel *bend; 
 	int frontend_state;
 
-	binfo = (struct backend_info *) dev->dev.driver_data;
+	binfo = dev_get_drvdata(&dev->dev);
 	bend = (struct netback_accel *) binfo->netback_accel_priv;
 
 	DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
--- 12.2.orig/drivers/xen/sfc_netfront/accel_xenbus.c	2011-01-31 17:32:29.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel_xenbus.c	2011-02-01 14:50:44.000000000 +0100
@@ -727,8 +727,7 @@ int netfront_accel_probe(struct net_devi
 
 int netfront_accel_remove(struct xenbus_device *dev)
 {
-	struct netfront_info *np =
-		(struct netfront_info *)dev->dev.driver_data;
+	struct netfront_info *np = dev_get_drvdata(&dev->dev);
 	netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
 
 	DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
--- 12.2.orig/drivers/xen/sys-hypervisor.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/xen/sys-hypervisor.c	2012-02-17 14:32:15.000000000 +0100
@@ -12,14 +12,20 @@
 #include <linux/module.h>
 #include <linux/kobject.h>
 
+#if defined(CONFIG_XEN) || defined(MODULE)
+#include <asm/hypervisor.h>
+#else
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#endif
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 
+#include "xenbus/xenbus_comms.h"
+
 #define HYPERVISOR_ATTR_RO(_name) \
 static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
 
@@ -118,9 +124,8 @@ static ssize_t uuid_show(struct hyp_sysf
 {
 	char *vm, *val;
 	int ret;
-	extern int xenstored_ready;
 
-	if (!xenstored_ready)
+	if (!is_xenstored_ready())
 		return -EBUSY;
 
 	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
@@ -355,6 +360,35 @@ static void xen_properties_destroy(void)
 	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
 }
 
+#if defined(CONFIG_XEN) && defined(CONFIG_KEXEC)
+extern size_t vmcoreinfo_size_xen;
+extern unsigned long paddr_vmcoreinfo_xen;
+
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
+{
+	return sprintf(page, "%lx %zx\n",
+		       paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
+}
+
+HYPERVISOR_ATTR_RO(vmcoreinfo);
+
+static int __init xen_sysfs_vmcoreinfo_init(void)
+{
+	if (!vmcoreinfo_size_xen)
+		return 0;
+	return sysfs_create_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
+}
+
+static void xen_sysfs_vmcoreinfo_destroy(void)
+{
+	if (vmcoreinfo_size_xen)
+		sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
+}
+#else
+static inline int __init xen_sysfs_vmcoreinfo_init(void) { return 0; }
+static inline void xen_sysfs_vmcoreinfo_destroy(void) { }
+#endif
+
 static int __init hyper_sysfs_init(void)
 {
 	int ret;
@@ -377,9 +411,11 @@ static int __init hyper_sysfs_init(void)
 	ret = xen_properties_init();
 	if (ret)
 		goto prop_out;
+	ret = xen_sysfs_vmcoreinfo_init();
+	if (!ret)
+		goto out;
 
-	goto out;
-
+	xen_properties_destroy();
 prop_out:
 	xen_sysfs_uuid_destroy();
 uuid_out:
@@ -394,6 +430,7 @@ out:
 
 static void __exit hyper_sysfs_exit(void)
 {
+	xen_sysfs_vmcoreinfo_destroy();
 	xen_properties_destroy();
 	xen_compilation_destroy();
 	xen_sysfs_uuid_destroy();
--- 12.2.orig/drivers/xen/tpmback/xenbus.c	2011-12-21 10:02:58.000000000 +0100
+++ 12.2/drivers/xen/tpmback/xenbus.c	2011-04-11 15:04:27.000000000 +0200
@@ -39,7 +39,7 @@ long int tpmback_get_instance(struct bac
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (!be) return 0;
 
@@ -55,7 +55,7 @@ static int tpmback_remove(struct xenbus_
 		be->tpmif = NULL;
 	}
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -74,7 +74,7 @@ static int tpmback_probe(struct xenbus_d
 
 	be->is_instance_set = 0;
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	err = xenbus_watch_path2(dev, dev->nodename,
 				 "instance", &be->backend_watch,
@@ -124,7 +124,7 @@ static void backend_changed(struct xenbu
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
--- 12.2.orig/drivers/xen/usbback/usbback.h	2012-06-06 13:53:31.000000000 +0200
+++ 12.2/drivers/xen/usbback/usbback.h	2012-06-06 14:02:16.000000000 +0200
@@ -61,6 +61,12 @@
 
 struct usbstub;
 
+#ifndef BUS_ID_SIZE
+#define USBBACK_BUS_ID_SIZE 20
+#else
+#define USBBACK_BUS_ID_SIZE BUS_ID_SIZE
+#endif
+
 #define USB_DEV_ADDR_SIZE 128
 
 typedef struct usbif_st {
@@ -103,7 +109,7 @@ typedef struct usbif_st {
 struct vusb_port_id {
 	struct list_head id_list;
 
-	char phys_bus[BUS_ID_SIZE];
+	char phys_bus[USBBACK_BUS_ID_SIZE];
 	domid_t domid;
 	unsigned int handle;
 	int portnum;
--- 12.2.orig/drivers/xen/usbback/usbstub.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/drivers/xen/usbback/usbstub.c	2011-02-01 14:50:44.000000000 +0100
@@ -56,7 +56,7 @@ struct vusb_port_id *find_portid_by_busi
 
 	spin_lock_irqsave(&port_list_lock, flags);
 	list_for_each_entry(portid, &port_list, id_list) {
-		if (!(strncmp(portid->phys_bus, busid, BUS_ID_SIZE))) {
+		if (!(strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))) {
 			found = 1;
 			break;
 		}
@@ -110,7 +110,7 @@ int portid_add(const char *busid,
 	portid->handle = handle;
 	portid->portnum = portnum;
 
-	strlcpy(portid->phys_bus, busid, BUS_ID_SIZE);
+	strlcpy(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE);
 
 	spin_lock_irqsave(&port_list_lock, flags);
 	list_add(&portid->id_list, &port_list);
@@ -228,7 +228,7 @@ static int usbstub_probe(struct usb_inte
 		usbbk_hotplug_notify(usbif, portid->portnum, udev->speed);
 	} else {
 		/* maybe already called and connected by other intf */
-		if (strncmp(stub->portid->phys_bus, busid, BUS_ID_SIZE))
+		if (strncmp(stub->portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))
 			goto out; /* invalid call */
 	}
 
--- 12.2.orig/drivers/xen/usbback/xenbus.c	2011-06-30 16:33:08.000000000 +0200
+++ 12.2/drivers/xen/usbback/xenbus.c	2011-06-30 17:05:05.000000000 +0200
@@ -112,7 +112,7 @@ again:
 		 */
 		portid = find_portid(usbif->domid, usbif->handle, i);
 		if (portid) {
-			if ((strncmp(portid->phys_bus, busid, BUS_ID_SIZE)))
+			if ((strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE)))
 				xenbus_dev_fatal(dev, err,
 					"can't add port/%d, remove first", i);
 			else
@@ -142,7 +142,7 @@ abort:
 
 static int usbback_remove(struct xenbus_device *dev)
 {
-	usbif_t *usbif = dev->dev.driver_data;
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
 	int i;
 
 	if (usbif->backend_watch.node) {
@@ -158,7 +158,7 @@ static int usbback_remove(struct xenbus_
 		usbif_disconnect(usbif);
 		usbif_free(usbif);;
 	}
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 
 	return 0;
 }
@@ -182,7 +182,7 @@ static int usbback_probe(struct xenbus_d
 		return -ENOMEM;
 	}
 	usbif->xbdev = dev;
-	dev->dev.driver_data = usbif;
+	dev_set_drvdata(&dev->dev, usbif);
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
 				"num-ports", "%d", &num_ports);
@@ -258,7 +258,7 @@ static int connect_rings(usbif_t *usbif)
 static void frontend_changed(struct xenbus_device *dev,
 				     enum xenbus_state frontend_state)
 {
-	usbif_t *usbif = dev->dev.driver_data;
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
--- 12.2.orig/drivers/xen/usbfront/xenbus.c	2012-03-12 13:49:01.000000000 +0100
+++ 12.2/drivers/xen/usbfront/xenbus.c	2012-03-12 13:49:41.000000000 +0100
@@ -185,7 +185,7 @@ out:
 
 static int connect(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 
 	usbif_conn_request_t *req;
 	int i, idx, err;
@@ -297,7 +297,7 @@ static int usbfront_probe(struct xenbus_
 	}
 
 	info = hcd_to_info(hcd);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = usb_add_hcd(hcd, 0, 0);
 	if (err != 0) {
@@ -312,13 +312,13 @@ static int usbfront_probe(struct xenbus_
 
 fail:
 	usb_put_hcd(hcd);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return err;
 }
 
 static void usbfront_disconnect(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 	struct usb_hcd *hcd = info_to_hcd(info);
 
 	usb_remove_hcd(hcd);
@@ -362,7 +362,7 @@ static void backend_changed(struct xenbu
 
 static int usbfront_remove(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 	struct usb_hcd *hcd = info_to_hcd(info);
 
 	destroy_rings(info);
--- 12.2.orig/drivers/xen/util.c	2011-01-31 17:56:27.000000000 +0100
+++ 12.2/drivers/xen/util.c	2011-02-01 14:50:44.000000000 +0100
@@ -1,20 +1,74 @@
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
 #include <xen/driver_util.h>
 
-struct class *get_xen_class(void)
+static struct class *_get_xen_class(void)
 {
 	static struct class *xen_class;
+	static DEFINE_MUTEX(xc_mutex);
 
-	if (xen_class)
-		return xen_class;
-
-	xen_class = class_create(THIS_MODULE, "xen");
-	if (IS_ERR(xen_class)) {
+	mutex_lock(&xc_mutex);
+	if (IS_ERR_OR_NULL(xen_class))
+		xen_class = class_create(THIS_MODULE, "xen");
+	mutex_unlock(&xc_mutex);
+	if (IS_ERR(xen_class))
 		pr_err("failed to create xen sysfs class\n");
-		xen_class = NULL;
-	}
 
 	return xen_class;
 }
+
+struct class *get_xen_class(void)
+{
+	struct class *class = _get_xen_class();
+
+	return !IS_ERR(class) ? class : NULL;
+}
 EXPORT_SYMBOL_GPL(get_xen_class);
+
+static void xcdev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+struct device *xen_class_device_create(struct device_type *type,
+				       struct device *parent,
+				       dev_t devt, void *drvdata,
+				       const char *fmt, ...)
+{
+	struct device *dev;
+	int err;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev) {
+		va_list vargs;
+
+		va_start(vargs, fmt);
+		err = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
+		va_end(vargs);
+	} else
+		err = -ENOMEM;
+
+	if (!err) {
+		dev->devt = devt;
+		dev->class = _get_xen_class();
+		if (IS_ERR(dev->class))
+			err = PTR_ERR(dev->class);
+	}
+
+	if (!err) {
+		dev->type = type;
+		dev->parent = parent;
+		dev_set_drvdata(dev, drvdata);
+		dev->release = xcdev_release;
+		err = device_register(dev);
+		if (!err)
+			return dev;
+		put_device(dev);
+	} else
+		kfree(dev);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(xen_class_device_create);
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe.c	2011-06-10 12:04:05.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:09:32.000000000 +0100
@@ -92,6 +92,11 @@ static int xenbus_probe_frontend(const c
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+#endif
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -228,6 +233,10 @@ static struct xen_bus_type xenbus_fronte
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
 		.dev_attrs = xenbus_dev_attrs,
 #endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
+#endif
 	},
 #if defined(CONFIG_XEN) || defined(MODULE)
 	.dev = {
@@ -756,6 +765,9 @@ void xenbus_dev_changed(const char *node
 
 	kfree(root);
 }
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
+#endif
 
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
@@ -771,8 +783,11 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
-#if defined(CONFIG_PM_SLEEP) || defined(MODULE)
-static int suspend_dev(struct device *dev, void *data)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+#else
+static int __maybe_unused suspend_dev(struct device *dev, void *data)
+#endif
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -785,14 +800,19 @@ static int suspend_dev(struct device *de
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+		err = drv->suspend(xdev, state);
+#else
 		err = drv->suspend(xdev);
+#endif
 	if (err)
 		pr_warning("xenbus: suspend %s failed: %i\n",
 			   dev_name(dev), err);
 	return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
+#if defined(CONFIG_XEN) || defined(MODULE)
+static int __maybe_unused suspend_cancel_dev(struct device *dev, void *data)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -811,8 +831,13 @@ static int suspend_cancel_dev(struct dev
 			   dev_name(dev), err);
 	return 0;
 }
+#endif
 
-static int resume_dev(struct device *dev, void *data)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_resume(struct device *dev)
+#else
+static int __maybe_unused resume_dev(struct device *dev, void *data)
+#endif
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -854,6 +879,7 @@ static int resume_dev(struct device *dev
 	return 0;
 }
 
+#if (defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)) || defined(MODULE)
 void xenbus_suspend(void)
 {
 	DPRINTK("");
@@ -1115,13 +1141,6 @@ static int xsd_port_read(char *page, cha
 #endif
 
 #ifdef CONFIG_XEN_XENBUS_DEV
-static int xb_free_port(evtchn_port_t port)
-{
-	struct evtchn_close close;
-	close.port = port;
-	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
-}
-
 int xenbus_conn(domid_t remote_dom, grant_ref_t *grant_ref,
 		evtchn_port_t *local_port)
 {
@@ -1134,7 +1153,7 @@ int xenbus_conn(domid_t remote_dom, gran
 	remove_xen_proc_entry("xsd_kva");
 	remove_xen_proc_entry("xsd_port");
 
-	rc = xb_free_port(xen_store_evtchn);
+	rc = close_evtchn(xen_store_evtchn);
 	if (rc != 0)
 		goto fail0;
 
@@ -1160,7 +1179,7 @@ int xenbus_conn(domid_t remote_dom, gran
 	return 0;
 
 fail1:
-	rc2 = xb_free_port(xen_store_evtchn);
+	rc2 = close_evtchn(xen_store_evtchn);
 	if (rc2 != 0)
 		pr_warning("XENBUS: Error freeing xenstore event channel:"
 			   " %d\n", rc2);
--- 12.2.orig/drivers/xen/xenbus/xenbus_xs.c	2012-03-22 14:04:55.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_xs.c	2012-03-22 14:09:34.000000000 +0100
@@ -736,6 +736,10 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+	xb_init_comms();
+#endif
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	transaction_resume();
--- 12.2.orig/include/Kbuild	2011-02-01 14:38:38.000000000 +0100
+++ 12.2/include/Kbuild	2011-02-01 14:50:44.000000000 +0100
@@ -8,6 +8,5 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
-header-y += xen/public/
 header-y += xen/
 header-y += scsi/
--- 12.2.orig/include/xen/Kbuild	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/include/xen/Kbuild	2011-02-01 14:50:44.000000000 +0100
@@ -1,3 +1,2 @@
-header-y += evtchn.h
 header-y += privcmd.h
 header-y += public/
--- 12.2.orig/include/xen/driver_util.h	2011-01-31 17:49:31.000000000 +0100
+++ 12.2/include/xen/driver_util.h	2011-02-01 14:50:44.000000000 +0100
@@ -1,8 +1,14 @@
 #ifndef __XEN_DRIVER_UTIL_H__
 #define __XEN_DRIVER_UTIL_H__
 
+#include <linux/compiler.h>
 #include <linux/device.h>
 
 extern struct class *get_xen_class(void);
+extern struct device *xen_class_device_create(struct device_type *,
+					      struct device *parent,
+					      dev_t devt, void *drvdata,
+					      const char *fmt, ...)
+		      __printf(5, 6);
 
 #endif /* __XEN_DRIVER_UTIL_H__ */
--- 12.2.orig/include/xen/evtchn.h	2011-02-01 14:42:26.000000000 +0100
+++ 12.2/include/xen/evtchn.h	2011-12-09 14:50:16.000000000 +0100
@@ -113,9 +113,6 @@ void irq_resume(void);
 /* Entry point for notifications into Linux subsystems. */
 asmlinkage void evtchn_do_upcall(struct pt_regs *regs);
 
-/* Entry point for notifications into the userland character device. */
-void evtchn_device_upcall(int port);
-
 /* Mark a PIRQ as unavailable for dynamic allocation. */
 void evtchn_register_pirq(int irq);
 /* Map a Xen-supplied PIRQ to a dynamically allocated one. */
@@ -126,12 +123,7 @@ int evtchn_get_xen_pirq(int irq);
 void mask_evtchn(int port);
 void disable_all_local_evtchn(void);
 void unmask_evtchn(int port);
-
-#ifdef CONFIG_SMP
-void rebind_evtchn_to_cpu(int port, unsigned int cpu);
-#else
-#define rebind_evtchn_to_cpu(port, cpu)	((void)0)
-#endif
+unsigned int irq_from_evtchn(unsigned int port);
 
 static inline int test_and_set_evtchn_mask(int port)
 {
@@ -163,6 +155,12 @@ static inline void notify_remote_via_evt
 	VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
 }
 
+static inline int close_evtchn(int port)
+{
+	struct evtchn_close close = { .port = port };
+	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+}
+
 /*
  * Use these to access the event channel underlying the IRQ handle returned
  * by bind_*_to_irqhandler().
--- 12.2.orig/include/xen/xenbus.h	2011-12-21 11:11:38.000000000 +0100
+++ 12.2/include/xen/xenbus.h	2011-02-02 16:59:07.000000000 +0100
@@ -104,8 +104,12 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
+#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
+#else
 	int (*suspend)(struct xenbus_device *dev);
 	int (*suspend_cancel)(struct xenbus_device *dev);
+#endif
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
 	struct device_driver driver;
--- 12.2.orig/lib/swiotlb-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/lib/swiotlb-xen.c	2011-02-01 14:50:44.000000000 +0100
@@ -47,8 +47,8 @@ int swiotlb;
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -167,7 +167,7 @@ dma_addr_t swiotlb_phys_to_bus(struct de
 	return phys_to_machine(paddr);
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return machine_to_phys(baddr);
 }
@@ -178,9 +178,15 @@ static dma_addr_t swiotlb_virt_to_bus(st
 	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-	return phys_to_virt(swiotlb_bus_to_phys(address));
+	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+					       dma_addr_t addr, size_t size)
+{
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -315,7 +321,7 @@ static void swiotlb_bounce(phys_addr_t p
 		unsigned long flags;
 
 		while (size) {
-			sz = min((size_t)(PAGE_SIZE - offset), size);
+			sz = min_t(size_t, PAGE_SIZE - offset, size);
 
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn),
@@ -449,7 +455,7 @@ found:
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -544,7 +550,7 @@ swiotlb_full(struct device *dev, size_t 
  * PCI address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    unsigned long offset, size_t size,
@@ -558,7 +564,7 @@ dma_addr_t swiotlb_map_page(struct devic
 	BUG_ON(dir == DMA_NONE);
 
 	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
+	 * If the address happens to be in the device's DMA window,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
@@ -583,23 +589,32 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+			 size_t size, int dir)
+{
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+	BUG_ON(dir == DMA_NONE);
+
+	if (is_swiotlb_buffer(dev_addr)) {
+		do_unmap_single(hwdev, dma_addr, size, dir);
+		return;
+	}
+
+	gnttab_dma_unmap_page(dev_addr);
+}
+
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			size_t size, enum dma_data_direction dir,
 			struct dma_attrs *attrs)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dev_addr))
-		unmap_single(hwdev, dma_addr, size, dir);
-	else
-		gnttab_dma_unmap_page(dev_addr);
+	unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -607,7 +622,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
  * call this function before doing so.  At the next point you give the PCI dma
  * address back to the card, you must first perform a
@@ -617,9 +632,10 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
+
 	if (is_swiotlb_buffer(dev_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 }
@@ -648,11 +664,7 @@ swiotlb_sync_single_range(struct device 
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr + offset, size, dir, target);
+	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
 }
 
 void
@@ -677,7 +689,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -688,7 +700,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -741,7 +753,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -752,13 +764,9 @@ swiotlb_unmap_sg_attrs(struct device *hw
 
 	BUG_ON(dir == DMA_NONE);
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != sg_phys(sg))
-			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				     sg->dma_length, dir);
-		else
-			gnttab_dma_unmap_page(sg->dma_address);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
@@ -784,13 +792,9 @@ swiotlb_sync_sg(struct device *hwdev, st
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != sg_phys(sg))
-			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+	for_each_sg(sgl, sg, nelems, i)
+		swiotlb_sync_single(hwdev, sg->dma_address,
 				    sg->dma_length, dir, target);
-	}
 }
 
 void
--- 12.2.orig/mm/init-mm.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/mm/init-mm.c	2011-04-13 13:55:08.000000000 +0200
@@ -13,6 +13,10 @@
 #define INIT_MM_CONTEXT(name)
 #endif
 
+#ifdef CONFIG_X86_XEN
+#define swapper_pg_dir ((pgd_t *)NULL)
+#endif
+
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
 	.pgd		= swapper_pg_dir,
--- 12.2.orig/mm/memory.c	2012-04-10 16:42:52.000000000 +0200
+++ 12.2/mm/memory.c	2012-04-10 16:59:34.000000000 +0200
@@ -1759,7 +1759,7 @@ int __get_user_pages(struct task_struct 
 					vmas[i] = vma;
 				i++;
 				start += PAGE_SIZE;
-				len--;
+				nr_pages--;
 				continue;
 			}
 		}
--- 12.2.orig/mm/page_alloc.c	2012-04-10 16:56:53.000000000 +0200
+++ 12.2/mm/page_alloc.c	2012-02-08 12:13:15.000000000 +0100
@@ -694,6 +694,7 @@ static bool free_pages_prepare(struct pa
 
 #ifdef CONFIG_XEN
 	if (PageForeign(page)) {
+		WARN_ON(wasMlocked);
 		PageForeignDestructor(page, order);
 		return;
 	}