Blob Blame History Raw
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.38
Patch-mainline: 2.6.38

 This patch contains the differences between 2.6.37 and 2.6.38.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.38" by xen-port-patches.py

--- head.orig/arch/x86/Kconfig	2012-04-10 17:06:46.000000000 +0200
+++ head/arch/x86/Kconfig	2012-04-10 17:07:07.000000000 +0200
@@ -53,7 +53,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2 if !XEN
 	select HAVE_KERNEL_LZMA if !XEN
-	select HAVE_KERNEL_XZ
+	select HAVE_KERNEL_XZ if !XEN
 	select HAVE_KERNEL_LZO if !XEN
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
@@ -547,7 +547,7 @@ config X86_ES7000
 
 config X86_32_IRIS
 	tristate "Eurobraille/Iris poweroff module"
-	depends on X86_32
+	depends on X86_32 && !XEN
 	---help---
 	  The Iris machines from EuroBraille do not have APM or ACPI support
 	  to shut themselves down properly.  A special I/O sequence is
--- head.orig/arch/x86/include/asm/apic.h	2012-04-10 15:45:45.000000000 +0200
+++ head/arch/x86/include/asm/apic.h	2012-04-10 17:07:20.000000000 +0200
@@ -239,7 +239,11 @@ extern void setup_local_APIC(void);
 extern void end_local_APIC_setup(void);
 extern void bsp_end_local_APIC_setup(void);
 extern void init_apic_mappings(void);
+#ifndef CONFIG_XEN
 void register_lapic_address(unsigned long address);
+#else
+#define register_lapic_address(address)
+#endif
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
--- head.orig/arch/x86/include/asm/xen/hypervisor.h	2012-06-12 15:05:54.000000000 +0200
+++ head/arch/x86/include/asm/xen/hypervisor.h	2011-04-13 14:08:57.000000000 +0200
@@ -58,7 +58,7 @@ static inline uint32_t xen_cpuid_base(vo
 	return 0;
 }
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
 extern bool xen_hvm_need_lapic(void);
 
 static inline bool xen_x2apic_para_available(void)
--- head.orig/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 15:41:35.000000000 +0100
@@ -123,7 +123,11 @@ enum fixed_addresses {
 #endif
 	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
 	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
+#ifdef	CONFIG_X86_MRST
+	FIX_LNW_VRTC,
+#endif
 	__end_of_permanent_fixed_addresses,
+
 	/*
 	 * 256 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
--- head.orig/arch/x86/include/mach-xen/asm/mach_traps.h	2007-06-12 13:14:02.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/mach_traps.h	2011-02-07 12:21:00.000000000 +0100
@@ -9,7 +9,11 @@
 #include <linux/bitops.h>
 #include <xen/interface/nmi.h>
 
-static inline void clear_mem_error(unsigned char reason) {}
+#define NMI_REASON_SERR		0x80
+#define NMI_REASON_IOCHK	0x40
+#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+static inline void clear_serr_error(unsigned char reason) {}
 static inline void clear_io_check_error(unsigned char reason) {}
 
 static inline unsigned char get_nmi_reason(void)
@@ -21,9 +25,9 @@ static inline unsigned char get_nmi_reas
 	 * port 0x61.
 	 */
 	if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
-		reason |= 0x40;
+		reason |= NMI_REASON_IOCHK;
 	if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
-		reason |= 0x80;
+		reason |= NMI_REASON_SERR;
 
         return reason;
 }
--- head.orig/arch/x86/include/mach-xen/asm/mmu_context.h	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/mmu_context.h	2011-02-08 10:25:49.000000000 +0100
@@ -87,8 +87,6 @@ static inline void switch_mm(struct mm_s
 		BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
 		       !PagePinned(virt_to_page(next->pgd)));
 
-		/* stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		percpu_write(cpu_tlbstate.active_mm, next);
@@ -119,6 +117,9 @@ static inline void switch_mm(struct mm_s
 		}
 
 		BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
+
+		/* stop TLB flushes for the previous mm */
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 	}
 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
 	else {
--- head.orig/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:41:35.000000000 +0100
@@ -71,6 +71,7 @@ extern unsigned long pci_mem_start;
 
 #define PCIBIOS_MIN_CARDBUS_IO	0x4000
 
+extern int pcibios_enabled;
 void pcibios_config_init(void);
 struct pci_bus *pcibios_scan_root(int bus);
 
--- head.orig/arch/x86/include/mach-xen/asm/perf_event.h	2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/perf_event.h	2011-02-01 15:41:55.000000000 +0100
@@ -34,6 +34,4 @@
 
 #endif
 
-static inline void init_hw_perf_events(void) {}
-
 #endif /* _ASM_X86_PERF_EVENT_H */
--- head.orig/arch/x86/include/mach-xen/asm/pgalloc.h	2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgalloc.h	2011-02-01 15:41:35.000000000 +0100
@@ -106,7 +106,7 @@ static inline void pmd_free(struct mm_st
 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
-				  unsigned long adddress)
+				  unsigned long address)
 {
 	___pmd_free_tlb(tlb, pmd);
 }
--- head.orig/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:02:30.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:10:00.000000000 +0100
@@ -32,6 +32,7 @@ extern struct mm_struct *pgd_page_get_mm
 
 #define set_pte(ptep, pte)		xen_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)	xen_set_pte_at(mm, addr, ptep, pte)
+#define set_pmd_at(mm, addr, pmdp, pmd)	xen_set_pmd_at(mm, addr, pmdp, pmd)
 
 #define set_pmd(pmdp, pmd)		xen_set_pmd(pmdp, pmd)
 
@@ -53,6 +54,8 @@ extern struct mm_struct *pgd_page_get_mm
 
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
+#define pmd_update(mm, addr, ptep)              do { } while (0)
+#define pmd_update_defer(mm, addr, ptep)        do { } while (0)
 
 #define pgd_val(x)	xen_pgd_val(x)
 #define __pgd(x)	xen_make_pgd(x)
@@ -86,6 +89,11 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
+static inline int pmd_young(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_ACCESSED;
+}
+
 static inline int pte_write(pte_t pte)
 {
 	return pte_flags(pte) & _PAGE_RW;
@@ -136,6 +144,23 @@ static inline int pmd_large(pmd_t pte)
 		(_PAGE_PSE | _PAGE_PRESENT);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return pmd_val(pmd) & _PAGE_SPLITTING;
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & _PAGE_PSE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return cpu_has_pse;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
 {
 	pteval_t v = __pte_val(pte);
@@ -210,6 +235,57 @@ static inline pte_t pte_mkspecial(pte_t 
 	return pte_set_flags(pte, _PAGE_SPECIAL);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+	pmdval_t v = native_pmd_val(pmd);
+
+	return __pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+	pmdval_t v = native_pmd_val(pmd);
+
+	return __pmd(v & ~clear);
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_ACCESSED);
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_RW);
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_DIRTY);
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_PSE);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_ACCESSED);
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_RW);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+}
+#endif
+
 /*
  * Mask out unsupported bits in a present pgprot.  Non-present pgprots
  * can use those bits for other purposes, so leave them be.
@@ -250,6 +326,18 @@ static inline pte_t pte_modify(pte_t pte
 	return __pte(val);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	pmdval_t val = pmd_val(pmd);
+
+	val &= _HPAGE_CHG_MASK;
+	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+
+	return __pmd(val);
+}
+#endif
+
 /* mprotect needs to preserve PAT bits when updating vm_page_prot */
 #define pgprot_modify pgprot_modify
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
@@ -350,7 +438,7 @@ static inline unsigned long pmd_page_vad
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd)	pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -526,6 +614,14 @@ static inline pte_t xen_local_ptep_get_a
 	return res;
 }
 
+static inline pmd_t xen_local_pmdp_get_and_clear(pmd_t *pmdp)
+{
+	pmd_t res = *pmdp;
+
+	xen_pmd_clear(pmdp);
+	return res;
+}
+
 static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep , pte_t pte)
 {
@@ -534,6 +630,12 @@ static inline void xen_set_pte_at(struct
 		xen_set_pte(ptep, pte);
 }
 
+static inline void xen_set_pmd_at(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp , pmd_t pmd)
+{
+	xen_set_pmd(pmdp, pmd);
+}
+
 static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep)
 {
@@ -638,6 +740,53 @@ static inline void ptep_set_wrprotect(st
 
 #define flush_tlb_fix_spurious_fault(vma, address)
 
+#define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
+
+#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_RW;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pmd_t *pmdp)
+{
+	pmd_t pmd = xen_pmdp_get_and_clear(pmdp);
+	pmd_update(mm, addr, pmdp);
+	return pmd;
+}
+#endif
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp)
+{
+	clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
+	pmd_update(mm, addr, pmdp);
+}
+#endif
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
--- head.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-03-23 10:02:02.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-03-23 10:10:03.000000000 +0100
@@ -101,6 +101,31 @@ static inline pte_t xen_ptep_get_and_cle
 #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
 			 ((_pte).pte_high << (32-PAGE_SHIFT)))
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_SMP
+union split_pmd {
+	struct {
+		u32 pmd_low;
+		u32 pmd_high;
+	};
+	pmd_t pmd;
+};
+static inline pmd_t xen_pmdp_get_and_clear(pmd_t *pmdp)
+{
+	union split_pmd res, *orig = (union split_pmd *)pmdp;
+
+	/* xchg acts as a barrier before setting of the high bits */
+	res.pmd_low = xchg(&orig->pmd_low, 0);
+	res.pmd_high = orig->pmd_high;
+	orig->pmd_high = 0;
+
+	return res.pmd;
+}
+#else
+#define xen_pmdp_get_and_clear(xp) xen_local_pmdp_get_and_clear(xp)
+#endif
+#endif
+
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 10:02:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-03-23 10:10:05.000000000 +0100
@@ -60,6 +60,16 @@ static inline void xen_set_pte(pte_t *pt
 	*ptep = pte;
 }
 
+static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	xen_l2_entry_update(pmdp, pmd);
+}
+
+static inline void xen_pmd_clear(pmd_t *pmd)
+{
+	xen_set_pmd(pmd, xen_make_pmd(0));
+}
+
 #ifdef CONFIG_SMP
 static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t ret)
 {
@@ -69,15 +79,16 @@ static inline pte_t xen_ptep_get_and_cle
 #define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
 #endif
 
-static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
-{
-	xen_l2_entry_update(pmdp, pmd);
-}
-
-static inline void xen_pmd_clear(pmd_t *pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_SMP
+static inline pmd_t xen_pmdp_get_and_clear(pmd_t *xp)
 {
-	xen_set_pmd(pmd, xen_make_pmd(0));
+	return xen_make_pmd(xchg(&xp->pmd, 0));
 }
+#else
+#define xen_pmdp_get_and_clear(xp) xen_local_pmdp_get_and_clear(xp)
+#endif
+#endif
 
 static inline void xen_set_pud(pud_t *pudp, pud_t pud)
 {
@@ -170,6 +181,7 @@ extern void cleanup_highmap(void);
 #define	kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
 
 #define __HAVE_ARCH_PTE_SAME
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_64_H */
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 15:41:35.000000000 +0100
@@ -22,6 +22,7 @@
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
+#define _PAGE_BIT_SPLITTING	_PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -45,6 +46,7 @@
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define _PAGE_SPLITTING	(_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING)
 #define __HAVE_ARCH_PTE_SPECIAL
 
 #ifdef CONFIG_KMEMCHECK
@@ -78,6 +80,7 @@ extern unsigned int __kernel_page_user;
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \
 			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*
  * PAT settings are part of the hypervisor interface, which sets the
--- head.orig/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:48.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:59.000000000 +0100
@@ -152,10 +152,9 @@ extern __u32			cpu_caps_set[NCAPINTS];
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 #define cpu_data(cpu)		per_cpu(cpu_info, cpu)
-#define current_cpu_data	__get_cpu_var(cpu_info)
 #else
+#define cpu_info		boot_cpu_data
 #define cpu_data(cpu)		boot_cpu_data
-#define current_cpu_data	boot_cpu_data
 #endif
 
 extern const struct seq_operations cpuinfo_op;
@@ -716,10 +715,11 @@ extern void select_idle_routine(const st
 extern void init_c1e_mask(void);
 
 extern unsigned long		boot_option_idle_override;
-extern unsigned long		idle_halt;
-extern unsigned long		idle_nomwait;
 extern bool			c1e_detected;
 
+enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
+			 IDLE_POLL, IDLE_FORCE_MWAIT};
+
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
@@ -856,7 +856,7 @@ extern unsigned long thread_saved_pc(str
 /*
  * The below -8 is to reserve 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessable even if the CPU haven't stored the SS/ESP registers
+ * is accessible even if the CPU hasn't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
  * when switching to the same priv ring).
  * Therefore beware: accessing the ss/esp fields of the
--- head.orig/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:12:15.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:12:54.000000000 +0100
@@ -47,10 +47,7 @@ DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_
 #ifndef CONFIG_XEN
 
 /* Static state in head.S used to set up a CPU */
-extern struct {
-	void *sp;
-	unsigned short ss;
-} stack_start;
+extern unsigned long stack_start; /* Initial stack pointer address */
 
 struct smp_ops {
 	void (*smp_prepare_boot_cpu)(void);
--- head.orig/arch/x86/kernel/acpi/boot.c	2012-05-08 10:51:48.000000000 +0200
+++ head/arch/x86/kernel/acpi/boot.c	2011-04-13 14:08:57.000000000 +0200
@@ -73,10 +73,11 @@ int acpi_sci_override_gsi __initdata;
 #ifndef CONFIG_XEN
 int acpi_skip_timer_override __initdata;
 int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
 #else
 #define acpi_skip_timer_override 0
+#define acpi_fix_pin2_polarity 0
 #endif
-int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
--- head.orig/arch/x86/kernel/apic/Makefile	2012-02-08 12:23:16.000000000 +0100
+++ head/arch/x86/kernel/apic/Makefile	2012-02-08 12:51:05.000000000 +0100
@@ -26,8 +26,6 @@ obj-$(CONFIG_X86_ES7000)	+= es7000_32.o
 # For 32bit, probe_32 need to be listed last
 obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
 
-obj-$(CONFIG_XEN)		+= nmi.o
-
 probe_64-$(CONFIG_XEN)		:= probe_32.o
 
 disabled-obj-$(CONFIG_XEN)	:= apic_%.o
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c	2011-02-17 10:30:00.000000000 +0100
@@ -50,7 +50,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/setup.h>
 #include <asm/hw_irq.h>
 
@@ -138,6 +137,26 @@ static int __init parse_noapic(char *str
 }
 early_param("noapic", parse_noapic);
 
+/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+	int i;
+
+	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
+		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+		m->srcbusirq, m->dstapic, m->dstirq);
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+			return;
+	}
+
+	memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
 #ifndef CONFIG_XEN
 struct irq_pin_list {
 	int apic, pin;
@@ -149,6 +168,7 @@ static struct irq_pin_list *alloc_irq_pi
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
+
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
@@ -2014,8 +2034,7 @@ void disable_IO_APIC(void)
  *
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -2024,15 +2043,6 @@ void __init setup_ioapic_ids_from_mpc(vo
 	unsigned char old_id;
 	unsigned long flags;
 
-	if (acpi_ioapic)
-		return;
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
 	/*
 	 * This is broken; anything with a real cpu count has to
 	 * circumvent this idiocy regardless.
@@ -2086,7 +2096,6 @@ void __init setup_ioapic_ids_from_mpc(vo
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
-
 		/*
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
@@ -2098,9 +2107,12 @@ void __init setup_ioapic_ids_from_mpc(vo
 						= mp_ioapics[apic_id].apicid;
 
 		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
+		 * Update the ID register according to the right value
+		 * from the MPC table if they are different.
 		 */
+		if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+			continue;
+
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
 			mp_ioapics[apic_id].apicid);
@@ -2122,6 +2134,21 @@ void __init setup_ioapic_ids_from_mpc(vo
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+	if (acpi_ioapic)
+		return;
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	setup_ioapic_ids_from_mpc_nocheck();
+}
 #endif
 
 int no_timer_check __initdata;
@@ -2382,7 +2409,7 @@ asmlinkage void smp_irq_move_cleanup_int
 		unsigned int irr;
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
+		irq = __this_cpu_read(vector_irq[vector]);
 
 		if (irq == -1)
 			continue;
@@ -2416,7 +2443,7 @@ asmlinkage void smp_irq_move_cleanup_int
 			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
 			goto unlock;
 		}
-		__get_cpu_var(vector_irq)[vector] = -1;
+		__this_cpu_write(vector_irq[vector], -1);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -2728,24 +2755,6 @@ static void lapic_register_intr(int irq)
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2851,15 +2860,6 @@ static inline void __init check_timer(vo
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2907,10 +2907,6 @@ static inline void __init check_timer(vo
 				unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
@@ -2936,11 +2932,6 @@ static inline void __init check_timer(vo
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2952,15 +2943,6 @@ static inline void __init check_timer(vo
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
@@ -3741,7 +3723,7 @@ int __init io_apic_get_redir_entries (in
 }
 
 #ifndef CONFIG_XEN
-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
 {
 	int nr;
 
@@ -4069,7 +4051,7 @@ static struct resource * __init ioapic_s
 	return res;
 }
 
-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	struct resource *ioapic_res;
@@ -4107,6 +4089,8 @@ fake_ioapic_page:
 		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
 		ioapic_res++;
 	}
+
+	probe_nr_irqs_gsi();
 }
 
 void __init ioapic_insert_resources(void)
@@ -4132,6 +4116,9 @@ int mp_find_ioapic(u32 gsi)
 {
 	int i = 0;
 
+	if (nr_ioapics == 0)
+		return -1;
+
 	/* Find the IOAPIC that manages this GSI. */
 	for (i = 0; i < nr_ioapics; i++) {
 		if ((gsi >= mp_gsi_routing[i].gsi_base)
@@ -4220,7 +4207,8 @@ void __init pre_init_apic_IRQ0(void)
 
 	printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	physid_set_mask_of_physid(boot_cpu_physical_apicid,
+					 &phys_cpu_present_map);
 #endif
 	/* Make sure the irq descriptor is set up */
 	cfg = alloc_irq_and_cfg_at(0, 0);
--- head.orig/arch/x86/kernel/apic/ipi-xen.c	2011-03-18 11:29:10.000000000 +0100
+++ head/arch/x86/kernel/apic/ipi-xen.c	2012-02-09 14:33:23.000000000 +0100
@@ -14,12 +14,11 @@ static inline void __send_IPI_one(unsign
 	int irq = per_cpu(ipi_to_irq, cpu)[vector];
 
 	if (vector == NMI_VECTOR) {
-		static int __read_mostly printed;
 		int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
 
-		if (rc && !printed)
-			pr_warning("Unable (%d) to send NMI to CPU#%u\n",
-				   printed = rc, cpu);
+		if (rc)
+			pr_warn_once("Unable (%d) to send NMI to CPU#%u\n",
+				     rc, cpu);
 		return;
 	}
 	BUG_ON(irq < 0);
--- head.orig/arch/x86/kernel/cpu/common-xen.c	2012-04-20 15:14:29.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c	2011-05-18 10:47:16.000000000 +0200
@@ -935,7 +935,6 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_events();
 }
 
 #ifdef CONFIG_XEN
--- head.orig/arch/x86/kernel/cpu/intel_cacheinfo.c	2011-11-17 11:47:29.000000000 +0100
+++ head/arch/x86/kernel/cpu/intel_cacheinfo.c	2012-03-12 13:53:27.000000000 +0100
@@ -987,7 +987,7 @@ static struct attribute *default_attrs[]
 	NULL
 };
 
-#ifdef CONFIG_AMD_NB
+#if defined(CONFIG_AMD_NB) && !defined(CONFIG_XEN)
 static struct attribute ** __cpuinit amd_l3_attrs(void)
 {
 	static struct attribute **attrs;
@@ -1133,7 +1133,7 @@ static int __cpuinit cache_add_dev(struc
 		this_leaf = CPUID4_INFO_IDX(cpu, i);
 
 		ktype_cache.default_attrs = default_attrs;
-#ifdef CONFIG_AMD_NB
+#if defined(CONFIG_AMD_NB) && !defined(CONFIG_XEN)
 		if (this_leaf->base.nb)
 			ktype_cache.default_attrs = amd_l3_attrs();
 #endif
--- head.orig/arch/x86/kernel/e820-xen.c	2011-09-23 15:58:51.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c	2011-04-26 09:19:42.000000000 +0200
@@ -14,6 +14,7 @@
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
+#include <linux/acpi.h>
 #include <linux/firmware-map.h>
 #include <linux/memblock.h>
 
--- head.orig/arch/x86/kernel/early_printk-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/early_printk-xen.c	2011-02-01 15:41:35.000000000 +0100
@@ -272,7 +272,7 @@ static int __init setup_early_printk(cha
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+#ifdef CONFIG_EARLY_PRINTK_MRST
 		if (!strncmp(buf, "mrst", 4)) {
 			mrst_early_console_init();
 			early_console_register(&early_mrst_console, keep);
@@ -282,7 +282,6 @@ static int __init setup_early_printk(cha
 			hsu_early_console_init();
 			early_console_register(&early_hsu_console, keep);
 		}
-
 #endif
 		buf++;
 	}
--- head.orig/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:18:40.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:18:49.000000000 +0100
@@ -1682,6 +1682,16 @@ ENTRY(general_protection)
 	CFI_ENDPROC
 END(general_protection)
 
+#ifdef CONFIG_KVM_GUEST
+ENTRY(async_page_fault)
+	RING0_EC_FRAME
+	pushl $do_async_page_fault
+	CFI_ADJUST_CFA_OFFSET 4
+	jmp error_code
+	CFI_ENDPROC
+END(async_page_fault)
+#endif
+
 /*
  * End of kprobes section
  */
--- head.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:40:25.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:41:33.000000000 +0200
@@ -333,17 +333,21 @@ NMI_MASK = 0x80000000
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
-	movq_cfi rdi, RDI+16-ARGOFFSET
-	movq_cfi rsi, RSI+16-ARGOFFSET
-	movq_cfi rdx, RDX+16-ARGOFFSET
-	movq_cfi rcx, RCX+16-ARGOFFSET
-	movq_cfi rax, RAX+16-ARGOFFSET
-	movq_cfi  r8,  R8+16-ARGOFFSET
-	movq_cfi  r9,  R9+16-ARGOFFSET
-	movq_cfi r10, R10+16-ARGOFFSET
-	movq_cfi r11, R11+16-ARGOFFSET
+	/*
+	 * start from rbp in pt_regs and jump over
+	 * return address.
+	 */
+	movq_cfi rdi, RDI+8-RBP
+	movq_cfi rsi, RSI+8-RBP
+	movq_cfi rdx, RDX+8-RBP
+	movq_cfi rcx, RCX+8-RBP
+	movq_cfi rax, RAX+8-RBP
+	movq_cfi  r8,  R8+8-RBP
+	movq_cfi  r9,  R9+8-RBP
+	movq_cfi r10, R10+8-RBP
+	movq_cfi r11, R11+8-RBP
 
-	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
+	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */
 	movq_cfi rbp, 8		/* push %rbp */
 	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
 	testl $3, CS(%rdi)
@@ -1145,6 +1149,9 @@ paranoidzeroentry_ist int3 do_int3 DEBUG
 paranoiderrorentry stack_segment do_stack_segment
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
+#ifdef CONFIG_KVM_GUEST
+errorentry async_page_fault do_async_page_fault
+#endif
 #ifdef CONFIG_X86_MCE
 paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
--- head.orig/arch/x86/kernel/head32-xen.c	2011-05-09 11:42:39.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c	2011-05-09 11:42:46.000000000 +0200
@@ -79,6 +79,9 @@ void __init i386_start_kernel(void)
 	case X86_SUBARCH_MRST:
 		x86_mrst_early_setup();
 		break;
+	case X86_SUBARCH_CE4100:
+		x86_ce4100_early_setup();
+		break;
 	default:
 		i386_default_early_setup();
 		break;
--- head.orig/arch/x86/kernel/irq-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/irq-xen.c	2011-02-18 15:17:23.000000000 +0100
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/of.h>
 #include <linux/seq_file.h>
 #include <linux/smp.h>
 #include <linux/ftrace.h>
@@ -254,7 +255,7 @@ unsigned int __irq_entry do_IRQ(struct p
 	exit_idle();
 	irq_enter();
 
-	irq = __get_cpu_var(vector_irq)[vector];
+	irq = __this_cpu_read(vector_irq[vector]);
 
 	if (!handle_irq(irq, regs)) {
 		ack_APIC_irq();
@@ -294,6 +295,15 @@ void smp_x86_platform_ipi(struct pt_regs
 }
 #endif
 
+#ifdef CONFIG_OF
+unsigned int irq_create_of_mapping(struct device_node *controller,
+		const u32 *intspec, unsigned int intsize)
+{
+	return intspec[0];
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+#endif
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <xen/evtchn.h>
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
@@ -369,7 +379,8 @@ void fixup_irqs(void)
 			continue;
 
 		if (xen_test_irq_pending(irq)) {
-			data = irq_get_irq_data(irq);
+			desc = irq_to_desc(irq);
+			data = &desc->irq_data;
 			raw_spin_lock(&desc->lock);
 			if (data->chip->irq_retrigger)
 				data->chip->irq_retrigger(data);
--- head.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c	2011-02-01 16:09:24.000000000 +0100
@@ -130,21 +130,8 @@ static void __init MP_bus_info(struct mp
 
 static void __init MP_ioapic_info(struct mpc_ioapic *m)
 {
-	if (!(m->flags & MPC_APIC_USABLE))
-		return;
-
-	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
-	       m->apicid, m->apicver, m->apicaddr);
-
-	mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
-}
-
-static void print_MP_intsrc_info(struct mpc_intsrc *m)
-{
-	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
-		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
-		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
-		m->srcbusirq, m->dstapic, m->dstirq);
+	if (m->flags & MPC_APIC_USABLE)
+		mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
 }
 
 static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
@@ -156,73 +143,11 @@ static void __init print_mp_irq_info(str
 		mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
 }
 
-static void __init assign_to_mp_irq(struct mpc_intsrc *m,
-				    struct mpc_intsrc *mp_irq)
-{
-	mp_irq->dstapic = m->dstapic;
-	mp_irq->type = m->type;
-	mp_irq->irqtype = m->irqtype;
-	mp_irq->irqflag = m->irqflag;
-	mp_irq->srcbus = m->srcbus;
-	mp_irq->srcbusirq = m->srcbusirq;
-	mp_irq->dstirq = m->dstirq;
-}
-
-static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
-					struct mpc_intsrc *m)
-{
-	m->dstapic = mp_irq->dstapic;
-	m->type = mp_irq->type;
-	m->irqtype = mp_irq->irqtype;
-	m->irqflag = mp_irq->irqflag;
-	m->srcbus = mp_irq->srcbus;
-	m->srcbusirq = mp_irq->srcbusirq;
-	m->dstirq = mp_irq->dstirq;
-}
-
-static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
-					struct mpc_intsrc *m)
-{
-	if (mp_irq->dstapic != m->dstapic)
-		return 1;
-	if (mp_irq->type != m->type)
-		return 2;
-	if (mp_irq->irqtype != m->irqtype)
-		return 3;
-	if (mp_irq->irqflag != m->irqflag)
-		return 4;
-	if (mp_irq->srcbus != m->srcbus)
-		return 5;
-	if (mp_irq->srcbusirq != m->srcbusirq)
-		return 6;
-	if (mp_irq->dstirq != m->dstirq)
-		return 7;
-
-	return 0;
-}
-
-static void __init MP_intsrc_info(struct mpc_intsrc *m)
-{
-	int i;
-
-	print_MP_intsrc_info(m);
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
-			return;
-	}
-
-	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!!\n");
-}
 #else /* CONFIG_X86_IO_APIC */
 static inline void __init MP_bus_info(struct mpc_bus *m) {}
 static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
-static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-
 static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
 {
 	apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
@@ -234,7 +159,6 @@ static void __init MP_lintsrc_info(struc
 /*
  * Read/parse the MPC
  */
-
 static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
 {
 
@@ -289,20 +213,6 @@ static void __init smp_dump_mptable(stru
 
 void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
 
-static void __init smp_register_lapic_address(unsigned long address)
-{
-#ifndef CONFIG_XEN
-	mp_lapic_addr = address;
-
-	set_fixmap_nocache(FIX_APIC_BASE, address);
-	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid  = read_apic_id();
-		apic_version[boot_cpu_physical_apicid] =
-			 GET_APIC_VERSION(apic_read(APIC_LVR));
-	}
-#endif
-}
-
 static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 {
 	char str[16];
@@ -318,18 +228,14 @@ static int __init smp_read_mpc(struct mp
 #ifdef CONFIG_X86_32
 	generic_mps_oem_check(mpc, oem, str);
 #endif
-	/* save the local APIC address, it might be non-default */
+	/* Initialize the lapic mapping */
 	if (!acpi_lapic)
-		mp_lapic_addr = mpc->lapic;
+		register_lapic_address(mpc->lapic);
 #endif
 
 	if (early)
 		return 1;
 
-	/* Initialize the lapic mapping */
-	if (!acpi_lapic)
-		smp_register_lapic_address(mpc->lapic);
-
 	if (mpc->oemptr)
 		x86_init.mpparse.smp_read_mpc_oem(mpc);
 
@@ -355,7 +261,7 @@ static int __init smp_read_mpc(struct mp
 			skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
 			break;
 		case MP_INTSRC:
-			MP_intsrc_info((struct mpc_intsrc *)mpt);
+			mp_save_irq((struct mpc_intsrc *)mpt);
 			skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
 			break;
 		case MP_LINTSRC:
@@ -447,13 +353,13 @@ static void __init construct_default_ioi
 
 		intsrc.srcbusirq = i;
 		intsrc.dstirq = i ? i : 2;	/* IRQ0 to INTIN2 */
-		MP_intsrc_info(&intsrc);
+		mp_save_irq(&intsrc);
 	}
 
 	intsrc.irqtype = mp_ExtINT;
 	intsrc.srcbusirq = 0;
 	intsrc.dstirq = 0;	/* 8259A to INTIN0 */
-	MP_intsrc_info(&intsrc);
+	mp_save_irq(&intsrc);
 }
 
 
@@ -824,11 +730,11 @@ static void __init check_irq_src(struct 
 	int i;
 
 	apic_printk(APIC_VERBOSE, "OLD ");
-	print_MP_intsrc_info(m);
+	print_mp_irq_info(m);
 
 	i = get_MP_intsrc_index(m);
 	if (i > 0) {
-		assign_to_mpc_intsrc(&mp_irqs[i], m);
+		memcpy(m, &mp_irqs[i], sizeof(*m));
 		apic_printk(APIC_VERBOSE, "NEW ");
 		print_mp_irq_info(&mp_irqs[i]);
 		return;
@@ -915,14 +821,14 @@ static int  __init replace_intsrc_all(st
 		if (nr_m_spare > 0) {
 			apic_printk(APIC_VERBOSE, "*NEW* found\n");
 			nr_m_spare--;
-			assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
+			memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
 			m_spare[nr_m_spare] = NULL;
 		} else {
 			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 			count += sizeof(struct mpc_intsrc);
 			if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
 				goto out;
-			assign_to_mpc_intsrc(&mp_irqs[i], m);
+			memcpy(m, &mp_irqs[i], sizeof(*m));
 			mpc->length = count;
 			mpt += sizeof(struct mpc_intsrc);
 		}
--- head.orig/arch/x86/kernel/process-xen.c	2011-03-03 16:11:01.000000000 +0100
+++ head/arch/x86/kernel/process-xen.c	2011-03-03 16:13:18.000000000 +0100
@@ -14,6 +14,7 @@
 #include <linux/utsname.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
+#include <asm/cpu.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -23,11 +24,6 @@
 #include <asm/debugreg.h>
 #include <xen/evtchn.h>
 
-unsigned long idle_halt;
-EXPORT_SYMBOL(idle_halt);
-unsigned long idle_nomwait;
-EXPORT_SYMBOL(idle_nomwait);
-
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
 
@@ -93,27 +89,36 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
 	show_registers(regs);
-	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-		   regs->bp);
+	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
 {
-	const char *board, *product;
+	const char *vendor, *product, *board;
 
-	board = dmi_get_system_info(DMI_BOARD_NAME);
-	if (!board)
-		board = "";
+	vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+	if (!vendor)
+		vendor = "";
 	product = dmi_get_system_info(DMI_PRODUCT_NAME);
 	if (!product)
 		product = "";
 
+	/* Board Name is optional */
+	board = dmi_get_system_info(DMI_BOARD_NAME);
+
 	printk(KERN_CONT "\n");
-	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version, board, product);
+		init_utsname()->version);
+	printk(KERN_CONT " ");
+	printk(KERN_CONT "%s %s", vendor, product);
+	if (board) {
+		printk(KERN_CONT "/");
+		printk(KERN_CONT "%s", board);
+	}
+	printk(KERN_CONT "\n");
 }
 
 void flush_thread(void)
@@ -315,7 +320,7 @@ long sys_execve(const char __user *name,
 /*
  * Idle related variables and functions
  */
-unsigned long boot_option_idle_override = 0;
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
 /*
@@ -331,6 +336,7 @@ EXPORT_SYMBOL(pm_idle);
 void xen_idle(void)
 {
 	trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+	trace_cpu_idle(1, smp_processor_id());
 	current_thread_info()->status &= ~TS_POLLING;
 	/*
 	 * TS_POLLING-cleared state must be visible before we
@@ -343,6 +349,8 @@ void xen_idle(void)
 	else
 		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
+	trace_power_end(smp_processor_id());
+	trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);
@@ -396,9 +404,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
 	if (!need_resched()) {
-		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -413,7 +420,8 @@ static void mwait_idle(void)
 {
 	if (!need_resched()) {
 		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+		trace_cpu_idle(1, smp_processor_id());
+		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -422,6 +430,8 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
+		trace_power_end(smp_processor_id());
+		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	} else
 		local_irq_enable();
 }
@@ -435,10 +445,12 @@ static void mwait_idle(void)
 static void poll_idle(void)
 {
 	trace_power_start(POWER_CSTATE, 0, smp_processor_id());
+	trace_cpu_idle(0, smp_processor_id());
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
-	trace_power_end(0);
+	trace_power_end(smp_processor_id());
+	trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 #ifndef CONFIG_XEN
@@ -454,17 +466,16 @@ static void poll_idle(void)
  *
  * idle=mwait overrides this decision and forces the usage of mwait.
  */
-static int __cpuinitdata force_mwait;
 
 #define MWAIT_INFO			0x05
 #define MWAIT_ECX_EXTENDED_INFO		0x01
 #define MWAIT_EDX_C1			0xf0
 
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+int mwait_usable(const struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
 
-	if (force_mwait)
+	if (boot_option_idle_override == IDLE_FORCE_MWAIT)
 		return 1;
 
 	if (c->cpuid_level < MWAIT_INFO)
@@ -589,10 +600,11 @@ static int __init idle_setup(char *str)
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
+		boot_option_idle_override = IDLE_POLL;
 #ifndef CONFIG_XEN
-	} else if (!strcmp(str, "mwait"))
-		force_mwait = 1;
-	else if (!strcmp(str, "halt")) {
+	} else if (!strcmp(str, "mwait")) {
+		boot_option_idle_override = IDLE_FORCE_MWAIT;
+	} else if (!strcmp(str, "halt")) {
 		/*
 		 * When the boot option of idle=halt is added, halt is
 		 * forced to be used for CPU idle. In such case CPU C2/C3
@@ -601,8 +613,7 @@ static int __init idle_setup(char *str)
 		 * the boot_option_idle_override.
 		 */
 		pm_idle = default_idle;
-		idle_halt = 1;
-		return 0;
+		boot_option_idle_override = IDLE_HALT;
 	} else if (!strcmp(str, "nomwait")) {
 		/*
 		 * If the boot option of "idle=nomwait" is added,
@@ -610,13 +621,11 @@ static int __init idle_setup(char *str)
 		 * states. In such case it won't touch the variable
 		 * of boot_option_idle_override.
 		 */
-		idle_nomwait = 1;
-		return 0;
+		boot_option_idle_override = IDLE_NOMWAIT;
 #endif
 	} else
 		return -1;
 
-	boot_option_idle_override = 1;
 	return 0;
 }
 early_param("idle", idle_setup);
--- head.orig/arch/x86/kernel/process_32-xen.c	2012-02-29 14:18:16.000000000 +0100
+++ head/arch/x86/kernel/process_32-xen.c	2012-02-29 14:18:55.000000000 +0100
@@ -59,8 +59,6 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
-#include <trace/events/power.h>
-
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
 
@@ -116,8 +114,6 @@ void cpu_idle(void)
 			stop_critical_timings();
 			xen_idle();
 			start_critical_timings();
-
-			trace_power_end(smp_processor_id());
 		}
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
--- head.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:47:56.000000000 +0100
+++ head/arch/x86/kernel/process_64-xen.c	2011-02-02 08:48:24.000000000 +0100
@@ -56,8 +56,6 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
-#include <trace/events/power.h>
-
 asmlinkage extern void ret_from_fork(void);
 
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -145,8 +143,6 @@ void cpu_idle(void)
 			xen_idle();
 			start_critical_timings();
 
-			trace_power_end(smp_processor_id());
-
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt. */
--- head.orig/arch/x86/kernel/setup-xen.c	2012-06-08 10:35:42.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c	2012-06-08 10:35:55.000000000 +0200
@@ -777,7 +777,7 @@ static u64 __init get_max_mapped(void)
 void __init setup_arch(char **cmdline_p)
 {
 	int acpi = 0;
-	int k8 = 0;
+	int amd = 0;
 	unsigned long flags;
 #ifdef CONFIG_XEN
 	unsigned int i;
@@ -1125,12 +1125,12 @@ void __init setup_arch(char **cmdline_p)
 	acpi = acpi_numa_init();
 #endif
 
-#ifdef CONFIG_K8_NUMA
+#ifdef CONFIG_AMD_NUMA
 	if (!acpi)
-		k8 = !k8_numa_init(0, max_pfn);
+		amd = !amd_numa_init(0, max_pfn);
 #endif
 
-	initmem_init(0, max_pfn, acpi, k8);
+	initmem_init(0, max_pfn, acpi, amd);
 	memblock_find_dma_reserve();
 	dma32_reserve_bootmem();
 
@@ -1255,10 +1255,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifndef CONFIG_XEN
 	init_apic_mappings();
-	ioapic_init_mappings();
-
-	/* need to wait for io_apic is mapped */
-	probe_nr_irqs_gsi();
+	ioapic_and_gsi_init();
 
 	kvm_guest_init();
 
--- head.orig/arch/x86/kernel/traps-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/traps-xen.c	2011-07-21 12:21:48.000000000 +0200
@@ -87,6 +87,13 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_panic;
+/*
+ * Prevent the NMI reason port (0x61) from being accessed simultaneously;
+ * this lock may only be used in the NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -304,16 +311,23 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
-static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+static int __init setup_unknown_nmi_panic(char *str)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
+static notrace __kprobes void
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
+{
+	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
 #if defined(CONFIG_EDAC)
 	if (edac_handler_set()) {
 		edac_atomic_assert_error();
@@ -324,16 +338,18 @@ mem_parity_error(unsigned char reason, s
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	clear_mem_error(reason);
+	/* Clear and disable the PCI SERR error line. */
+	clear_serr_error(reason);
 }
 
 static notrace __kprobes void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
-	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+	pr_emerg(
+	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 	show_registers(regs);
 
 	if (panic_on_io_nmi)
@@ -359,69 +375,50 @@ unknown_nmi_error(unsigned char reason, 
 		return;
 	}
 #endif
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
-	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	pr_emerg("Do you have a strange power saving mode enabled?\n");
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
-
-	cpu = smp_processor_id();
-
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
-		reason = get_nmi_reason();
-
-	if (!(reason & 0xc0)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-								== NOTIFY_STOP)
-			return;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
 
-#ifndef CONFIG_LOCKUP_DETECTOR
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
+
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
+
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
 		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
+		reassert_nmi();
 #endif
-
+		raw_spin_unlock(&nmi_reason_lock);
 		return;
 	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-		return;
+	raw_spin_unlock(&nmi_reason_lock);
 
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
-		mem_parity_error(reason, regs);
-	if (reason & 0x40)
-		io_check_error(reason, regs);
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
-#endif
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
@@ -439,14 +436,12 @@ do_nmi(struct pt_regs *regs, long error_
 
 void stop_nmi(void)
 {
-	acpi_nmi_disable();
 	ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
 	ignore_nmis--;
-	acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
--- head.orig/arch/x86/mm/fault-xen.c	2011-08-15 11:05:47.000000000 +0200
+++ head/arch/x86/mm/fault-xen.c	2011-08-15 11:06:03.000000000 +0200
@@ -238,15 +238,14 @@ void vmalloc_sync_all(void)
 	for (address = VMALLOC_START & PMD_MASK;
 	     address >= TASK_SIZE && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
-
-		unsigned long flags;
 		struct page *page;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			spinlock_t *pgt_lock;
 			pmd_t *ret;
 
+			/* the pgt_lock is needed only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
@@ -256,7 +255,7 @@ void vmalloc_sync_all(void)
 			if (!ret)
 				break;
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
@@ -838,6 +837,13 @@ mm_fault_error(struct pt_regs *regs, uns
 	       unsigned long address, unsigned int fault)
 {
 	if (fault & VM_FAULT_OOM) {
+		/* Kernel mode? Handle exceptions or die: */
+		if (!(error_code & PF_USER)) {
+			up_read(&current->mm->mmap_sem);
+			no_context(regs, error_code, address);
+			return;
+		}
+
 		out_of_memory(regs, error_code, address);
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
--- head.orig/arch/x86/mm/init-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/mm/init-xen.c	2011-02-01 15:41:35.000000000 +0100
@@ -404,8 +404,9 @@ void free_init_pages(char *what, unsigne
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
-	 * writeable first.
+	 * writeable and non-executable first.
 	 */
+	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
--- head.orig/arch/x86/mm/init_32-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c	2011-02-01 15:41:35.000000000 +0100
@@ -47,6 +47,7 @@
 #include <asm/bugs.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/olpc_ofw.h>
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/hypervisor.h>
@@ -242,7 +243,7 @@ page_table_range_init(unsigned long star
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 		return 1;
 	return 0;
 }
@@ -775,6 +776,7 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
+	olpc_dt_build_devicetree();
 	sparse_init();
 	zone_sizes_init();
 }
@@ -980,6 +982,23 @@ void set_kernel_text_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }
 
+static void mark_nxdata_nx(void)
+{
+	/*
+	 * When this is called, init has already been executed and released,
+	 * so everything past _etext should be NX.
+	 */
+	unsigned long start = PFN_ALIGN(_etext);
+	/*
+	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
+	 */
+	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+	if (__supported_pte_mask & _PAGE_NX)
+		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
@@ -1014,6 +1033,7 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+	mark_nxdata_nx();
 }
 #endif
 
--- head.orig/arch/x86/mm/init_64-xen.c	2011-06-30 17:18:57.000000000 +0200
+++ head/arch/x86/mm/init_64-xen.c	2011-03-17 14:22:21.000000000 +0100
@@ -173,18 +173,18 @@ void sync_global_pgds(unsigned long star
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
 		struct page *page;
 
 		if (pgd_none(*pgd_ref))
 			continue;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			/* the pgt_lock is needed only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
@@ -196,7 +196,7 @@ void sync_global_pgds(unsigned long star
 
 			spin_unlock(pgt_lock);
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
--- head.orig/arch/x86/mm/ioremap-xen.c	2011-05-09 11:42:30.000000000 +0200
+++ head/arch/x86/mm/ioremap-xen.c	2011-05-09 11:42:49.000000000 +0200
@@ -158,6 +158,16 @@ int create_lookup_pte_addr(struct mm_str
 
 EXPORT_SYMBOL(create_lookup_pte_addr);
 
+#ifdef CONFIG_MODULES
+/*
+ * Force the implementation of ioremap_page_range() to be pulled in from
+ * lib/lib.a even if there is no other reference from the core kernel to it
+ * (native uses it in __ioremap_caller()), so that it gets exported.
+ */
+static void *const __section(.discard.ioremap) __used
+_ioremap_page_range = ioremap_page_range;
+#endif
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
--- head.orig/arch/x86/mm/pageattr-xen.c	2011-03-23 10:01:34.000000000 +0100
+++ head/arch/x86/mm/pageattr-xen.c	2011-03-23 10:10:15.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -56,12 +57,10 @@ static unsigned long direct_pages_count[
 
 void update_page_count(int level, unsigned long pages)
 {
-	unsigned long flags;
-
 	/* Protect against CPA */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	direct_pages_count[level] += pages;
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 static void split_page_count(int level)
@@ -256,12 +255,12 @@ static inline pgprot_t static_protection
 {
 	pgprot_t forbidden = __pgprot(0);
 
-#ifndef CONFIG_XEN
 	/*
 	 * The BIOS area between 640k and 1Mb needs to be executable for
 	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 	 */
-	if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+	if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_NX;
 #endif
 
@@ -405,16 +404,16 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
-	pgprot_t old_prot, new_prot;
+	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
 	unsigned int level;
 
 	if (cpa->force_split)
 		return 1;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up already:
@@ -452,10 +451,10 @@ try_preserve_large_page(pte_t *kpte, uns
 	 * We are safe now. Check whether the new pgprot is the same:
 	 */
 	old_pte = *kpte;
-	old_prot = new_prot = pte_pgprot(old_pte);
+	old_prot = new_prot = req_prot = pte_pgprot(old_pte);
 
-	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
 
 	/*
 	 * old_pte points to the large page base address. So we need
@@ -464,22 +463,21 @@ try_preserve_large_page(pte_t *kpte, uns
 	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
 	cpa->pfn = pfn;
 
-	new_prot = static_protections(new_prot, address, pfn);
+	new_prot = static_protections(req_prot, address, pfn);
 
 	/*
 	 * We need to check the full range, whether
 	 * static_protection() requires a different pgprot for one of
 	 * the pages in the range we try to preserve:
 	 */
-	if (pfn < max_mapnr) {
-		addr = address + PAGE_SIZE;
-		for (i = 1; i < cpa->numpages && ++pfn < max_mapnr;
-		     i++, addr += PAGE_SIZE) {
-			pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+	addr = address & pmask;
+	pfn = pte_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT) && pfn < max_mapnr;
+	     i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
-			if (pgprot_val(chk_prot) != pgprot_val(new_prot))
-				goto out_unlock;
-		}
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
 	}
 
 	/*
@@ -499,7 +497,7 @@ try_preserve_large_page(pte_t *kpte, uns
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+	if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.
@@ -511,14 +509,14 @@ try_preserve_large_page(pte_t *kpte, uns
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return do_split;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	unsigned long flags, mfn, mfninc = 1;
+	unsigned long mfn, mfninc = 1;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
@@ -532,7 +530,7 @@ static int split_large_page(pte_t *kpte,
 	if (!base)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -608,7 +606,7 @@ out_unlock:
 	 */
 	if (base)
 		__free_page(base);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return 0;
 }
--- head.orig/arch/x86/mm/pgtable-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/mm/pgtable-xen.c	2011-03-17 14:26:03.000000000 +0100
@@ -358,23 +358,23 @@ void mm_unpin(struct mm_struct *mm)
 void mm_pin_all(void)
 {
 	struct page *page;
-	unsigned long flags;
 
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
 	/*
 	 * Allow uninterrupted access to the pgd_list. Also protects
-	 * __pgd_pin() by disabling preemption.
+	 * __pgd_pin() by ensuring preemption is disabled.
 	 * All other CPUs must be at a safe point (e.g., in stop_machine
 	 * or offlined entirely).
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	BUG_ON(!irqs_disabled());
+	spin_lock(&pgd_lock);
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page))
 			__pgd_pin((pgd_t *)page_address(page));
 	}
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
@@ -470,12 +470,10 @@ static void pgd_ctor(struct mm_struct *m
 
 static void pgd_dtor(pgd_t *pgd)
 {
-	unsigned long flags; /* can be called from interrupt context */
-
 	if (!SHARED_KERNEL_PMD) {
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		pgd_list_del(pgd);
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 
 	pgd_test_and_unpin(pgd);
@@ -641,7 +639,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
-	unsigned long flags;
 
 	pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
 
@@ -661,13 +658,13 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * respect to anything walking the pgd_list, so that they
 	 * never see a partially populated pgd.
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 #ifdef CONFIG_X86_PAE
 	/* Protect against save/restore: move below 4GB under pgd_lock. */
 	if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
 	    && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 		goto out_free_pmds;
 	}
 #endif
@@ -675,7 +672,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return pgd;
 
@@ -735,6 +732,25 @@ int ptep_set_access_flags(struct vm_area
 	return changed;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	int changed = !pmd_same(*pmdp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (changed && dirty) {
+		*pmdp = entry;
+		pmd_update_defer(vma->vm_mm, address, pmdp);
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+
+	return changed;
+}
+#endif
+
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *ptep)
 {
@@ -750,6 +766,23 @@ int ptep_test_and_clear_young(struct vm_
 	return ret;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* pmd-level analogue of ptep_test_and_clear_young() */
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	int ret = 0;
+	/* Touch the accessed bit only if pmd_young() reports it set. */
+	if (pmd_young(*pmdp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pmdp);
+	/* pmd_update() is called only when test_and_clear_bit() returned nonzero. */
+	if (ret)
+		pmd_update(vma->vm_mm, addr, pmdp);
+	/* 0 if the pmd was not young, else the test_and_clear_bit() result. */
+	return ret;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 int ptep_clear_flush_young(struct vm_area_struct *vma,
 			   unsigned long address, pte_t *ptep)
 {
@@ -765,6 +798,36 @@ int ptep_clear_flush_young(struct vm_are
 	return young;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp)
+{
+	int young;
+	/* Debug check: address must have no bit set outside HPAGE_PMD_MASK. */
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	/* Flush the HPAGE_PMD_SIZE range only if the helper returned nonzero. */
+	young = pmdp_test_and_clear_young(vma, address, pmdp);
+	if (young)
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	/* Hands back the pmdp_test_and_clear_young() result unchanged. */
+	return young;
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp)
+{
+	int set; /* nonzero when test_and_set_bit() below returned 0 */
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK); /* no bit outside HPAGE_PMD_MASK */
+	set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
+				(unsigned long *)pmdp);
+	if (set) { /* otherwise neither the update nor the flush is done */
+		pmd_update(vma->vm_mm, address, pmdp);
+		/* need tlb flush only to serialize against gup-fast */
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+}
+#endif
+
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
--- head.orig/drivers/hwmon/coretemp-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/drivers/hwmon/coretemp-xen.c	2011-02-01 16:38:02.000000000 +0100
@@ -20,6 +20,8 @@
  * 02110-1301 USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -454,8 +456,8 @@ static int coretemp_device_add(unsigned 
 	 * without thermal sensors will be filtered out.
 	 */
 	if (!(info.cpuid_6_eax & 0x1)) {
-		printk(KERN_INFO DRVNAME ": CPU (model=0x%x)"
-		       " has no thermal sensor.\n", info.pdev_entry->x86_model);
+		pr_info("CPU (model=0x%x) has no thermal sensor\n",
+			info.pdev_entry->x86_model);
 		goto exit_entry_free;
 	}
 
@@ -478,7 +480,7 @@ static int coretemp_device_add(unsigned 
 	pdev = platform_device_alloc(DRVNAME, cpu);
 	if (!pdev) {
 		err = -ENOMEM;
-		printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+		pr_err("Device allocation failed\n");
 		goto exit;
 	}
 
@@ -488,8 +490,7 @@ static int coretemp_device_add(unsigned 
 
 	err = platform_device_add(pdev);
 	if (err) {
-		printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
-		       err);
+		pr_err("Device addition failed (%d)\n", err);
 		goto exit_device_put;
 	}
 
--- head.orig/drivers/hwmon/via-cputemp-xen.c	2011-02-01 15:04:27.000000000 +0100
+++ head/drivers/hwmon/via-cputemp-xen.c	2011-02-01 16:40:53.000000000 +0100
@@ -21,6 +21,8 @@
  * 02110-1301 USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -224,15 +226,14 @@ static int via_cputemp_device_add(unsign
 		goto exit_entry_free;
 
 	if (pdev_entry->x86_model > 0x0f) {
-		printk(KERN_WARNING DRVNAME ": Unknown CPU "
-		       "model 0x%x\n", pdev_entry->x86_model);
+		pr_warn("Unknown CPU model 0x%x\n", pdev_entry->x86_model);
 		goto exit_entry_free;
 	}
 
 	pdev = platform_device_alloc(DRVNAME, cpu);
 	if (!pdev) {
 		err = -ENOMEM;
-		printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+		pr_err("Device allocation failed\n");
 		goto exit_entry_free;
 	}
 
@@ -241,8 +242,7 @@ static int via_cputemp_device_add(unsign
 
 	err = platform_device_add(pdev);
 	if (err) {
-		printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
-		       err);
+		pr_err("Device addition failed (%d)\n", err);
 		goto exit_device_put;
 	}
 
@@ -268,8 +268,9 @@ static void via_cputemp_device_remove(un
 		if (p->pdev->id == cpu) {
 			platform_device_unregister(p->pdev);
 			list_del(&p->list);
+			mutex_unlock(&pdev_list_mutex);
 			kfree(p);
-			break;
+			return;
 		}
 	}
 	mutex_unlock(&pdev_list_mutex);
--- head.orig/drivers/scsi/arcmsr/arcmsr.h	2012-06-12 15:05:54.000000000 +0200
+++ head/drivers/scsi/arcmsr/arcmsr.h	2011-04-13 14:08:57.000000000 +0200
@@ -46,7 +46,7 @@
 struct device_attribute;
 /*The limit of outstanding scsi command that firmware can handle*/
 #define ARCMSR_MAX_OUTSTANDING_CMD						256
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
 	#define ARCMSR_MAX_FREECCB_NUM	160
 #else
 	#define ARCMSR_MAX_FREECCB_NUM	320
--- head.orig/drivers/xen/Kconfig	2012-02-17 14:34:57.000000000 +0100
+++ head/drivers/xen/Kconfig	2012-04-03 13:15:48.000000000 +0200
@@ -447,7 +447,7 @@ config XEN_DEV_EVTCHN
 	  firing.
 	  If in doubt, say yes.
 
-config XEN_BACKEND
+config PARAVIRT_XEN_BACKEND
 	bool "Backend driver support"
 	depends on XEN_DOM0
 	default y
@@ -495,7 +495,7 @@ config XEN_XENBUS_FRONTEND
 
 config XEN_GNTDEV
 	tristate "userspace grant access device driver"
-	depends on XEN
+	depends on PARAVIRT_XEN
 	default m
 	select MMU_NOTIFIER
 	help
--- head.orig/drivers/xen/Makefile	2011-11-03 12:50:01.000000000 +0100
+++ head/drivers/xen/Makefile	2011-08-18 11:16:13.000000000 +0200
@@ -27,13 +27,17 @@ obj-$(CONFIG_HOTPLUG_CPU)		+= $(xen-hotp
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= $(xen-balloon-y)
 obj-$(CONFIG_XEN_DEV_EVTCHN)		+= $(xen-evtchn-name-y).o
+obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)		+= platform-pci.o
+obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
 
 xen-evtchn-y				:= evtchn.o
+xen-gntdev-y				:= gntdev.o
+
+xen-platform-pci-y			:= platform-pci.o
 
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
--- head.orig/drivers/xen/blkback/common.h	2012-06-08 10:35:34.000000000 +0200
+++ head/drivers/xen/blkback/common.h	2012-06-08 10:35:57.000000000 +0200
@@ -46,7 +46,7 @@
 
 struct vbd {
 	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
-	unsigned char  readonly;    /* Non-zero -> read-only */
+	fmode_t        mode;        /* FMODE_xxx */
 	unsigned char  type;        /* VDISK_xxx */
 	bool           flush_support;
 	u32            pdevice;     /* phys device that this vbd maps to */
@@ -120,11 +120,10 @@ void vbd_resize(blkif_t *blkif);
 
 /* Create a vbd. */
 int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
-	       unsigned minor, int readonly, int cdrom);
+	       unsigned minor, fmode_t mode, bool cdrom);
 void vbd_free(struct vbd *vbd);
 
 unsigned long long vbd_size(struct vbd *vbd);
-unsigned int vbd_info(struct vbd *vbd);
 unsigned long vbd_secsize(struct vbd *vbd);
 
 struct phys_req {
--- head.orig/drivers/xen/blkback/vbd.c	2011-09-07 12:35:54.000000000 +0200
+++ head/drivers/xen/blkback/vbd.c	2012-02-24 15:15:19.000000000 +0100
@@ -40,18 +40,13 @@ unsigned long long vbd_size(struct vbd *
 	return vbd_sz(vbd);
 }
 
-unsigned int vbd_info(struct vbd *vbd)
-{
-	return vbd->type | (vbd->readonly?VDISK_READONLY:0);
-}
-
 unsigned long vbd_secsize(struct vbd *vbd)
 {
 	return bdev_logical_block_size(vbd->bdev);
 }
 
 int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
-	       unsigned minor, int readonly, int cdrom)
+	       unsigned minor, fmode_t mode, bool cdrom)
 {
 	struct vbd *vbd;
 	struct block_device *bdev;
@@ -59,13 +54,17 @@ int vbd_create(blkif_t *blkif, blkif_vde
 
 	vbd = &blkif->vbd;
 	vbd->handle   = handle; 
-	vbd->readonly = readonly;
 	vbd->type     = 0;
 
+	if (!(mode & FMODE_WRITE)) {
+		mode &= ~FMODE_EXCL; /* xend doesn't even allow mode="r!" */
+		vbd->type |= VDISK_READONLY;
+	}
+	vbd->mode = mode;
+
 	vbd->pdevice  = MKDEV(major, minor);
 
-	bdev = open_by_devnum(vbd->pdevice,
-			      vbd->readonly ? FMODE_READ : FMODE_WRITE);
+	bdev = blkdev_get_by_dev(vbd->pdevice, mode, blkif);
 
 	if (IS_ERR(bdev)) {
 		DPRINTK("vbd_creat: device %08x could not be opened.\n",
@@ -101,8 +100,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 void vbd_free(struct vbd *vbd)
 {
 	if (vbd->bdev)
-		blkdev_put(vbd->bdev,
-			   vbd->readonly ? FMODE_READ : FMODE_WRITE);
+		blkdev_put(vbd->bdev, vbd->mode);
 	vbd->bdev = NULL;
 }
 
@@ -111,7 +109,7 @@ int vbd_translate(struct phys_req *req, 
 	struct vbd *vbd = &blkif->vbd;
 	int rc = -EACCES;
 
-	if ((operation != READ) && vbd->readonly)
+	if ((operation != READ) && !(vbd->mode & FMODE_WRITE))
 		goto out;
 
 	if (likely(req->nr_sects)) {
--- head.orig/drivers/xen/blkback/xenbus.c	2012-03-19 11:42:06.000000000 +0100
+++ head/drivers/xen/blkback/xenbus.c	2012-03-22 14:19:12.000000000 +0100
@@ -334,7 +334,10 @@ static void backend_changed(struct xenbu
 		be->minor = minor;
 
 		err = vbd_create(be->blkif, handle, major, minor,
-				 (NULL == strchr(be->mode, 'w')), cdrom);
+				 FMODE_READ
+				 | (strchr(be->mode, 'w') ? FMODE_WRITE : 0)
+				 | (strchr(be->mode, '!') ? 0 : FMODE_EXCL),
+				 cdrom);
 		if (err) {
 			be->major = be->minor = 0;
 			xenbus_dev_fatal(dev, err, "creating vbd structure");
@@ -451,7 +454,7 @@ again:
 
 	/* FIXME: use a typename instead */
 	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
-			    vbd_info(&be->blkif->vbd));
+			    be->blkif->vbd.type);
 	if (err) {
 		xenbus_dev_fatal(dev, err, "writing %s/info",
 				 dev->nodename);
--- head.orig/drivers/xen/blkfront/blkfront.c	2012-06-12 15:30:20.000000000 +0200
+++ head/drivers/xen/blkfront/blkfront.c	2012-06-12 15:35:27.000000000 +0200
@@ -456,7 +456,7 @@ static void blkfront_closing(struct blkf
 	spin_unlock_irqrestore(&info->io_lock, flags);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_scheduled_work();
+	flush_work_sync(&info->work);
 
 	xlvbd_sysfs_delif(info);
 
@@ -1022,7 +1022,7 @@ static void blkif_free(struct blkfront_i
 	spin_unlock_irq(&info->io_lock);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_scheduled_work();
+	flush_work_sync(&info->work);
 
 	/* Free resources associated with old device channel. */
 	if (info->ring_ref != GRANT_INVALID_REF) {
--- head.orig/drivers/xen/blktap2/device.c	2012-02-16 13:43:41.000000000 +0100
+++ head/drivers/xen/blktap2/device.c	2012-02-16 13:44:17.000000000 +0100
@@ -732,7 +732,7 @@ blktap_device_close_bdev(struct blktap *
 	dev = &tap->device;
 
 	if (dev->bdev)
-		blkdev_put(dev->bdev, FMODE_WRITE);
+		blkdev_put(dev->bdev, FMODE_WRITE|FMODE_EXCL);
 
 	dev->bdev = NULL;
 	clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
@@ -746,7 +746,7 @@ blktap_device_open_bdev(struct blktap *t
 
 	dev = &tap->device;
 
-	bdev = open_by_devnum(pdev, FMODE_WRITE);
+	bdev = blkdev_get_by_dev(pdev, FMODE_WRITE|FMODE_EXCL, tap);
 	if (IS_ERR(bdev)) {
 		BTERR("opening device %x:%x failed: %ld\n",
 		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
@@ -756,7 +756,7 @@ blktap_device_open_bdev(struct blktap *t
 	if (!bdev->bd_disk) {
 		BTERR("device %x:%x doesn't exist\n",
 		      MAJOR(pdev), MINOR(pdev));
-		blkdev_put(bdev, FMODE_WRITE);
+		blkdev_put(bdev, FMODE_WRITE|FMODE_EXCL);
 		return -ENOENT;
 	}
 
--- head.orig/drivers/xen/core/evtchn.c	2012-06-08 10:35:25.000000000 +0200
+++ head/drivers/xen/core/evtchn.c	2012-04-03 17:02:16.000000000 +0200
@@ -283,7 +283,11 @@ void force_evtchn_callback(void)
 /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
 EXPORT_SYMBOL(force_evtchn_callback);
 
-static DEFINE_PER_CPU(unsigned int, upcall_count);
+#define UPC_INACTIVE 0 /* state on entry to and exit from a non-nested upcall */
+#define UPC_ACTIVE 1 /* evtchn_do_upcall() claimed this CPU via cmpxchg */
+#define UPC_NESTED_LATCH 2 /* or-ed in by a nested invocation before it bails */
+#define UPC_RESTART (UPC_ACTIVE|UPC_NESTED_LATCH) /* active and latched: loop again */
+static DEFINE_PER_CPU(unsigned int, upcall_state); /* holds the UPC_* values above */
 static DEFINE_PER_CPU(unsigned int, current_l1i);
 static DEFINE_PER_CPU(unsigned int, current_l2i);
 
@@ -292,18 +296,19 @@ asmlinkage void __irq_entry evtchn_do_up
 {
 	unsigned long       l1, l2;
 	unsigned long       masked_l1, masked_l2;
-	unsigned int        l1i, l2i, start_l1i, start_l2i, port, count, i;
+	unsigned int        l1i, l2i, start_l1i, start_l2i, port, i;
 	int                 irq;
 	vcpu_info_t        *vcpu_info = current_vcpu_info();
 	struct pt_regs     *old_regs;
 
-	/* Avoid a callback storm when we reenable delivery. */
-	vcpu_info->evtchn_upcall_pending = 0;
-
 	/* Nested invocations bail immediately. */
-	percpu_add(upcall_count, 1);
-	if (unlikely(percpu_read(upcall_count) != 1))
+	if (unlikely(__this_cpu_cmpxchg(upcall_state, UPC_INACTIVE,
+					UPC_ACTIVE) != UPC_INACTIVE)) {
+		__this_cpu_or(upcall_state, UPC_NESTED_LATCH);
+		/* Avoid a callback storm when we reenable delivery. */
+		vcpu_info->evtchn_upcall_pending = 0;
 		return;
+	}
 
 	old_regs = set_irq_regs(regs);
 	xen_spin_irq_enter();
@@ -398,11 +403,10 @@ asmlinkage void __irq_entry evtchn_do_up
 		}
 
 		/* If there were nested callbacks then we have more to do. */
-		count = percpu_read(upcall_count);
-		percpu_write(upcall_count, 1);
-	} while (unlikely(count != 1));
+	} while (unlikely(__this_cpu_cmpxchg(upcall_state, UPC_RESTART,
+					     UPC_ACTIVE) == UPC_RESTART));
 
-	percpu_write(upcall_count, 0);
+	__this_cpu_write(upcall_state, UPC_INACTIVE);
 	irq_exit();
 	xen_spin_irq_exit();
 	set_irq_regs(old_regs);
@@ -625,12 +629,16 @@ static void unbind_from_irq(unsigned int
 		cfg->info = IRQ_UNBOUND;
 
 		/* Zap stats across IRQ changes of use. */
-		for_each_possible_cpu(cpu)
+		for_each_possible_cpu(cpu) {
 #ifdef CONFIG_GENERIC_HARDIRQS
-			irq_to_desc(irq)->kstat_irqs[cpu] = 0;
+			struct irq_desc *desc = irq_to_desc(irq);
+
+			if (desc->kstat_irqs)
+				*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
 #else
 			kstat_cpu(cpu).irqs[irq] = 0;
 #endif
+		}
 	}
 
 	spin_unlock(&irq_mapping_update_lock);
--- head.orig/drivers/xen/core/smpboot.c	2012-03-22 16:22:20.000000000 +0100
+++ head/drivers/xen/core/smpboot.c	2012-03-22 16:22:34.000000000 +0100
@@ -187,7 +187,7 @@ static void __cpuinit cpu_bringup(void)
 	unsigned int cpu;
 
 	cpu_init();
-	identify_secondary_cpu(&current_cpu_data);
+	identify_secondary_cpu(__this_cpu_ptr(&cpu_info));
 	touch_softlockup_watchdog();
 	preempt_disable();
 	cpu = smp_processor_id();
--- head.orig/drivers/xen/core/spinlock.c	2012-02-07 11:59:01.000000000 +0100
+++ head/drivers/xen/core/spinlock.c	2012-01-31 18:18:39.000000000 +0100
@@ -190,7 +190,7 @@ void xen_spin_irq_exit(void)
 	 * We're guaranteed to see another invocation of xen_spin_irq_enter()
 	 * if any of the tickets need to be dropped again.
 	 */
-	unsigned int irq_count = --__get_cpu_var(_irq_count);
+	unsigned int irq_count = this_cpu_dec_return(_irq_count);
 
 	/*
 	 * Make sure all xen_spin_kick() instances which may still have seen
--- head.orig/drivers/xen/fbfront/xenfb.c	2011-04-13 14:12:22.000000000 +0200
+++ head/drivers/xen/fbfront/xenfb.c	2011-02-08 10:37:50.000000000 +0100
@@ -555,12 +555,12 @@ xenfb_make_preferred_console(void)
 	if (console_set_on_cmdline)
 		return;
 
-	acquire_console_sem();
-	for (c = console_drivers; c; c = c->next) {
+	console_lock();
+	for_each_console(c) {
 		if (!strcmp(c->name, "tty") && c->index == 0)
 			break;
 	}
-	release_console_sem();
+	console_unlock();
 	if (c) {
 		unregister_console(c);
 		c->flags |= CON_CONSDEV;
--- head.orig/drivers/xen/netfront/netfront.c	2012-06-06 14:04:40.000000000 +0200
+++ head/drivers/xen/netfront/netfront.c	2012-03-12 13:53:17.000000000 +0100
@@ -132,17 +132,18 @@ static inline int skb_gso_ok(struct sk_b
         return (features & NETIF_F_TSO);
 }
 
-static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+#define netif_skb_features(skb) ((skb)->dev->features)
+static inline int netif_needs_gso(struct sk_buff *skb, int features)
 {
         return skb_is_gso(skb) &&
-               (!skb_gso_ok(skb, dev->features) ||
+               (!skb_gso_ok(skb, features) ||
                 unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 #else
 #define HAVE_GSO			0
 #define HAVE_TSO			0
 #define HAVE_CSUM_OFFLOAD		0
-#define netif_needs_gso(dev, skb)	0
+#define netif_needs_gso(skb, feat)	0
 #define dev_disable_gso_features(dev)	((void)0)
 #define ethtool_op_set_tso(dev, data)	(-ENOSYS)
 #endif
@@ -954,7 +955,7 @@ static int network_start_xmit(struct sk_
 
 	if (unlikely(!netfront_carrier_ok(np) ||
 		     (frags > 1 && !xennet_can_sg(dev)) ||
-		     netif_needs_gso(dev, skb))) {
+		     netif_needs_gso(skb, netif_skb_features(skb)))) {
 		spin_unlock_irq(&np->tx_lock);
 		goto drop;
 	}
--- head.orig/drivers/xen/pcifront/xenbus.c	2012-03-12 13:49:37.000000000 +0100
+++ head/drivers/xen/pcifront/xenbus.c	2012-03-12 13:53:21.000000000 +0100
@@ -60,7 +60,7 @@ static void free_pdev(struct pcifront_de
 	pcifront_free_roots(pdev);
 
 	/*For PCIE_AER error handling job*/
-	flush_scheduled_work();
+	flush_work_sync(&pdev->op_work);
 
 	if (pdev->irq > 0)
 		unbind_from_irqhandler(pdev->irq, pdev);
--- head.orig/drivers/xen/sfc_netback/accel_xenbus.c	2011-02-01 14:50:44.000000000 +0100
+++ head/drivers/xen/sfc_netback/accel_xenbus.c	2011-02-03 12:38:43.000000000 +0100
@@ -701,7 +701,7 @@ fail_config_watch:
 	 * Flush the scheduled work queue before freeing bend to get
 	 * rid of any pending netback_accel_msg_rx_handler()
 	 */
-	flush_scheduled_work();
+	flush_work_sync(&bend->handle_msg);
 
 	mutex_lock(&bend->bend_mutex);
 	net_accel_update_state(dev, XenbusStateUnknown);
@@ -781,7 +781,7 @@ int netback_accel_remove(struct xenbus_d
 	 * Flush the scheduled work queue before freeing bend to get
 	 * rid of any pending netback_accel_msg_rx_handler()
 	 */
-	flush_scheduled_work();
+	flush_work_sync(&bend->handle_msg);
 
 	mutex_lock(&bend->bend_mutex);
 
--- head.orig/drivers/xen/xenbus/Makefile	2011-02-02 17:06:11.000000000 +0100
+++ head/drivers/xen/xenbus/Makefile	2011-02-02 17:08:58.000000000 +0100
@@ -7,3 +7,6 @@ xenbus_be-objs += xenbus_backend_client.
 xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
 obj-y += $(xenbus-y) $(xenbus-m)
 obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
+
+obj-$(CONFIG_PARAVIRT_XEN_BACKEND) += xenbus_probe_backend.o
+obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
--- head.orig/drivers/xen/xenbus/xenbus_probe.c	2012-03-12 13:52:29.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:19:07.000000000 +0100
@@ -68,8 +68,6 @@
 #include <xen/events.h>
 #include <xen/page.h>
 
-#include <xen/platform_pci.h>
-
 #define PARAVIRT_EXPORT_SYMBOL EXPORT_SYMBOL_GPL
 #endif
 
@@ -96,15 +94,8 @@ extern struct mutex xenwatch_mutex;
 
 static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 static void wait_for_devices(struct xenbus_driver *xendrv);
-
-static int xenbus_probe_frontend(const char *type, const char *name);
-
-static void xenbus_dev_shutdown(struct device *_dev);
-
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
-static int xenbus_dev_resume(struct device *dev);
 #endif
 
 /* If something in array of ids matches this device, return it. */
@@ -127,24 +118,7 @@ int xenbus_match(struct device *_dev, st
 
 	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }
-
-/* device/<type>/<id> => <type>-<id> */
-static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
-{
-	nodename = strchr(nodename, '/');
-	if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
-		pr_warning("XENBUS: bad frontend %s\n", nodename);
-		return -EINVAL;
-	}
-
-	strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
-	if (!strchr(bus_id, '/')) {
-		pr_warning("XENBUS: bus_id %s no slash\n", bus_id);
-		return -EINVAL;
-	}
-	*strchr(bus_id, '/') = '-';
-	return 0;
-}
+PARAVIRT_EXPORT_SYMBOL(xenbus_match);
 
 
 static void free_otherend_details(struct xenbus_device *dev)
@@ -164,7 +138,7 @@ static void free_otherend_watch(struct x
 }
 
 
-int read_otherend_details(struct xenbus_device *xendev,
+int xenbus_read_otherend_details(struct xenbus_device *xendev,
 				 char *id_node, char *path_node)
 {
 	int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -189,74 +163,22 @@ int read_otherend_details(struct xenbus_
 
 	return 0;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_read_otherend_details);
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 
 static int read_backend_details(struct xenbus_device *xendev)
 {
-	return read_otherend_details(xendev, "backend-id", "backend");
+	return xenbus_read_otherend_details(xendev, "backend-id", "backend");
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
-static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct xenbus_device *xdev;
-
-	if (dev == NULL)
-		return -ENODEV;
-	xdev = to_xenbus_device(dev);
-	if (xdev == NULL)
-		return -ENODEV;
-
-	/* stuff we want to pass to /sbin/hotplug */
-#if defined(CONFIG_XEN) || defined(MODULE)
-	add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
-	add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
-#endif
-	add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
-
-	return 0;
-}
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
-static struct device_attribute xenbus_dev_attrs[] = {
-	__ATTR_NULL
-};
-#endif
-
-/* Bus type for frontend drivers. */
-static struct xen_bus_type xenbus_frontend = {
-	.root = "device",
-	.levels = 2, 		/* device/type/<id> */
-	.get_bus_id = frontend_bus_id,
-	.probe = xenbus_probe_frontend,
-	.error = -ENODEV,
-	.bus = {
-		.name      = "xen",
-		.match     = xenbus_match,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
-		.probe     = xenbus_dev_probe,
-		.remove    = xenbus_dev_remove,
-		.shutdown  = xenbus_dev_shutdown,
-		.uevent    = xenbus_uevent_frontend,
-#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
-		.dev_attrs = xenbus_dev_attrs,
-#endif
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-		.suspend   = xenbus_dev_suspend,
-		.resume    = xenbus_dev_resume,
-#endif
-	},
-#if defined(CONFIG_XEN) || defined(MODULE)
-	.dev = {
-		.init_name = "xen",
-	},
-#endif
-};
-
 static void otherend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
+#else /* !CONFIG_XEN && !MODULE */
+void xenbus_otherend_changed(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len,
+			     int ignore_on_shutdown)
+#endif /* CONFIG_XEN || MODULE */
 {
 	struct xenbus_device *dev =
 		container_of(watch, struct xenbus_device, otherend_watch);
@@ -284,11 +206,15 @@ static void otherend_changed(struct xenb
 	 * work that can fail e.g., when the rootfs is gone.
 	 */
 	if (system_state > SYSTEM_RUNNING) {
-		struct xen_bus_type *bus = bus;
-		bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
 		/* If we're frontend, drive the state machine to Closed. */
 		/* This should cause the backend to release our resources. */
-		if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+# if defined(CONFIG_XEN) || defined(MODULE)
+		const struct xen_bus_type *bus =
+			container_of(dev->dev.bus, struct xen_bus_type, bus);
+		int ignore_on_shutdown = (bus->levels == 2);
+# endif
+
+		if (ignore_on_shutdown && (state == XenbusStateClosing))
 			xenbus_frontend_closed(dev);
 		return;
 	}
@@ -297,6 +223,7 @@ static void otherend_changed(struct xenb
 	if (drv->otherend_changed)
 		drv->otherend_changed(dev, state);
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_otherend_changed);
 
 
 static int talk_to_otherend(struct xenbus_device *dev)
@@ -317,7 +244,11 @@ static int watch_otherend(struct xenbus_
 	return xenbus_watch_path2(dev, dev->otherend, "state",
 				  &dev->otherend_watch, otherend_changed);
 #else
-	return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
+	struct xen_bus_type *bus =
+		container_of(dev->dev.bus, struct xen_bus_type, bus);
+
+	return xenbus_watch_pathfmt(dev, &dev->otherend_watch,
+				    bus->otherend_changed,
 				    "%s/%s", dev->otherend, "state");
 #endif
 }
@@ -367,8 +298,13 @@ int xenbus_dev_probe(struct device *_dev
 fail:
 	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
 	xenbus_switch_state(dev, XenbusStateClosed);
+#if defined(CONFIG_XEN) || defined(MODULE)
 	return -ENODEV;
+#else
+	return err;
+#endif
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_probe);
 
 int xenbus_dev_remove(struct device *_dev)
 {
@@ -387,8 +323,9 @@ int xenbus_dev_remove(struct device *_de
 	xenbus_switch_state(dev, XenbusStateClosed);
 	return 0;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_remove);
 
-static void xenbus_dev_shutdown(struct device *_dev)
+void xenbus_dev_shutdown(struct device *_dev)
 {
 	struct xenbus_device *dev = to_xenbus_device(_dev);
 	unsigned long timeout = 5*HZ;
@@ -421,6 +358,7 @@ static void xenbus_dev_shutdown(struct d
  out:
 	put_device(&dev->dev);
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_shutdown);
 
 int xenbus_register_driver_common(struct xenbus_driver *drv,
 				  struct xen_bus_type *bus)
@@ -442,23 +380,7 @@ int xenbus_register_driver_common(struct
 	mutex_unlock(&xenwatch_mutex);
 	return ret;
 }
-
-int xenbus_register_frontend(struct xenbus_driver *drv)
-{
-	int ret;
-
-	drv->read_otherend_details = read_backend_details;
-
-	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
-	if (ret)
-		return ret;
-
-	/* If this driver is loaded as a module wait for devices to attach. */
-	wait_for_devices(drv);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xenbus_register_frontend);
+PARAVIRT_EXPORT_SYMBOL(xenbus_register_driver_common);
 
 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
@@ -650,9 +572,31 @@ fail:
 	kfree(xendev);
 	return err;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_probe_node);
+
+#if defined(CONFIG_XEN) || defined(MODULE)
+
+/* device/<type>/<id> => <type>-<id>; returns 0, or -EINVAL for a rejected name */
+static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
+{
+	nodename = strchr(nodename, '/');
+	if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
+		pr_warning("XENBUS: bad frontend %s\n", nodename); /* NOTE(review): nodename can be NULL here */
+		return -EINVAL;
+	}
+	/* Copy what follows the first slash; the next slash then becomes a dash. */
+	strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
+	if (!strchr(bus_id, '/')) {
+		pr_warning("XENBUS: bus_id %s no slash\n", bus_id);
+		return -EINVAL;
+	}
+	*strchr(bus_id, '/') = '-';
+	return 0;
+}
 
 /* device/<typename>/<name> */
-static int xenbus_probe_frontend(const char *type, const char *name)
+static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type,
+				 const char *name)
 {
 	char *nodename;
 	int err;
@@ -660,18 +604,88 @@ static int xenbus_probe_frontend(const c
 	if (!strcmp(type, "console"))
 		return 0;
 
-	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
-			     xenbus_frontend.root, type, name);
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name);
 	if (!nodename)
 		return -ENOMEM;
 
 	DPRINTK("%s", nodename);
 
-	err = xenbus_probe_node(&xenbus_frontend, type, nodename);
+	err = xenbus_probe_node(bus, type, nodename);
 	kfree(nodename);
 	return err;
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct xenbus_device *xdev;
+	/* uevent callback of the frontend bus; -ENODEV if dev or xdev is NULL. */
+	if (dev == NULL)
+		return -ENODEV;
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return -ENODEV;
+
+	/* stuff we want to pass to /sbin/hotplug; a nonzero add result gives -ENOMEM */
+	if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype) ||
+	    add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename) ||
+	    add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype))
+		return -ENOMEM;
+
+	return 0;
+}
+#endif /* LINUX_VERSION_CODE >= 2.6.16 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
+static struct device_attribute xenbus_dev_attrs[] = { /* used as .dev_attrs of the frontend bus */
+	__ATTR_NULL /* the array holds no other entry */
+};
+#endif /* LINUX_VERSION_CODE >= 2.6.29 */
+
+/* Bus type for frontend drivers. */
+static struct xen_bus_type xenbus_frontend = {
+	.root = "device", /* first component of the node names built when probing */
+	.levels = 2, 		/* device/type/<id> */
+	.get_bus_id = frontend_bus_id,
+	.probe = xenbus_probe_frontend,
+	.error = -ENODEV, /* replaced by the bus_register() result in xenbus_init() */
+	.bus = {
+		.name      = "xen",
+		.match     = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+		.probe     = xenbus_dev_probe,
+		.remove    = xenbus_dev_remove,
+		.shutdown  = xenbus_dev_shutdown,
+		.uevent    = xenbus_uevent_frontend,
+#endif /* >= 2.6.16 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
+		.dev_attrs = xenbus_dev_attrs,
+#endif /* >= 2.6.29 */
+	},
+	.dev = {
+		.init_name = "xen",
+	},
+};
+
+int xenbus_register_frontend(struct xenbus_driver *drv)
+{
+	int ret;
+	/* Frontend drivers locate their peer through read_backend_details(). */
+	drv->read_otherend_details = read_backend_details;
+	/* Register on the frontend bus; a nonzero result is returned as is. */
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+	if (ret)
+		return ret;
+
+	/* If this driver is loaded as a module wait for devices to attach. */
+	wait_for_devices(drv);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);
+
+#endif
+
 static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
 {
 	int err = 0;
@@ -684,7 +698,7 @@ static int xenbus_probe_device_type(stru
 		return PTR_ERR(dir);
 
 	for (i = 0; i < dir_n; i++) {
-		err = bus->probe(type, dir[i]);
+		err = bus->probe(bus, type, dir[i]);
 		if (err)
 			break;
 	}
@@ -715,6 +729,7 @@ int xenbus_probe_devices(struct xen_bus_
 	kfree(dir);
 	return err;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_probe_devices);
 
 static unsigned int char_count(const char *str, char c)
 {
@@ -775,10 +790,9 @@ void xenbus_dev_changed(const char *node
 
 	kfree(root);
 }
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-EXPORT_SYMBOL_GPL(xenbus_dev_changed);
-#endif
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_changed);
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
 {
@@ -793,22 +807,21 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
-#else
 static int __maybe_unused suspend_dev(struct device *dev, void *data)
+#else
+int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 #endif
 {
 	int err = 0;
 	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
+	struct xenbus_device *xdev
+		= container_of(dev, struct xenbus_device, dev);
 
-	DPRINTK("");
+	DPRINTK("%s", xdev->nodename);
 
 	if (dev->driver == NULL)
 		return 0;
 	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
 #if !defined(CONFIG_XEN) && !defined(MODULE)
 		err = drv->suspend(xdev, state);
@@ -820,6 +833,7 @@ static int __maybe_unused suspend_dev(st
 			   dev_name(dev), err);
 	return 0;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_suspend);
 
 #if defined(CONFIG_XEN) || defined(MODULE)
 static int __maybe_unused suspend_cancel_dev(struct device *dev, void *data)
@@ -841,26 +855,22 @@ static int __maybe_unused suspend_cancel
 			   dev_name(dev), err);
 	return 0;
 }
-#endif
 
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-static int xenbus_dev_resume(struct device *dev)
-#else
 static int __maybe_unused resume_dev(struct device *dev, void *data)
+#else
+int xenbus_dev_resume(struct device *dev)
 #endif
 {
 	int err;
 	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
+	struct xenbus_device *xdev
+		= container_of(dev, struct xenbus_device, dev);
 
-	DPRINTK("");
+	DPRINTK("%s", xdev->nodename);
 
 	if (dev->driver == NULL)
 		return 0;
-
 	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
-
 	err = talk_to_otherend(xdev);
 	if (err) {
 		pr_warning("xenbus: resume (talk_to_otherend) %s failed: %i\n",
@@ -888,6 +898,7 @@ static int __maybe_unused resume_dev(str
 
 	return 0;
 }
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_resume);
 
 #if (defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)) || defined(MODULE)
 void xenbus_suspend(void)
@@ -1077,17 +1088,19 @@ void xenbus_probe(struct work_struct *un
 	xenbus_reset_state();
 #endif
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);
 	xenbus_backend_probe_and_watch();
+#endif
 
 	/* Notify others that xenstore is up */
 	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
 }
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-EXPORT_SYMBOL_GPL(xenbus_probe);
+PARAVIRT_EXPORT_SYMBOL(xenbus_probe);
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
 static int __init xenbus_probe_initcall(void)
 {
 	if (!xen_domain())
@@ -1231,12 +1244,14 @@ xenbus_init(void)
 	if (!is_running_on_xen())
 		return -ENODEV;
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 	/* Register ourselves with the kernel bus subsystem */
 	xenbus_frontend.error = bus_register(&xenbus_frontend.bus);
 	if (xenbus_frontend.error)
 		pr_warning("XENBUS: Error registering frontend bus: %i\n",
 			   xenbus_frontend.error);
 	xenbus_backend_bus_register();
+#endif
 
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -1340,10 +1355,8 @@ xenbus_init(void)
 				   " %d\n", xenbus_frontend.error);
 		}
 	}
-#endif
 	xenbus_backend_device_register();
 
-#if defined(CONFIG_XEN) || defined(MODULE)
 	if (!is_initial_xendomain())
 		xenbus_probe(NULL);
 #endif
@@ -1367,6 +1380,7 @@ xenbus_init(void)
 
 	if (page != 0)
 		free_page(page);
+
 	return err;
 }
 
@@ -1379,6 +1393,8 @@ MODULE_LICENSE("GPL");
 #endif
 #endif
 
+#if defined(CONFIG_XEN) || defined(MODULE)
+
 static int is_device_connecting(struct device *dev, void *data)
 {
 	struct xenbus_device *xendev = to_xenbus_device(dev);
@@ -1514,3 +1530,5 @@ int xenbus_for_each_frontend(void *arg, 
 	return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn);
 }
 EXPORT_SYMBOL_GPL(xenbus_for_each_frontend);
+
+#endif /* CONFIG_XEN || MODULE */
--- head.orig/drivers/xen/xenbus/xenbus_probe.h	2011-02-07 14:42:39.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.h	2011-12-21 11:24:56.000000000 +0100
@@ -66,11 +66,15 @@ struct xen_bus_type {
 	int error;
 	unsigned int levels;
 	int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
-	int (*probe)(const char *type, const char *dir);
-	struct bus_type bus;
-#if defined(CONFIG_XEN) || defined(MODULE)
+	int (*probe)(struct xen_bus_type *bus, const char *type,
+		     const char *dir);
+#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
+	void (*otherend_changed)(struct xenbus_watch *watch, const char **vec,
+				 unsigned int len);
+#else
 	struct device dev;
 #endif
+	struct bus_type bus;
 };
 
 extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
@@ -85,4 +89,16 @@ extern int xenbus_probe_devices(struct x
 
 extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
 
+extern void xenbus_dev_shutdown(struct device *_dev);
+
+extern int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+extern int xenbus_dev_resume(struct device *dev);
+
+extern void xenbus_otherend_changed(struct xenbus_watch *watch,
+				    const char **vec, unsigned int len,
+				    int ignore_on_shutdown);
+
+extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
+					char *id_node, char *path_node);
+
 #endif
--- head.orig/drivers/xen/xenbus/xenbus_probe_backend.c	2011-02-01 15:03:03.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe_backend.c	2012-03-22 14:19:05.000000000 +0100
@@ -33,7 +33,7 @@
 
 #define DPRINTK(fmt, args...)				\
 	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
-		 __FUNCTION__, __LINE__, ##args)
+		 __func__, __LINE__, ##args)
 
 #include <linux/kernel.h>
 #include <linux/version.h>
@@ -45,14 +45,17 @@
 #include <linux/slab.h>
 #include <linux/notifier.h>
 
-#include <asm/io.h>
 #include <asm/page.h>
-#include <asm/maddr.h>
 #include <asm/pgtable.h>
+#ifndef CONFIG_XEN
+#include <asm/xen/hypervisor.h>
+#endif
 #include <asm/hypervisor.h>
 #include <xen/xenbus.h>
+#ifdef CONFIG_XEN
 #include <xen/xen_proc.h>
 #include <xen/evtchn.h>
+#endif
 #include <xen/features.h>
 
 #include "xenbus_comms.h"
@@ -62,17 +65,6 @@
 #include <xen/platform-compat.h>
 #endif
 
-static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env);
-static int xenbus_probe_backend(const char *type, const char *domid);
-
-extern int read_otherend_details(struct xenbus_device *xendev,
-				 char *id_node, char *path_node);
-
-static int read_frontend_details(struct xenbus_device *xendev)
-{
-	return read_otherend_details(xendev, "frontend-id", "frontend");
-}
-
 /* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
 static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
 {
@@ -110,34 +102,12 @@ static int backend_bus_id(char bus_id[XE
 	return 0;
 }
 
-static struct device_attribute xenbus_backend_attrs[] = {
-	__ATTR_NULL
-};
-
-static struct xen_bus_type xenbus_backend = {
-	.root = "backend",
-	.levels = 3, 		/* backend/type/<frontend>/<id> */
-	.get_bus_id = backend_bus_id,
-	.probe = xenbus_probe_backend,
-	.error = -ENODEV,
-	.bus = {
-		.name      = "xen-backend",
-		.match     = xenbus_match,
-		.probe     = xenbus_dev_probe,
-		.remove    = xenbus_dev_remove,
-//		.shutdown  = xenbus_dev_shutdown,
-		.uevent    = xenbus_uevent_backend,
-		.dev_attrs = xenbus_backend_attrs,
-	},
-	.dev = {
-		.init_name = "xen-backend",
-	},
-};
-
-static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env)
+static int xenbus_uevent_backend(struct device *dev,
+				 struct kobj_uevent_env *env)
 {
 	struct xenbus_device *xdev;
 	struct xenbus_driver *drv;
+	struct xen_bus_type *bus;
 
 	DPRINTK("");
 
@@ -145,15 +115,19 @@ static int xenbus_uevent_backend(struct 
 		return -ENODEV;
 
 	xdev = to_xenbus_device(dev);
 	if (xdev == NULL)
 		return -ENODEV;
+	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
 
 	/* stuff we want to pass to /sbin/hotplug */
-	add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
+	if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
+		return -ENOMEM;
 
-	add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
+	if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename))
+		return -ENOMEM;
 
-	add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+	if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root))
+		return -ENOMEM;
 
 	if (dev->driver) {
 		drv = to_xenbus_driver(dev->driver);
@@ -164,16 +138,9 @@ static int xenbus_uevent_backend(struct 
 	return 0;
 }
 
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
-	drv->read_otherend_details = read_frontend_details;
-
-	return xenbus_register_driver_common(drv, &xenbus_backend);
-}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
-
 /* backend/<typename>/<frontend-uuid>/<name> */
-static int xenbus_probe_backend_unit(const char *dir,
+static int xenbus_probe_backend_unit(struct xen_bus_type *bus,
+				     const char *dir,
 				     const char *type,
 				     const char *name)
 {
@@ -186,13 +153,14 @@ static int xenbus_probe_backend_unit(con
 
 	DPRINTK("%s\n", nodename);
 
-	err = xenbus_probe_node(&xenbus_backend, type, nodename);
+	err = xenbus_probe_node(bus, type, nodename);
 	kfree(nodename);
 	return err;
 }
 
 /* backend/<typename>/<frontend-domid> */
-static int xenbus_probe_backend(const char *type, const char *domid)
+static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type,
+				const char *domid)
 {
 	char *nodename;
 	int err = 0;
@@ -201,7 +169,7 @@ static int xenbus_probe_backend(const ch
 
 	DPRINTK("");
 
-	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid);
 	if (!nodename)
 		return -ENOMEM;
 
@@ -212,7 +180,7 @@ static int xenbus_probe_backend(const ch
 	}
 
 	for (i = 0; i < dir_n; i++) {
-		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+		err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]);
 		if (err)
 			break;
 	}
@@ -221,6 +189,44 @@ static int xenbus_probe_backend(const ch
 	return err;
 }
 
+#ifndef CONFIG_XEN
+static void frontend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	xenbus_otherend_changed(watch, vec, len, 0);
+}
+#endif
+
+static struct device_attribute xenbus_backend_dev_attrs[] = {
+	__ATTR_NULL
+};
+
+static struct xen_bus_type xenbus_backend = {
+	.root = "backend",
+	.levels = 3,		/* backend/type/<frontend>/<id> */
+	.get_bus_id = backend_bus_id,
+	.probe = xenbus_probe_backend,
+#ifndef CONFIG_XEN
+	.otherend_changed = frontend_changed,
+#else
+	.dev = {
+		.init_name = "xen-backend",
+	},
+#endif
+	.error = -ENODEV,
+	.bus = {
+		.name		= "xen-backend",
+		.match		= xenbus_match,
+		.uevent		= xenbus_uevent_backend,
+		.probe		= xenbus_dev_probe,
+		.remove		= xenbus_dev_remove,
+#ifdef CONFIG_XEN
+		.shutdown	= xenbus_dev_shutdown,
+#endif
+		.dev_attrs	= xenbus_backend_dev_attrs,
+	},
+};
+
 static void backend_changed(struct xenbus_watch *watch,
 			    const char **vec, unsigned int len)
 {
@@ -234,7 +240,34 @@ static struct xenbus_watch be_watch = {
 	.callback = backend_changed,
 };
 
-#ifdef CONFIG_PM_SLEEP
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+	return xenbus_read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+#ifndef CONFIG_XEN
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+	int rc, val;
+
+	rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+	if (rc != 1)
+		val = 0; /* no online node present */
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+#endif
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+	drv->read_otherend_details = read_frontend_details;
+
+	return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
+
+#if defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)
 void xenbus_backend_suspend(int (*fn)(struct device *, void *))
 {
 	DPRINTK("");
@@ -250,12 +283,47 @@ void xenbus_backend_resume(int (*fn)(str
 }
 #endif
 
+#ifndef CONFIG_XEN
+static int backend_probe_and_watch(struct notifier_block *notifier,
+				   unsigned long event,
+				   void *data)
+#else
 void xenbus_backend_probe_and_watch(void)
+#endif
 {
+	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_backend);
 	register_xenbus_watch(&be_watch);
+
+#ifndef CONFIG_XEN
+	return NOTIFY_DONE;
+#endif
 }
 
+#ifndef CONFIG_XEN
+
+static int __init xenbus_probe_backend_init(void)
+{
+	static struct notifier_block xenstore_notifier = {
+		.notifier_call = backend_probe_and_watch
+	};
+	int err;
+
+	DPRINTK("");
+
+	/* Register ourselves with the kernel bus subsystem */
+	err = bus_register(&xenbus_backend.bus);
+	if (err)
+		return err;
+
+	register_xenstore_notifier(&xenstore_notifier);
+
+	return 0;
+}
+subsys_initcall(xenbus_probe_backend_init);
+
+#else
+
 void xenbus_backend_bus_register(void)
 {
 	xenbus_backend.error = bus_register(&xenbus_backend.bus);
@@ -282,3 +350,5 @@ int xenbus_for_each_backend(void *arg, i
 	return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn);
 }
 EXPORT_SYMBOL_GPL(xenbus_for_each_backend);
+
+#endif
--- head.orig/include/xen/gntdev.h	2012-06-12 15:05:54.000000000 +0200
+++ head/include/xen/gntdev.h	2011-04-13 14:08:57.000000000 +0200
@@ -1,150 +1,3 @@
-/******************************************************************************
- * gntdev.h
- * 
- * Interface to /dev/xen/gntdev.
- * 
- * Copyright (c) 2007, D G Murray
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __LINUX_PUBLIC_GNTDEV_H__
-#define __LINUX_PUBLIC_GNTDEV_H__
-
-struct ioctl_gntdev_grant_ref {
-	/* The domain ID of the grant to be mapped. */
-	uint32_t domid;
-	/* The grant reference of the grant to be mapped. */
-	uint32_t ref;
-};
-
-/*
- * Inserts the grant references into the mapping table of an instance
- * of gntdev. N.B. This does not perform the mapping, which is deferred
- * until mmap() is called with @index as the offset.
- */
-#define IOCTL_GNTDEV_MAP_GRANT_REF \
-_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
-struct ioctl_gntdev_map_grant_ref {
-	/* IN parameters */
-	/* The number of grants to be mapped. */
-	uint32_t count;
-	uint32_t pad;
-	/* OUT parameters */
-	/* The offset to be used on a subsequent call to mmap(). */
-	uint64_t index;
-	/* Variable IN parameter. */
-	/* Array of grant references, of size @count. */
-	struct ioctl_gntdev_grant_ref refs[1];
-};
-
-/*
- * Removes the grant references from the mapping table of an instance of
- * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
- * before this ioctl is called, or an error will result.
- */
-#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
-_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
-struct ioctl_gntdev_unmap_grant_ref {
-	/* IN parameters */
-	/* The offset was returned by the corresponding map operation. */
-	uint64_t index;
-	/* The number of pages to be unmapped. */
-	uint32_t count;
-	uint32_t pad;
-};
-
-/*
- * Returns the offset in the driver's address space that corresponds
- * to @vaddr. This can be used to perform a munmap(), followed by an
- * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
- * the caller. The number of pages that were allocated at the same time as
- * @vaddr is returned in @count.
- *
- * N.B. Where more than one page has been mapped into a contiguous range, the
- *      supplied @vaddr must correspond to the start of the range; otherwise
- *      an error will result. It is only possible to munmap() the entire
- *      contiguously-allocated range at once, and not any subrange thereof.
- */
-#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
-_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
-struct ioctl_gntdev_get_offset_for_vaddr {
-	/* IN parameters */
-	/* The virtual address of the first mapped page in a range. */
-	uint64_t vaddr;
-	/* OUT parameters */
-	/* The offset that was used in the initial mmap() operation. */
-	uint64_t offset;
-	/* The number of pages mapped in the VM area that begins at @vaddr. */
-	uint32_t count;
-	uint32_t pad;
-};
-
-/*
- * Sets the maximum number of grants that may mapped at once by this gntdev
- * instance.
- *
- * N.B. This must be called before any other ioctl is performed on the device.
- */
-#define IOCTL_GNTDEV_SET_MAX_GRANTS \
-_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants))
-struct ioctl_gntdev_set_max_grants {
-	/* IN parameter */
-	/* The maximum number of grants that may be mapped at once. */
-	uint32_t count;
-};
-
-/*
- * Sets up an unmap notification within the page, so that the other side can do
- * cleanup if this side crashes. Required to implement cross-domain robust
- * mutexes or close notification on communication channels.
- *
- * Each mapped page only supports one notification; multiple calls referring to
- * the same page overwrite the previous notification. You must clear the
- * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
- * to occur.
- */
-#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
-_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
-struct ioctl_gntdev_unmap_notify {
-	/* IN parameters */
-	/* Offset in the file descriptor for a byte within the page (same as
-	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
-	 * be cleared. Otherwise, it can be any byte in the page whose
-	 * notification we are adjusting.
-	 */
-	uint64_t index;
-	/* Action(s) to take on unmap */
-	uint32_t action;
-	/* Event channel to notify */
-	uint32_t event_channel_port;
-};
-
-/* Clear (set to zero) the byte specified by index */
-#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
-/* Send an interrupt on the indicated event channel */
-#define UNMAP_NOTIFY_SEND_EVENT 0x2
-
-#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
+#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__)
+#include "public/gntdev.h"
+#endif
--- head.orig/include/xen/public/gntdev.h	2008-04-02 12:34:02.000000000 +0200
+++ head/include/xen/public/gntdev.h	2011-02-03 13:52:28.000000000 +0100
@@ -66,7 +66,7 @@ struct ioctl_gntdev_map_grant_ref {
  * before this ioctl is called, or an error will result.
  */
 #define IOCTL_GNTDEV_UNMAP_GRANT_REF \
-_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
 struct ioctl_gntdev_unmap_grant_ref {
 	/* IN parameters */
 	/* The offset was returned by the corresponding map operation. */
--- head.orig/lib/swiotlb-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ head/lib/swiotlb-xen.c	2011-03-11 11:06:22.000000000 +0100
@@ -48,7 +48,7 @@ int swiotlb_force;
 static char *io_tlb_start, *io_tlb_end;
 
 /*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
  * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
  */
 static unsigned long io_tlb_nslabs;
@@ -567,6 +567,15 @@ dma_addr_t swiotlb_map_page(struct devic
 	}
 
 	dev_addr = swiotlb_virt_to_bus(dev, map);
+
+	/*
+	 * Ensure that the address returned is DMA'ble
+	 */
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+	}
+
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(swiotlb_map_page);
--- head.orig/mm/Kconfig	2012-05-23 12:48:43.000000000 +0200
+++ head/mm/Kconfig	2012-02-08 12:50:54.000000000 +0100
@@ -313,7 +313,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on X86 && MMU
+	depends on X86 && !XEN && MMU
 	select COMPACTION
 	help
 	  Transparent Hugepages allows the kernel to use huge pages and