Blob Blame History Raw
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.32
Patch-mainline: 2.6.32

 This patch contains the differences between 2.6.31 and 2.6.32.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches.py

2.6.34/drivers/staging/vt6656/ttype.h (VOID no longer used)

--- 12.2.orig/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:54:13.000000000 +0100
@@ -20,18 +20,15 @@
 #define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
 #define __AUDIT_ARCH_LE	   0x40000000
 
-#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
-#endif
-
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
 
 	.macro IA32_ARG_FIXUP noebp=0
 	movl	%edi,%r8d
 	.if \noebp
+	jmp	.Lia32_common
 	.else
 	movl	%ebp,%r9d
+.Lia32_common:
 	.endif
 	xchg	%ecx,%esi
 	movl	%ebx,%edi
@@ -39,12 +36,12 @@
 	.endm 
 
 	/* clobbers %eax */	
-	.macro  CLEAR_RREGS _r9=rax
+	.macro  CLEAR_RREGS offset=0, _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,R11(%rsp)
-	movq	%rax,R10(%rsp)
-	movq	%\_r9,R9(%rsp)
-	movq	%rax,R8(%rsp)
+	movq	%rax,\offset+R11(%rsp)
+	movq	%rax,\offset+R10(%rsp)
+	movq	%\_r9,\offset+R9(%rsp)
+	movq	%rax,\offset+R8(%rsp)
 	.endm
 
 	/*
@@ -142,19 +139,7 @@ ENTRY(ia32_sysenter_target)
 	orl    $TS_COMPAT,TI_status(%r10)
 	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz  sysenter_tracesys
-	cmpl	$(IA32_NR_syscalls-1),%eax
-	ja	ia32_badsys
-sysenter_do_call:
-	IA32_ARG_FIXUP
-sysenter_dispatch:
-	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz	sysexit_audit
-	jmp int_ret_from_sys_call
+	jmp .Lia32_check_call
 
 #ifdef CONFIG_AUDITSYSCALL
 	.macro auditsys_entry_common
@@ -175,48 +160,11 @@ sysenter_dispatch:
 	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
-	.macro auditsys_exit exit,ebpsave=RBP
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jnz int_ret_from_sys_call
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	movl %eax,%esi		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
-	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
-	movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
-	.endm
-
 sysenter_auditsys:
 	auditsys_entry_common
 	movl %ebp,%r9d			/* reload 6th syscall arg */
-	jmp sysenter_dispatch
-
-sysexit_audit:
-	auditsys_exit sysexit_from_sys_call
-#endif
-
-sysenter_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jz	sysenter_auditsys
+	jmp .Lia32_dispatch
 #endif
-	SAVE_REST
-	CLEAR_RREGS
-	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
-	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
-	call	syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
-	cmpl	$(IA32_NR_syscalls-1),%eax
-	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
-	jmp	sysenter_do_call
 	CFI_ENDPROC
 ENDPROC(ia32_sysenter_target)
 
@@ -272,24 +220,13 @@ ENTRY(ia32_cstar_target)
 	ja  ia32_badsys
 cstar_do_call:
 	IA32_ARG_FIXUP 1
-cstar_dispatch:
-	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz sysretl_audit
-	jmp int_ret_from_sys_call
 	
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
 	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
-	jmp cstar_dispatch
-
-sysretl_audit:
-	auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+	jmp .Lia32_dispatch
 #endif
 
 cstar_tracesys:
@@ -299,7 +236,7 @@ cstar_tracesys:
 #endif
 	xchgl %r9d,%ebp
 	SAVE_REST
-	CLEAR_RREGS r9
+	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
@@ -363,15 +300,23 @@ ENTRY(ia32_syscall)
 	orl   $TS_COMPAT,TI_status(%r10)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
+.Lia32_check_call:
 	cmpl $(IA32_NR_syscalls-1),%eax
 	ja ia32_badsys
 ia32_do_call:
 	IA32_ARG_FIXUP
+.Lia32_dispatch:
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
+	CLEAR_RREGS -ARGOFFSET
 	jmp int_ret_from_sys_call 
 
+sysenter_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jz sysenter_auditsys
+#endif
 ia32_tracesys:			 
 	SAVE_REST
 	CLEAR_RREGS
@@ -387,8 +332,8 @@ END(ia32_syscall)
 
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-	jmp int_ret_from_sys_call
+	movq $-ENOSYS,%rax
+	jmp ia32_sysret
 
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
@@ -482,7 +427,7 @@ ia32_sys_call_table:
 	.quad sys_mkdir
 	.quad sys_rmdir		/* 40 */
 	.quad sys_dup
-	.quad sys32_pipe
+	.quad sys_pipe
 	.quad compat_sys_times
 	.quad quiet_ni_syscall			/* old prof syscall holder */
 	.quad sys_brk		/* 45 */
@@ -776,5 +721,5 @@ ia32_sys_call_table:
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_counter_open
+	.quad sys_perf_event_open
 ia32_syscall_end:
--- 12.2.orig/arch/x86/include/asm/nmi.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/include/asm/nmi.h	2011-04-13 13:55:46.000000000 +0200
@@ -17,7 +17,10 @@ struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
+#endif
 
+#if (defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)) || \
+    (defined(CONFIG_XEN_SMPBOOT) && CONFIG_XEN_COMPAT >= 0x030200)
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
--- 12.2.orig/arch/x86/include/asm/uv/uv_hub.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/include/asm/uv/uv_hub.h	2011-02-01 14:54:13.000000000 +0100
@@ -11,7 +11,7 @@
 #ifndef _ASM_X86_UV_UV_HUB_H
 #define _ASM_X86_UV_UV_HUB_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
 #include <linux/numa.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
--- 12.2.orig/arch/x86/include/mach-xen/asm/agp.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/agp.h	2011-02-01 14:54:13.000000000 +0100
@@ -45,10 +45,7 @@
  */
 #define flush_agp_cache() wbinvd()
 
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) phys_to_machine(x)
-#define gart_to_phys(x) machine_to_phys(x)
-#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
+#define virt_to_gart virt_to_machine
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)	({                                          \
--- 12.2.orig/arch/x86/include/mach-xen/asm/desc.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/desc.h	2011-02-01 14:54:13.000000000 +0100
@@ -312,7 +312,14 @@ static inline void load_LDT(mm_context_t
 
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
-	return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
+{
+	desc->base0 = base & 0xffff;
+	desc->base1 = (base >> 16) & 0xff;
+	desc->base2 = (base >> 24) & 0xff;
 }
 
 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
@@ -320,6 +327,12 @@ static inline unsigned long get_desc_lim
 	return desc->limit0 | (desc->limit << 16);
 }
 
+static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
+{
+	desc->limit0 = limit & 0xffff;
+	desc->limit = (limit >> 16) & 0xf;
+}
+
 #ifndef CONFIG_X86_NO_IDT
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
--- 12.2.orig/arch/x86/include/mach-xen/asm/dma-mapping.h	2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/dma-mapping.h	2011-02-01 14:54:13.000000000 +0100
@@ -1,11 +1,24 @@
 #ifndef _ASM_X86_DMA_MAPPING_H_
 
+#define phys_to_dma _phys_to_dma_
+#define dma_to_phys _dma_to_phys_
+
 #include_next <asm/dma-mapping.h>
 
-void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
+#undef phys_to_dma
+#undef dma_to_phys
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return phys_to_machine(paddr);
+}
 
-#define address_needs_mapping(hwdev, addr, size) \
-	!is_buffer_dma_capable(dma_get_mask(hwdev), addr, size)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return machine_to_phys(daddr);
+}
+
+void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
 
 extern int range_straddles_page_boundary(paddr_t p, size_t size);
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/fixmap.h	2011-02-01 14:54:13.000000000 +0100
@@ -139,6 +139,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
+#ifdef CONFIG_INTEL_TXT
+	FIX_TBOOT_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2012-06-06 14:02:48.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/hypervisor.h	2012-06-06 14:03:13.000000000 +0200
@@ -65,6 +65,7 @@ extern start_info_t *xen_start_info;
 #endif
 
 #define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN))
+#define init_hypervisor_platform() init_hypervisor(&boot_cpu_data)
 
 DECLARE_PER_CPU(struct vcpu_runstate_info, runstate);
 struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
@@ -377,7 +378,7 @@ static inline void MULTI_bug(multicall_e
 
 #endif
 
-#define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI)
+#define uvm_multi(cpumask) ((unsigned long)cpumask_bits(cpumask) | UVMF_MULTI)
 
 #ifdef LINUX
 /* drivers/staging/ use Windows-style types, including VOID */
--- 12.2.orig/arch/x86/include/mach-xen/asm/irqflags.h	2012-05-24 09:11:56.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/irqflags.h	2011-02-01 14:54:13.000000000 +0100
@@ -1,7 +1,7 @@
 #ifndef _X86_IRQFLAGS_H_
 #define _X86_IRQFLAGS_H_
 
-#include <asm/processor-flags.h>
+#include <asm/smp-processor-id.h>
 
 #ifndef __ASSEMBLY__
 /*
--- 12.2.orig/arch/x86/include/mach-xen/asm/mmu_context.h	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/mmu_context.h	2011-02-01 14:54:13.000000000 +0100
@@ -88,12 +88,12 @@ static inline void switch_mm(struct mm_s
 		       !PagePinned(virt_to_page(next->pgd)));
 
 		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
-		cpu_set(cpu, next->cpu_vm_mask);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/* Re-load page tables: load_cr3(next->pgd) */
 		op->cmd = MMUEXT_NEW_BASEPTR;
@@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_s
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+		if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
--- 12.2.orig/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 14:54:13.000000000 +0100
@@ -151,7 +151,11 @@ static inline int __pcibus_to_node(const
 static inline const struct cpumask *
 cpumask_of_pcibus(const struct pci_bus *bus)
 {
-	return cpumask_of_node(__pcibus_to_node(bus));
+	int node;
+
+	node = __pcibus_to_node(bus);
+	return (node == -1) ? cpu_online_mask :
+			      cpumask_of_node(node);
 }
 #endif
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 09:59:16.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:00:14.000000000 +0100
@@ -50,16 +50,6 @@ extern struct list_head pgd_list;
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
 
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
-	xen_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
-	xen_pagetable_setup_done(base);
-}
-
 #define pgd_val(x)	xen_pgd_val(x)
 #define __pgd(x)	xen_make_pgd(x)
 
@@ -131,6 +121,11 @@ static inline int pte_special(pte_t pte)
 
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 static inline int pmd_large(pmd_t pte)
 {
 	return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -359,7 +354,7 @@ static inline unsigned long pmd_page_vad
  * this macro returns the index of the entry in the pmd page which would
  * control the given virtual address
  */
-static inline unsigned pmd_index(unsigned long address)
+static inline unsigned long pmd_index(unsigned long address)
 {
 	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
 }
@@ -379,7 +374,7 @@ static inline unsigned pmd_index(unsigne
  * this function returns the index of the entry in the pte page which would
  * control the given virtual address
  */
-static inline unsigned pte_index(unsigned long address)
+static inline unsigned long pte_index(unsigned long address)
 {
 	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 }
@@ -435,11 +430,6 @@ static inline pmd_t *pmd_offset(pud_t *p
 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
-	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
 static inline int pud_large(pud_t pud)
 {
 	return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -475,7 +465,7 @@ static inline unsigned long pgd_page_vad
 #define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
 
 /* to find an entry in a page-table-directory. */
-static inline unsigned pud_index(unsigned long address)
+static inline unsigned long pud_index(unsigned long address)
 {
 	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
 }
@@ -596,7 +586,7 @@ extern int ptep_clear_flush_young(struct
 	if (!pte_none(__res) &&					\
 	    ((vma)->vm_mm != current->mm ||			\
 	     HYPERVISOR_update_va_mapping(addr,	__pte(0),	\
-			uvm_multi((vma)->vm_mm->cpu_vm_mask) |	\
+			uvm_multi(mm_cpumask((vma)->vm_mm)) |	\
 				UVMF_INVLPG))) {		\
 		__xen_pte_clear(__ptep);			\
 		flush_tlb_page(vma, addr);			\
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable_types.h	2011-02-01 14:54:13.000000000 +0100
@@ -334,6 +334,7 @@ static inline pteval_t pte_flags(pte_t p
 typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
 extern int nx_enabled;
 
 #define pgprot_writecombine	pgprot_writecombine
@@ -354,14 +355,6 @@ int phys_mem_access_prot_allowed(struct 
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
-#ifndef CONFIG_XEN
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-#else
-static inline void xen_pagetable_setup_start(pgd_t *base) {}
-static inline void xen_pagetable_setup_done(pgd_t *base) {}
-#endif
-
 struct seq_file;
 extern void arch_report_meminfo(struct seq_file *m);
 
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:46:07.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/processor.h	2012-05-23 09:35:22.000000000 +0200
@@ -27,6 +27,7 @@ struct mm_struct;
 #include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
+#include <linux/math64.h>
 #include <linux/init.h>
 #include <xen/interface/physdev.h>
 
@@ -421,7 +422,17 @@ extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, stack_canary);
+/*
+ * Make sure stack canary segment base is cached-aligned:
+ *   "For Intel Atom processors, avoid non zero segment base address
+ *    that is not aligned to cache line boundary at all cost."
+ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
+ */
+struct stack_canary {
+	char __pad[20];		/* canary at %gs:20 */
+	unsigned long canary;
+};
+DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 #endif	/* X86_64 */
 
@@ -657,13 +668,23 @@ static inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution: */
+/* Stop speculative execution and prefetching of modified code. */
 static inline void sync_core(void)
 {
 	int tmp;
 
-	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+#if defined(CONFIG_M386) || defined(CONFIG_M486)
+	if (boot_cpu_data.x86 < 5)
+		/* There is no speculative execution.
+		 * jmp is a barrier to prefetching. */
+		asm volatile("jmp 1f\n1:\n" ::: "memory");
+	else
+#endif
+		/* cpuid is a barrier to speculative execution.
+		 * Prefetched instructions are automatically
+		 * invalidated when modified. */
+		asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+			     : "ebx", "ecx", "edx", "memory");
 }
 
 static inline void __monitor(const void *eax, unsigned long ecx,
@@ -954,4 +975,37 @@ extern void start_thread(struct pt_regs 
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+extern int amd_get_nb_id(int cpu);
+
+#ifndef CONFIG_XEN
+struct aperfmperf {
+	u64 aperf, mperf;
+};
+
+static inline void get_aperfmperf(struct aperfmperf *am)
+{
+	WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+	rdmsrl(MSR_IA32_APERF, am->aperf);
+	rdmsrl(MSR_IA32_MPERF, am->mperf);
+}
+
+#define APERFMPERF_SHIFT 10
+
+static inline
+unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+				    struct aperfmperf *new)
+{
+	u64 aperf = new->aperf - old->aperf;
+	u64 mperf = new->mperf - old->mperf;
+	unsigned long ratio = aperf;
+
+	mperf >>= APERFMPERF_SHIFT;
+	if (mperf)
+		ratio = div64_u64(aperf, mperf);
+
+	return ratio;
+}
+#endif
+
 #endif /* _ASM_X86_PROCESSOR_H */
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/include/mach-xen/asm/setup.h	2011-02-01 14:54:13.000000000 +0100
@@ -0,0 +1,8 @@
+#ifndef __ASSEMBLY__
+
+void xen_start_kernel(void);
+void xen_arch_setup(void);
+
+#endif
+
+#include_next <asm/setup.h>
--- 12.2.orig/arch/x86/include/mach-xen/asm/smp.h	2011-04-13 13:55:08.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/smp.h	2011-03-03 16:08:16.000000000 +0100
@@ -24,8 +24,8 @@ extern unsigned int num_processors;
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
-#endif
 DECLARE_PER_CPU(int, cpu_number);
+#endif
 
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
 {
@@ -124,7 +124,6 @@ static inline void arch_send_call_functi
 	smp_ops.send_call_func_single_ipi(cpu);
 }
 
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	smp_ops.send_call_func_ipi(mask);
@@ -170,27 +169,7 @@ static inline int num_booting_cpus(void)
 
 extern unsigned disabled_cpus __cpuinitdata;
 
-#ifdef CONFIG_X86_32_SMP
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-#define safe_smp_processor_id() smp_processor_id()
-
-#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-
-#define stack_smp_processor_id()					\
-({								\
-	struct thread_info *ti;						\
-	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->cpu;							\
-})
-#define safe_smp_processor_id()		smp_processor_id()
-
-#endif
+#include <asm/smp-processor-id.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/include/mach-xen/asm/smp-processor-id.h	2011-02-01 14:54:13.000000000 +0100
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_SMP_PROCESSOR_ID_H
+#define _ASM_X86_SMP_PROCESSOR_ID_H
+
+#if defined(CONFIG_SMP) && !defined(__ASSEMBLY__)
+
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(int, cpu_number);
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+#define raw_smp_processor_id() percpu_read(cpu_number)
+#define safe_smp_processor_id() smp_processor_id()
+
+#ifdef CONFIG_X86_64_SMP
+#define stack_smp_processor_id()					\
+({									\
+	struct thread_info *ti;						\
+	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
+	ti->cpu;							\
+})
+#endif
+
+#ifdef CONFIG_DEBUG_PREEMPT
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#else
+# define smp_processor_id() raw_smp_processor_id()
+#endif
+
+#endif /* SMP && !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_SMP_PROCESSOR_ID_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/time.h	2011-07-11 11:48:04.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/time.h	2011-07-11 12:25:07.000000000 +0200
@@ -2,6 +2,7 @@
 #define _XEN_ASM_TIME_H
 
 unsigned long xen_read_wallclock(void);
+int xen_write_wallclock(unsigned long);
 
 struct timespec;
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
--- 12.2.orig/arch/x86/include/mach-xen/asm/tlbflush.h	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/tlbflush.h	2011-02-01 14:54:13.000000000 +0100
@@ -74,9 +74,9 @@ static inline void reset_lazy_tlbstate(v
 #define local_flush_tlb() __flush_tlb()
 
 #define flush_tlb_all xen_tlb_flush_all
-#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask)
-#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask)
-#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va)
+#define flush_tlb_current_task() xen_tlb_flush_mask(mm_cpumask(current->mm))
+#define flush_tlb_mm(mm) xen_tlb_flush_mask(mm_cpumask(mm))
+#define flush_tlb_page(vma, va) xen_invlpg_mask(mm_cpumask((vma)->vm_mm), va)
 
 #define flush_tlb()	flush_tlb_current_task()
 
--- 12.2.orig/arch/x86/kernel/Makefile	2012-04-10 17:00:42.000000000 +0200
+++ 12.2/arch/x86/kernel/Makefile	2012-04-10 17:01:49.000000000 +0200
@@ -114,8 +114,6 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
-
-	time_64-$(CONFIG_XEN)		+= time_32.o
 endif
 
 disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8237.o \
--- 12.2.orig/arch/x86/kernel/apic/hw_nmi.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/kernel/apic/hw_nmi.c	2011-04-13 13:55:59.000000000 +0200
@@ -26,6 +26,10 @@ u64 hw_nmi_get_sample_period(int watchdo
 #endif
 
 #ifdef arch_trigger_all_cpu_backtrace
+#ifdef CONFIG_XEN
+#include <asm/ipi.h>
+#endif
+
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
@@ -46,7 +50,11 @@ void arch_trigger_all_cpu_backtrace(void
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 
 	printk(KERN_INFO "sending NMI to all CPUs:\n");
+#ifndef CONFIG_XEN
 	apic->send_IPI_all(NMI_VECTOR);
+#else /* this works even without CONFIG_X86_LOCAL_APIC */
+	xen_send_IPI_all(NMI_VECTOR);
+#endif
 
 	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
 	for (i = 0; i < 10 * 1000; i++) {
--- 12.2.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -69,6 +69,8 @@ unsigned long io_apic_irqs;
 #endif /* CONFIG_XEN */
 
 #define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+	for (entry = head; entry; entry = entry->next)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -90,12 +92,24 @@ int nr_ioapic_registers[MAX_IO_APICS];
 struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
+/* IO APIC gsi routing info */
+struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];
+
 /* MP IRQ source entries */
 struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#ifndef CONFIG_XEN
+/* Number of legacy interrupts */
+static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+#else
+#define nr_legacy_irqs NR_IRQS_LEGACY
+#endif
+
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -122,15 +136,6 @@ static int __init parse_noapic(char *str
 early_param("noapic", parse_noapic);
 
 #ifndef CONFIG_XEN
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
 struct irq_pin_list {
 	int apic, pin;
 	struct irq_pin_list *next;
@@ -145,6 +150,11 @@ static struct irq_pin_list *get_one_free
 	return pin;
 }
 
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
 struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
 	cpumask_var_t domain;
@@ -178,6 +188,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] 
 	[15] = { .vector = IRQ15_VECTOR, },
 };
 
+void __init io_apic_disable_legacy(void)
+{
+	nr_legacy_irqs = 0;
+	nr_irqs_gsi = 0;
+}
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -195,7 +211,7 @@ int __init arch_early_irq_init(void)
 		desc->chip_data = &cfg[i];
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
-		if (i < NR_IRQS_LEGACY)
+		if (i < nr_legacy_irqs)
 			cpumask_setall(cfg[i].domain);
 	}
 
@@ -221,17 +237,14 @@ static struct irq_cfg *get_one_free_irq_
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
 							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
-		} else {
-			cpumask_clear(cfg->domain);
-			cpumask_clear(cfg->old_domain);
 		}
 	}
 
@@ -445,13 +458,10 @@ static bool io_apic_level_ack_pending(st
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = cfg->irq_2_pin;
-	for (;;) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 		int pin;
 
-		if (!entry)
-			break;
 		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
@@ -459,9 +469,6 @@ static bool io_apic_level_ack_pending(st
 			spin_unlock_irqrestore(&ioapic_lock, flags);
 			return true;
 		}
-		if (!entry->next)
-			break;
-		entry = entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -533,72 +540,68 @@ static void ioapic_mask_entry(int apic, 
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list *entry;
-
-	entry = cfg->irq_2_pin;
-	if (!entry) {
-		entry = get_one_free_irq_2_pin(node);
-		if (!entry) {
-			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
-					apic, pin);
-			return;
-		}
-		cfg->irq_2_pin = entry;
-		entry->apic = apic;
-		entry->pin = pin;
-		return;
-	}
+	struct irq_pin_list **last, *entry;
 
-	while (entry->next) {
-		/* not again, please */
+	/* don't allow duplicates */
+	last = &cfg->irq_2_pin;
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (entry->apic == apic && entry->pin == pin)
-			return;
-
-		entry = entry->next;
+			return 0;
+		last = &entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(node);
-	entry = entry->next;
+	entry = get_one_free_irq_2_pin(node);
+	if (!entry) {
+		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+				node, apic, pin);
+		return -ENOMEM;
+	}
 	entry->apic = apic;
 	entry->pin = pin;
+
+	*last = entry;
+	return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+	if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 }
 
 /*
  * Reroute an IRQ to a different pin.
  */
 static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
+					   int oldapic, int oldpin,
+					   int newapic, int newpin)
 {
-	struct irq_pin_list *entry = cfg->irq_2_pin;
-	int replaced = 0;
+	struct irq_pin_list *entry;
 
-	while (entry) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-			replaced = 1;
 			/* every one is different, right? */
-			break;
+			return;
 		}
-		entry = entry->next;
 	}
 
-	/* why? call replace before add? */
-	if (!replaced)
-		add_pin_to_irq_node(cfg, node, newapic, newpin);
+	/* old apic/pin didn't exist, so just add new ones */
+	add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-				int mask_and, int mask_or,
-				void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+			       int mask_and, int mask_or,
+			       void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
 	struct irq_pin_list *entry;
 
-	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -615,7 +618,6 @@ static void __unmask_IO_APIC_irq(struct 
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-#ifdef CONFIG_X86_64
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -631,11 +633,6 @@ static void __mask_IO_APIC_irq(struct ir
 {
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
 
 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
@@ -648,7 +645,6 @@ static void __unmask_and_level_IO_APIC_i
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
-#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
@@ -709,6 +705,7 @@ static void clear_IO_APIC (void)
 }
 #else
 #define add_pin_to_irq_node(cfg, node, apic, pin)
+#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
 #endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_X86_32
@@ -925,7 +922,7 @@ static int __init find_isa_irq_apic(int 
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -1538,7 +1535,7 @@ static void setup_IO_APIC_irq(int apic_i
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs)
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
@@ -1766,12 +1763,8 @@ __apicdebuginit(void) print_IO_APIC(void
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
-		for (;;) {
+		for_each_irq_pin(entry, cfg->irq_2_pin)
 			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = entry->next;
-		}
 		printk("\n");
 	}
 
@@ -1915,7 +1908,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (apic_verbosity == APIC_QUIET)
+	if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1947,7 +1940,7 @@ __apicdebuginit(int) print_all_ICs(void)
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic || disable_apic)
+	if (!cpu_has_apic && !apic_from_smp_config())
 		return 0;
 
 	print_all_local_APICs();
@@ -1981,6 +1974,10 @@ void __init enable_IO_APIC(void)
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 	}
+
+	if (!nr_legacy_irqs)
+		return;
+
 #ifndef CONFIG_XEN
 	for(apic = 0; apic < nr_ioapics; apic++) {
 		int pin;
@@ -2038,6 +2035,9 @@ void disable_IO_APIC(void)
 	 */
 	clear_IO_APIC();
 
+	if (!nr_legacy_irqs)
+		return;
+
 	/*
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
@@ -2071,7 +2071,7 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	if (cpu_has_apic)
+	if (cpu_has_apic || apic_from_smp_config())
 		disconnect_bsp_APIC(!intr_remapping_enabled &&
 				ioapic_i8259.pin != -1);
 }
@@ -2084,7 +2084,7 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-static void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -2093,9 +2093,8 @@ static void __init setup_ioapic_ids_from
 	unsigned char old_id;
 	unsigned long flags;
 
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+	if (acpi_ioapic)
 		return;
-
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
@@ -2269,7 +2268,7 @@ static unsigned int startup_ioapic_irq(u
 	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		disable_8259A_irq(irq);
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
@@ -2281,7 +2280,6 @@ static unsigned int startup_ioapic_irq(u
 	return was_pending;
 }
 
-#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
 
@@ -2294,14 +2292,6 @@ static int ioapic_retrigger_irq(unsigned
 
 	return 1;
 }
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-	apic->send_IPI_self(irq_cfg(irq)->vector);
-
-	return 1;
-}
-#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2339,13 +2329,9 @@ static void __target_IO_APIC_irq(unsigne
 	struct irq_pin_list *entry;
 	u8 vector = cfg->vector;
 
-	entry = cfg->irq_2_pin;
-	for (;;) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 
-		if (!entry)
-			break;
-
 		apic = entry->apic;
 		pin = entry->pin;
 		/*
@@ -2358,9 +2344,6 @@ static void __target_IO_APIC_irq(unsigne
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
 		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
 	}
 }
 
@@ -2585,11 +2568,8 @@ atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
-#endif
 	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
@@ -2602,31 +2582,28 @@ static void ack_apic_level(unsigned int 
 	}
 #endif
 
-#ifdef CONFIG_X86_32
 	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
+	 * It appears there is an erratum which affects at least version 0x11
+	 * of I/O APIC (that's the 82093AA and cores integrated into various
+	 * chipsets).  Under certain conditions a level-triggered interrupt is
+	 * erroneously delivered as edge-triggered one but the respective IRR
+	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	 * message but it will never arrive and further interrupts are blocked
+	 * from the source.  The exact reason is so far unknown, but the
+	 * phenomenon was observed when two consecutive interrupt requests
+	 * from a given source get delivered to the same CPU and the source is
+	 * temporarily disabled in between.
+	 *
+	 * A workaround is to simulate an EOI message manually.  We achieve it
+	 * by setting the trigger mode to edge and then to level when the edge
+	 * trigger mode gets detected in the TMR of a local APIC for a
+	 * level-triggered interrupt.  We mask the source for the time of the
+	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
+	 * The idea is from Manfred Spraul.  --macro
+	 */
 	cfg = desc->chip_data;
 	i = cfg->vector;
-
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
 
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
@@ -2668,7 +2645,7 @@ static void ack_apic_level(unsigned int 
 		unmask_IO_APIC_irq_desc(desc);
 	}
 
-#ifdef CONFIG_X86_32
+	/* Tail end of version 0x11 I/O APIC bug workaround */
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
@@ -2676,26 +2653,15 @@ static void ack_apic_level(unsigned int 
 		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
-#endif
 }
 
 #ifdef CONFIG_INTR_REMAP
 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
-	int apic, pin;
 	struct irq_pin_list *entry;
 
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		io_apic_eoi(entry->apic, entry->pin);
 }
 
 static void
@@ -2785,7 +2751,7 @@ static inline void init_IO_APIC_traps(vo
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < NR_IRQS_LEGACY)
+			if (irq < nr_legacy_irqs)
 				make_8259A_irq(irq);
 			else
 				/* Strange. Oh, well.. */
@@ -3125,7 +3091,7 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
+#define PIC_IRQS	(1UL << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
@@ -3137,23 +3103,21 @@ void __init setup_IO_APIC(void)
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
 #endif
-
-	io_apic_irqs = ~PIC_IRQS;
+	io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 	/*
          * Set up IO-APIC IRQ routing.
          */
 #ifndef CONFIG_XEN
-#ifdef CONFIG_X86_32
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
-#endif
+	x86_init.mpparse.setup_ioapic_ids();
+
 	sync_Arb_IDs();
 #endif
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
-	check_timer();
+	if (nr_legacy_irqs)
+		check_timer();
 }
 
 /*
@@ -3263,7 +3227,6 @@ static int __init ioapic_init_sysfs(void
 
 device_initcall(ioapic_init_sysfs);
 
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
@@ -3335,8 +3298,7 @@ void destroy_irq(unsigned int irq)
 	cfg = desc->chip_data;
 	dynamic_irq_cleanup(irq);
 	/* connect back irq_cfg */
-	if (desc)
-		desc->chip_data = cfg;
+	desc->chip_data = cfg;
 
 	free_irte(irq);
 	spin_lock_irqsave(&vector_lock, flags);
@@ -4014,9 +3976,13 @@ static int __io_apic_set_pci_routing(str
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= NR_IRQS_LEGACY) {
+	if (irq >= nr_legacy_irqs) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_node(cfg, node, ioapic, pin);
+		if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+			printk(KERN_INFO "can not add pin %d for irq %d\n",
+				pin, irq);
+			return 0;
+		}
 	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -4045,11 +4011,30 @@ int io_apic_set_pci_routing(struct devic
 	return __io_apic_set_pci_routing(dev, irq, irq_attr);
 }
 
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
+u8 __init io_apic_unique_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+#ifndef CONFIG_XEN
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return io_apic_get_unique_id(nr_ioapics, id);
+	else
+#endif
+		return id;
+#else
+	int i;
+	DECLARE_BITMAP(used, 256);
 
-#ifdef CONFIG_ACPI
+	bitmap_zero(used, 256);
+	for (i = 0; i < nr_ioapics; i++) {
+		struct mpc_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->apicid, used);
+	}
+	if (!test_bit(id, used))
+		return id;
+	return find_first_zero_bit(used, 256);
+#endif
+}
 
 #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4158,8 +4143,6 @@ int acpi_get_override_irq(int bus_irq, i
 	return 0;
 }
 
-#endif /* CONFIG_ACPI */
-
 #ifndef CONFIG_XEN
 /*
  * This function currently is only a helper for the i386 smp boot process where
@@ -4214,7 +4197,7 @@ void __init setup_ioapic_dest(void)
 
 static struct resource *ioapic_resources;
 
-static struct resource * __init ioapic_setup_resources(void)
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 {
 	unsigned long n;
 	struct resource *res;
@@ -4230,15 +4213,13 @@ static struct resource * __init ioapic_s
 	mem = alloc_bootmem(n);
 	res = (void *)mem;
 
-	if (mem != NULL) {
-		mem += sizeof(struct resource) * nr_ioapics;
+	mem += sizeof(struct resource) * nr_ioapics;
 
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
+	for (i = 0; i < nr_ioapics; i++) {
+		res[i].name = mem;
+		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		sprintf(mem,  "IOAPIC %u", i);
+		mem += IOAPIC_RESOURCE_NAME_SIZE;
 	}
 
 	ioapic_resources = res;
@@ -4252,7 +4233,7 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
-	ioapic_res = ioapic_setup_resources();
+	ioapic_res = ioapic_setup_resources(nr_ioapics);
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4281,11 +4262,9 @@ fake_ioapic_page:
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
-		if (ioapic_res != NULL) {
-			ioapic_res->start = ioapic_phys;
-			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-			ioapic_res++;
-		}
+		ioapic_res->start = ioapic_phys;
+		ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+		ioapic_res++;
 	}
 }
 
@@ -4307,3 +4286,78 @@ void __init ioapic_insert_resources(void
 	}
 }
 #endif /* !CONFIG_XEN */
+
+int mp_find_ioapic(int gsi)
+{
+	int i = 0;
+
+	/* Find the IOAPIC that manages this GSI. */
+	for (i = 0; i < nr_ioapics; i++) {
+		if ((gsi >= mp_gsi_routing[i].gsi_base)
+		    && (gsi <= mp_gsi_routing[i].gsi_end))
+			return i;
+	}
+
+	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+	return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+	if (WARN_ON(ioapic == -1))
+		return -1;
+	if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
+		return -1;
+
+	return gsi - mp_gsi_routing[ioapic].gsi_base;
+}
+
+static int bad_ioapic(unsigned long address)
+{
+	if (nr_ioapics >= MAX_IO_APICS) {
+		printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+		return 1;
+	}
+	if (!address) {
+		printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+		       " found in table, skipping!\n");
+		return 1;
+	}
+	return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+	int idx = 0;
+
+	if (bad_ioapic(address))
+		return;
+
+	idx = nr_ioapics;
+
+	mp_ioapics[idx].type = MP_IOAPIC;
+	mp_ioapics[idx].flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].apicaddr = address;
+
+#ifndef CONFIG_XEN
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+	mp_ioapics[idx].apicid = io_apic_unique_id(id);
+	mp_ioapics[idx].apicver = io_apic_get_version(idx);
+
+	/*
+	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+	 */
+	mp_gsi_routing[idx].gsi_base = gsi_base;
+	mp_gsi_routing[idx].gsi_end = gsi_base +
+	    io_apic_get_redir_entries(idx);
+
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+	       mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
+	       mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
+
+	nr_ioapics++;
+}
--- 12.2.orig/arch/x86/kernel/apic/ipi-xen.c	2011-02-21 13:57:27.000000000 +0100
+++ 12.2/arch/x86/kernel/apic/ipi-xen.c	2011-03-18 11:29:10.000000000 +0100
@@ -12,6 +12,16 @@ DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]
 static inline void __send_IPI_one(unsigned int cpu, int vector)
 {
 	int irq = per_cpu(ipi_to_irq, cpu)[vector];
+
+	if (vector == NMI_VECTOR) {
+		static int __read_mostly printed;
+		int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+
+		if (rc && !printed)
+			pr_warning("Unable (%d) to send NMI to CPU#%u\n",
+				   printed = rc, cpu);
+		return;
+	}
 	BUG_ON(irq < 0);
 	notify_remote_via_irq(irq);
 }
--- 12.2.orig/arch/x86/kernel/cpu/Makefile	2012-02-16 12:36:52.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/Makefile	2012-02-16 13:32:56.000000000 +0100
@@ -40,7 +40,7 @@ obj-$(CONFIG_MTRR)			+= mtrr/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
-disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o vmware.o
+disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o sched.o vmware.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
--- 12.2.orig/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:08.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:17.000000000 +0200
@@ -334,7 +334,7 @@ static void __cpuinit amd_detect_cmp(str
 int amd_get_nb_id(int cpu)
 {
 	int id = 0;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
 	id = per_cpu(cpu_llc_id, cpu);
 #endif
 	return id;
@@ -529,18 +529,26 @@ static void __cpuinit init_amd(struct cp
 			u64 val;
 
 			clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
+#ifndef CONFIG_XEN
 			if (!rdmsrl_amd_safe(0xc001100d, &val)) {
 				val &= ~(1ULL << 32);
 				wrmsrl_amd_safe(0xc001100d, val);
 			}
+#else
+			pr_warning("Long-mode LAHF feature wrongly enabled -"
+				   "hypervisor update needed\n");
+			(void)&val;
+#endif
 		}
 
 	}
 	if (c->x86 >= 0x10)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 
+#ifndef CONFIG_XEN
 	/* get apicid instead of initial apic id from cpuid */
 	c->apicid = hard_smp_processor_id();
+#endif
 #else
 
 	/*
--- 12.2.orig/arch/x86/kernel/cpu/common-xen.c	2011-05-18 10:46:35.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/common-xen.c	2012-04-20 15:14:02.000000000 +0200
@@ -13,13 +13,13 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
-#include <asm/topology.h>
-#include <asm/cpumask.h>
+#include <linux/topology.h>
+#include <linux/cpumask.h>
 #include <asm/pgtable.h>
 #include <asm/atomic.h>
 #include <asm/proto.h>
@@ -28,13 +28,12 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mtrr.h>
-#include <asm/numa.h>
+#include <linux/numa.h>
 #include <asm/asm.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/pat.h>
-#include <asm/smp.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -102,17 +101,17 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	 * TLS descriptors are currently at a different place compared to i386.
 	 * Hopefully nobody expects them at a fixed place (Wine?)
 	 */
-	[GDT_ENTRY_KERNEL32_CS]		= { { { 0x0000ffff, 0x00cf9b00 } } },
-	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00af9b00 } } },
-	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9300 } } },
-	[GDT_ENTRY_DEFAULT_USER32_CS]	= { { { 0x0000ffff, 0x00cffb00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff300 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00affb00 } } },
+	[GDT_ENTRY_KERNEL32_CS]		= GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER32_CS]	= GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
 #else
-	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00cf9a00 } } },
-	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9200 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00cffa00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff200 } } },
+	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
 #ifndef CONFIG_XEN
 	/*
 	 * Segments used for calling PnP BIOS have byte granularity.
@@ -120,29 +119,29 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	 * the transfer segment sizes are set at run time.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_PNPBIOS_CS32]	= { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS32]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
 	/* 16-bit code */
-	[GDT_ENTRY_PNPBIOS_CS16]	= { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS16]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_DS]		= { { { 0x0000ffff, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_DS]		= GDT_ENTRY_INIT(0x0092, 0, 0xffff),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS1]		= { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS1]		= GDT_ENTRY_INIT(0x0092, 0, 0),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS2]		= { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS2]		= GDT_ENTRY_INIT(0x0092, 0, 0),
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * are set at run time.  All have 64k limits.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_APMBIOS_BASE]	= { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
 	/* 16-bit code */
-	[GDT_ENTRY_APMBIOS_BASE+1]	= { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE+1]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
 	/* data */
-	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
+	[GDT_ENTRY_APMBIOS_BASE+2]	= GDT_ENTRY_INIT(0x4092, 0, 0xffff),
 
-	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_ESPFIX_SS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
 #endif
-	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_PERCPU]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
 	GDT_STACK_CANARY_INIT
 #endif
 } };
@@ -911,7 +910,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_counters();
+	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1024,7 +1023,7 @@ __setup("clearcpuid=", setup_disablecpui
 
 #ifdef CONFIG_X86_64
 #ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
 #endif
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1038,13 +1037,21 @@ void xen_switch_pt(void)
 #endif
 }
 
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot.  Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+	&init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
 	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
 #ifndef CONFIG_X86_NO_TSS
@@ -1060,8 +1067,7 @@ static const unsigned int exception_stac
 };
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
-	__aligned(PAGE_SIZE);
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
 #endif
 
 void __cpuinit syscall_init(void)
@@ -1111,8 +1117,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist
 
 #else	/* CONFIG_X86_64 */
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, stack_canary);
+DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
 /* Make sure %fs and %gs are initialized properly in idle threads */
--- 12.2.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/mcheck/mce-inject.c	2012-02-08 12:15:59.000000000 +0100
@@ -151,7 +151,7 @@ static void raise_mce(struct mce *m)
 	if (context == MCJ_CTX_RANDOM)
 		return;
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 	if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
--- 12.2.orig/arch/x86/kernel/cpu/mtrr/main-xen.c	2011-02-01 14:42:26.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/mtrr/main-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -1,10 +1,9 @@
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/ctype.h>
+#define DEBUG
+
+#include <linux/uaccess.h>
 #include <linux/module.h>
-#include <linux/seq_file.h>
-#include <asm/uaccess.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/mtrr.h>
 #include "mtrr.h"
@@ -58,7 +57,7 @@ static void __init init_table(void)
 		mtrr_usage_table[i] = 0;
 }
 
-int mtrr_add_page(unsigned long base, unsigned long size, 
+int mtrr_add_page(unsigned long base, unsigned long size,
 		  unsigned int type, bool increment)
 {
 	int error;
@@ -88,25 +87,23 @@ int mtrr_add_page(unsigned long base, un
 static int mtrr_check(unsigned long base, unsigned long size)
 {
 	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-		printk(KERN_WARNING
-			"mtrr: size and base must be multiples of 4 kiB\n");
-		printk(KERN_DEBUG
-			"mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+		pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+		pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 		dump_stack();
 		return -1;
 	}
 	return 0;
 }
 
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
-	 bool increment)
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+	     bool increment)
 {
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
 			     increment);
 }
+EXPORT_SYMBOL(mtrr_add);
 
 int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 {
@@ -128,13 +125,13 @@ int mtrr_del_page(int reg, unsigned long
 			}
 		}
 		if (reg < 0) {
-			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
-			       size);
+			pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+				 base, size);
 			goto out;
 		}
 	}
 	if (mtrr_usage_table[reg] < 1) {
-		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+		pr_warning("mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
 	if (--mtrr_usage_table[reg] < 1) {
@@ -153,15 +150,12 @@ int mtrr_del_page(int reg, unsigned long
 	return error;
 }
 
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
+int mtrr_del(int reg, unsigned long base, unsigned long size)
 {
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
 }
-
-EXPORT_SYMBOL(mtrr_add);
 EXPORT_SYMBOL(mtrr_del);
 
 /*
--- 12.2.orig/arch/x86/kernel/e820-xen.c	2011-09-23 15:55:04.000000000 +0200
+++ 12.2/arch/x86/kernel/e820-xen.c	2011-09-23 15:55:50.000000000 +0200
@@ -137,7 +137,7 @@ static void __init __e820_add_region(str
 {
 	int x = e820x->nr_map;
 
-	if (x == ARRAY_SIZE(e820x->map)) {
+	if (x >= ARRAY_SIZE(e820x->map)) {
 		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 		return;
 	}
@@ -1459,7 +1459,7 @@ void __init e820_reserve_resources(void)
 	struct resource *res;
 	u64 end;
 
-	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
 	e820_res = res;
 	for (i = 0; i < e820.nr_map; i++) {
 		end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1506,8 +1506,8 @@ static unsigned long ram_alignment(resou
 	if (mb < 16)
 		return 1024*1024;
 
-	/* To 32MB for anything above that */
-	return 32*1024*1024;
+	/* To 64MB for anything above that */
+	return 64*1024*1024;
 }
 
 #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
@@ -1547,59 +1547,8 @@ void __init e820_reserve_resources_late(
 
 #undef e820
 
-#ifndef CONFIG_XEN
 char *__init default_machine_specific_memory_setup(void)
 {
-	char *who = "BIOS-e820";
-	u32 new_nr;
-	/*
-	 * Try to copy the BIOS-supplied E820-map.
-	 *
-	 * Otherwise fake a memory map; one section from 0k->640k,
-	 * the next section from 1mb->appropriate_mem_k
-	 */
-	new_nr = boot_params.e820_entries;
-	sanitize_e820_map(boot_params.e820_map,
-			ARRAY_SIZE(boot_params.e820_map),
-			&new_nr);
-	boot_params.e820_entries = new_nr;
-	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
-	  < 0) {
-		u64 mem_size;
-
-		/* compare results from other methods and take the greater */
-		if (boot_params.alt_mem_k
-		    < boot_params.screen_info.ext_mem_k) {
-			mem_size = boot_params.screen_info.ext_mem_k;
-			who = "BIOS-88";
-		} else {
-			mem_size = boot_params.alt_mem_k;
-			who = "BIOS-e801";
-		}
-
-		e820.nr_map = 0;
-		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-	}
-
-	/* In case someone cares... */
-	return who;
-}
-
-char *__init __attribute__((weak)) machine_specific_memory_setup(void)
-{
-	if (x86_quirks->arch_memory_setup) {
-		char *who = x86_quirks->arch_memory_setup();
-
-		if (who)
-			return who;
-	}
-	return default_machine_specific_memory_setup();
-}
-#endif
-
-static char * __init _memory_setup(void)
-{
 	int rc, nr_map;
 	struct xen_memory_map memmap;
 	static struct e820entry __initdata map[E820MAX];
@@ -1643,7 +1592,7 @@ void __init setup_memory_map(void)
 {
 	char *who;
 
-	who = _memory_setup();
+	who = x86_init.resources.memory_setup();
 #ifndef CONFIG_XEN_UNPRIVILEGED_GUEST
 #ifdef CONFIG_XEN
 	if (is_initial_xendomain()) {
--- 12.2.orig/arch/x86/kernel/early_printk-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/arch/x86/kernel/early_printk-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -178,7 +178,6 @@ static __init void early_serial_init(cha
  * mappings. Someone should fix this for domain 0. For now, use fake serial.
  */
 #define early_vga_console early_serial_console
-#define xenboot_console early_serial_console
 
 #endif
 
@@ -189,721 +188,6 @@ static struct console early_serial_conso
 	.index =	-1,
 };
 
-#ifdef CONFIG_EARLY_PRINTK_DBGP
-
-static struct ehci_caps __iomem *ehci_caps;
-static struct ehci_regs __iomem *ehci_regs;
-static struct ehci_dbg_port __iomem *ehci_debug;
-static unsigned int dbgp_endpoint_out;
-
-struct ehci_dev {
-	u32 bus;
-	u32 slot;
-	u32 func;
-};
-
-static struct ehci_dev ehci_dev;
-
-#define USB_DEBUG_DEVNUM 127
-
-#define DBGP_DATA_TOGGLE	0x8800
-
-static inline u32 dbgp_pid_update(u32 x, u32 tok)
-{
-	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
-}
-
-static inline u32 dbgp_len_update(u32 x, u32 len)
-{
-	return (x & ~0x0f) | (len & 0x0f);
-}
-
-/*
- * USB Packet IDs (PIDs)
- */
-
-/* token */
-#define USB_PID_OUT		0xe1
-#define USB_PID_IN		0x69
-#define USB_PID_SOF		0xa5
-#define USB_PID_SETUP		0x2d
-/* handshake */
-#define USB_PID_ACK		0xd2
-#define USB_PID_NAK		0x5a
-#define USB_PID_STALL		0x1e
-#define USB_PID_NYET		0x96
-/* data */
-#define USB_PID_DATA0		0xc3
-#define USB_PID_DATA1		0x4b
-#define USB_PID_DATA2		0x87
-#define USB_PID_MDATA		0x0f
-/* Special */
-#define USB_PID_PREAMBLE	0x3c
-#define USB_PID_ERR		0x3c
-#define USB_PID_SPLIT		0x78
-#define USB_PID_PING		0xb4
-#define USB_PID_UNDEF_0		0xf0
-
-#define USB_PID_DATA_TOGGLE	0x88
-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
-
-#define PCI_CAP_ID_EHCI_DEBUG	0xa
-
-#define HUB_ROOT_RESET_TIME	50	/* times are in msec */
-#define HUB_SHORT_RESET_TIME	10
-#define HUB_LONG_RESET_TIME	200
-#define HUB_RESET_TIMEOUT	500
-
-#define DBGP_MAX_PACKET		8
-
-static int dbgp_wait_until_complete(void)
-{
-	u32 ctrl;
-	int loop = 0x100000;
-
-	do {
-		ctrl = readl(&ehci_debug->control);
-		/* Stop when the transaction is finished */
-		if (ctrl & DBGP_DONE)
-			break;
-	} while (--loop > 0);
-
-	if (!loop)
-		return -1;
-
-	/*
-	 * Now that we have observed the completed transaction,
-	 * clear the done bit.
-	 */
-	writel(ctrl | DBGP_DONE, &ehci_debug->control);
-	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
-}
-
-static void __init dbgp_mdelay(int ms)
-{
-	int i;
-
-	while (ms--) {
-		for (i = 0; i < 1000; i++)
-			outb(0x1, 0x80);
-	}
-}
-
-static void dbgp_breath(void)
-{
-	/* Sleep to give the debug port a chance to breathe */
-}
-
-static int dbgp_wait_until_done(unsigned ctrl)
-{
-	u32 pids, lpid;
-	int ret;
-	int loop = 3;
-
-retry:
-	writel(ctrl | DBGP_GO, &ehci_debug->control);
-	ret = dbgp_wait_until_complete();
-	pids = readl(&ehci_debug->pids);
-	lpid = DBGP_PID_GET(pids);
-
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * If the port is getting full or it has dropped data
-	 * start pacing ourselves, not necessary but it's friendly.
-	 */
-	if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
-		dbgp_breath();
-
-	/* If I get a NACK reissue the transmission */
-	if (lpid == USB_PID_NAK) {
-		if (--loop > 0)
-			goto retry;
-	}
-
-	return ret;
-}
-
-static void dbgp_set_data(const void *buf, int size)
-{
-	const unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = hi = 0;
-	for (i = 0; i < 4 && i < size; i++)
-		lo |= bytes[i] << (8*i);
-	for (; i < 8 && i < size; i++)
-		hi |= bytes[i] << (8*(i - 4));
-	writel(lo, &ehci_debug->data03);
-	writel(hi, &ehci_debug->data47);
-}
-
-static void __init dbgp_get_data(void *buf, int size)
-{
-	unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = readl(&ehci_debug->data03);
-	hi = readl(&ehci_debug->data47);
-	for (i = 0; i < 4 && i < size; i++)
-		bytes[i] = (lo >> (8*i)) & 0xff;
-	for (; i < 8 && i < size; i++)
-		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
-}
-
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
-			 const char *bytes, int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_OUT);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	dbgp_set_data(bytes, size);
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	return ret;
-}
-
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
-				 int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_IN);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl &= ~DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	if (size > ret)
-		size = ret;
-	dbgp_get_data(data, size);
-	return ret;
-}
-
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
-	int request, int value, int index, void *data, int size)
-{
-	u32 pids, addr, ctrl;
-	struct usb_ctrlrequest req;
-	int read;
-	int ret;
-
-	read = (requesttype & USB_DIR_IN) != 0;
-	if (size > (read ? DBGP_MAX_PACKET:0))
-		return -1;
-
-	/* Compute the control message */
-	req.bRequestType = requesttype;
-	req.bRequest = request;
-	req.wValue = cpu_to_le16(value);
-	req.wIndex = cpu_to_le16(index);
-	req.wLength = cpu_to_le16(size);
-
-	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
-	addr = DBGP_EPADDR(devnum, 0);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, sizeof(req));
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	/* Send the setup message */
-	dbgp_set_data(&req, sizeof(req));
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	/* Read the result */
-	return dbgp_bulk_read(devnum, 0, data, size);
-}
-
-
-/* Find a PCI capability */
-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
-{
-	u8 pos;
-	int bytes;
-
-	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
-		PCI_STATUS_CAP_LIST))
-		return 0;
-
-	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
-	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
-		u8 id;
-
-		pos &= ~3;
-		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
-		if (id == 0xff)
-			break;
-		if (id == cap)
-			return pos;
-
-		pos = read_pci_config_byte(num, slot, func,
-						 pos+PCI_CAP_LIST_NEXT);
-	}
-	return 0;
-}
-
-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
-{
-	u32 class;
-
-	class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
-	if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
-		return 0;
-
-	return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
-}
-
-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
-{
-	u32 bus, slot, func;
-
-	for (bus = 0; bus < 256; bus++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				unsigned cap;
-
-				cap = __find_dbgp(bus, slot, func);
-
-				if (!cap)
-					continue;
-				if (ehci_num-- != 0)
-					continue;
-				*rbus = bus;
-				*rslot = slot;
-				*rfunc = func;
-				return cap;
-			}
-		}
-	}
-	return 0;
-}
-
-static int __init ehci_reset_port(int port)
-{
-	u32 portsc;
-	u32 delay_time, delay;
-	int loop;
-
-	/* Reset the usb debug port */
-	portsc = readl(&ehci_regs->port_status[port - 1]);
-	portsc &= ~PORT_PE;
-	portsc |= PORT_RESET;
-	writel(portsc, &ehci_regs->port_status[port - 1]);
-
-	delay = HUB_ROOT_RESET_TIME;
-	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
-	     delay_time += delay) {
-		dbgp_mdelay(delay);
-
-		portsc = readl(&ehci_regs->port_status[port - 1]);
-		if (portsc & PORT_RESET) {
-			/* force reset to complete */
-			loop = 2;
-			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
-				&ehci_regs->port_status[port - 1]);
-			do {
-				portsc = readl(&ehci_regs->port_status[port-1]);
-			} while ((portsc & PORT_RESET) && (--loop > 0));
-		}
-
-		/* Device went away? */
-		if (!(portsc & PORT_CONNECT))
-			return -ENOTCONN;
-
-		/* bomb out completely if something weird happend */
-		if ((portsc & PORT_CSC))
-			return -EINVAL;
-
-		/* If we've finished resetting, then break out of the loop */
-		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
-			return 0;
-	}
-	return -EBUSY;
-}
-
-static int __init ehci_wait_for_port(int port)
-{
-	u32 status;
-	int ret, reps;
-
-	for (reps = 0; reps < 3; reps++) {
-		dbgp_mdelay(100);
-		status = readl(&ehci_regs->status);
-		if (status & STS_PCD) {
-			ret = ehci_reset_port(port);
-			if (ret == 0)
-				return 0;
-		}
-	}
-	return -ENOTCONN;
-}
-
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
-typedef void (*set_debug_port_t)(int port);
-
-static void __init default_set_debug_port(int port)
-{
-}
-
-static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
-
-static void __init nvidia_set_debug_port(int port)
-{
-	u32 dword;
-	dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-				 0x74);
-	dword &= ~(0x0f<<12);
-	dword |= ((port & 0x0f)<<12);
-	write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
-				 dword);
-	dbgp_printk("set debug port to %d\n", port);
-}
-
-static void __init detect_set_debug_port(void)
-{
-	u32 vendorid;
-
-	vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-		 0x00);
-
-	if ((vendorid & 0xffff) == 0x10de) {
-		dbgp_printk("using nvidia set_debug_port\n");
-		set_debug_port = nvidia_set_debug_port;
-	}
-}
-
-static int __init ehci_setup(void)
-{
-	struct usb_debug_descriptor dbgp_desc;
-	u32 cmd, ctrl, status, portsc, hcs_params;
-	u32 debug_port, new_debug_port = 0, n_ports;
-	u32  devnum;
-	int ret, i;
-	int loop;
-	int port_map_tried;
-	int playtimes = 3;
-
-try_next_time:
-	port_map_tried = 0;
-
-try_next_port:
-
-	hcs_params = readl(&ehci_caps->hcs_params);
-	debug_port = HCS_DEBUG_PORT(hcs_params);
-	n_ports    = HCS_N_PORTS(hcs_params);
-
-	dbgp_printk("debug_port: %d\n", debug_port);
-	dbgp_printk("n_ports:    %d\n", n_ports);
-
-	for (i = 1; i <= n_ports; i++) {
-		portsc = readl(&ehci_regs->port_status[i-1]);
-		dbgp_printk("portstatus%d: %08x\n", i, portsc);
-	}
-
-	if (port_map_tried && (new_debug_port != debug_port)) {
-		if (--playtimes) {
-			set_debug_port(new_debug_port);
-			goto try_next_time;
-		}
-		return -1;
-	}
-
-	loop = 10;
-	/* Reset the EHCI controller */
-	cmd = readl(&ehci_regs->command);
-	cmd |= CMD_RESET;
-	writel(cmd, &ehci_regs->command);
-	do {
-		cmd = readl(&ehci_regs->command);
-	} while ((cmd & CMD_RESET) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("can not reset ehci\n");
-		return -1;
-	}
-	dbgp_printk("ehci reset done\n");
-
-	/* Claim ownership, but do not enable yet */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_OWNER;
-	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
-	writel(ctrl, &ehci_debug->control);
-
-	/* Start the ehci running */
-	cmd = readl(&ehci_regs->command);
-	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
-	cmd |= CMD_RUN;
-	writel(cmd, &ehci_regs->command);
-
-	/* Ensure everything is routed to the EHCI */
-	writel(FLAG_CF, &ehci_regs->configured_flag);
-
-	/* Wait until the controller is no longer halted */
-	loop = 10;
-	do {
-		status = readl(&ehci_regs->status);
-	} while ((status & STS_HALT) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("ehci can be started\n");
-		return -1;
-	}
-	dbgp_printk("ehci started\n");
-
-	/* Wait for a device to show up in the debug port */
-	ret = ehci_wait_for_port(debug_port);
-	if (ret < 0) {
-		dbgp_printk("No device found in debug port\n");
-		goto next_debug_port;
-	}
-	dbgp_printk("ehci wait for port done\n");
-
-	/* Enable the debug port */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_CLAIM;
-	writel(ctrl, &ehci_debug->control);
-	ctrl = readl(&ehci_debug->control);
-	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
-		dbgp_printk("No device in debug port\n");
-		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
-		goto err;
-	}
-	dbgp_printk("debug ported enabled\n");
-
-	/* Completely transfer the debug device to the debug controller */
-	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
-	portsc &= ~PORT_PE;
-	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
-	dbgp_mdelay(100);
-
-	/* Find the debug device and make it device number 127 */
-	for (devnum = 0; devnum <= 127; devnum++) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
-			&dbgp_desc, sizeof(dbgp_desc));
-		if (ret > 0)
-			break;
-	}
-	if (devnum > 127) {
-		dbgp_printk("Could not find attached debug device\n");
-		goto err;
-	}
-	if (ret < 0) {
-		dbgp_printk("Attached device is not a debug device\n");
-		goto err;
-	}
-	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
-	/* Move the device to 127 if it isn't already there */
-	if (devnum != USB_DEBUG_DEVNUM) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
-		if (ret < 0) {
-			dbgp_printk("Could not move attached device to %d\n",
-				USB_DEBUG_DEVNUM);
-			goto err;
-		}
-		devnum = USB_DEBUG_DEVNUM;
-		dbgp_printk("debug device renamed to 127\n");
-	}
-
-	/* Enable the debug interface */
-	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
-	if (ret < 0) {
-		dbgp_printk(" Could not enable the debug device\n");
-		goto err;
-	}
-	dbgp_printk("debug interface enabled\n");
-
-	/* Perform a small write to get the even/odd data state in sync
-	 */
-	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
-	if (ret < 0) {
-		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
-		goto err;
-	}
-	dbgp_printk("small write doned\n");
-
-	return 0;
-err:
-	/* Things didn't work so remove my claim */
-	ctrl = readl(&ehci_debug->control);
-	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
-	writel(ctrl, &ehci_debug->control);
-	return -1;
-
-next_debug_port:
-	port_map_tried |= (1<<(debug_port - 1));
-	new_debug_port = ((debug_port-1+1)%n_ports) + 1;
-	if (port_map_tried != ((1<<n_ports) - 1)) {
-		set_debug_port(new_debug_port);
-		goto try_next_port;
-	}
-	if (--playtimes) {
-		set_debug_port(new_debug_port);
-		goto try_next_time;
-	}
-
-	return -1;
-}
-
-static int __init early_dbgp_init(char *s)
-{
-	u32 debug_port, bar, offset;
-	u32 bus, slot, func, cap;
-	void __iomem *ehci_bar;
-	u32 dbgp_num;
-	u32 bar_val;
-	char *e;
-	int ret;
-	u8 byte;
-
-	if (!early_pci_allowed())
-		return -1;
-
-	dbgp_num = 0;
-	if (*s)
-		dbgp_num = simple_strtoul(s, &e, 10);
-	dbgp_printk("dbgp_num: %d\n", dbgp_num);
-
-	cap = find_dbgp(dbgp_num, &bus, &slot, &func);
-	if (!cap)
-		return -1;
-
-	dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
-			 func);
-
-	debug_port = read_pci_config(bus, slot, func, cap);
-	bar = (debug_port >> 29) & 0x7;
-	bar = (bar * 4) + 0xc;
-	offset = (debug_port >> 16) & 0xfff;
-	dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
-	if (bar != PCI_BASE_ADDRESS_0) {
-		dbgp_printk("only debug ports on bar 1 handled.\n");
-
-		return -1;
-	}
-
-	bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
-	dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
-	if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
-		dbgp_printk("only simple 32bit mmio bars supported\n");
-
-		return -1;
-	}
-
-	/* double check if the mem space is enabled */
-	byte = read_pci_config_byte(bus, slot, func, 0x04);
-	if (!(byte & 0x2)) {
-		byte  |= 0x02;
-		write_pci_config_byte(bus, slot, func, 0x04, byte);
-		dbgp_printk("mmio for ehci enabled\n");
-	}
-
-	/*
-	 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
-	 * than enough.  1K is the biggest I have seen.
-	 */
-	set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
-	ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
-	ehci_bar += bar_val & ~PAGE_MASK;
-	dbgp_printk("ehci_bar: %p\n", ehci_bar);
-
-	ehci_caps  = ehci_bar;
-	ehci_regs  = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
-	ehci_debug = ehci_bar + offset;
-	ehci_dev.bus = bus;
-	ehci_dev.slot = slot;
-	ehci_dev.func = func;
-
-	detect_set_debug_port();
-
-	ret = ehci_setup();
-	if (ret < 0) {
-		dbgp_printk("ehci_setup failed\n");
-		ehci_debug = NULL;
-
-		return -1;
-	}
-
-	return 0;
-}
-
-static void early_dbgp_write(struct console *con, const char *str, u32 n)
-{
-	int chunk, ret;
-
-	if (!ehci_debug)
-		return;
-	while (n > 0) {
-		chunk = n;
-		if (chunk > DBGP_MAX_PACKET)
-			chunk = DBGP_MAX_PACKET;
-		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
-			dbgp_endpoint_out, str, chunk);
-		str += chunk;
-		n -= chunk;
-	}
-}
-
-static struct console early_dbgp_console = {
-	.name =		"earlydbg",
-	.write =	early_dbgp_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
-#endif
-
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
 static int __initdata early_console_initialized;
@@ -920,10 +204,24 @@ asmlinkage void early_printk(const char 
 	va_end(ap);
 }
 
+static inline void early_console_register(struct console *con, int keep_early)
+{
+	if (early_console->index != -1) {
+		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+		       con->name);
+		return;
+	}
+	early_console = con;
+	if (keep_early)
+		early_console->flags &= ~CON_BOOT;
+	else
+		early_console->flags |= CON_BOOT;
+	register_console(early_console);
+}
 
 static int __init setup_early_printk(char *buf)
 {
-	int keep_early;
+	int keep;
 
 	if (!buf)
 		return 0;
@@ -932,44 +230,41 @@ static int __init setup_early_printk(cha
 		return 0;
 	early_console_initialized = 1;
 
-	keep_early = (strstr(buf, "keep") != NULL);
+	keep = (strstr(buf, "keep") != NULL);
 
-	if (!strncmp(buf, "serial", 6)) {
-		early_serial_init(buf + 6);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "ttyS", 4)) {
-		early_serial_init(buf);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "vga", 3)) {
+	while (*buf != '\0') {
+		if (!strncmp(buf, "serial", 6)) {
+			buf += 6;
+			early_serial_init(buf);
+			early_console_register(&early_serial_console, keep);
+			if (!strncmp(buf, ",ttyS", 5))
+				buf += 5;
+		}
+		if (!strncmp(buf, "ttyS", 4)) {
+			early_serial_init(buf + 4);
+			early_console_register(&early_serial_console, keep);
+		}
 #ifndef CONFIG_XEN
-		&& boot_params.screen_info.orig_video_isVGA == 1) {
-		max_xpos = boot_params.screen_info.orig_video_cols;
-		max_ypos = boot_params.screen_info.orig_video_lines;
-		current_ypos = boot_params.screen_info.orig_y;
+		if (!strncmp(buf, "vga", 3) &&
+		    boot_params.screen_info.orig_video_isVGA == 1) {
+			max_xpos = boot_params.screen_info.orig_video_cols;
+			max_ypos = boot_params.screen_info.orig_video_lines;
+			current_ypos = boot_params.screen_info.orig_y;
+#else
+		if (!strncmp(buf, "vga", 3) || !strncmp(buf, "xen", 3)) {
 #endif
-		early_console = &early_vga_console;
+			early_console_register(&early_vga_console, keep);
+		}
 #ifdef CONFIG_EARLY_PRINTK_DBGP
-	} else if (!strncmp(buf, "dbgp", 4)) {
-		if (early_dbgp_init(buf+4) < 0)
-			return 0;
-		early_console = &early_dbgp_console;
-		/*
-		 * usb subsys will reset ehci controller, so don't keep
-		 * that early console
-		 */
-		keep_early = 0;
+		if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
+			early_console_register(&early_dbgp_console, keep);
 #endif
-#ifdef CONFIG_XEN
-	} else if (!strncmp(buf, "xen", 3)) {
-		early_console = &xenboot_console;
+#ifdef CONFIG_HVC_XEN
+		if (!strncmp(buf, "xen", 3))
+			early_console_register(&xenboot_console, keep);
 #endif
+		buf++;
 	}
-
-	if (keep_early)
-		early_console->flags &= ~CON_BOOT;
-	else
-		early_console->flags |= CON_BOOT;
-	register_console(early_console);
 	return 0;
 }
 
--- 12.2.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:39:17.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:39:43.000000000 +0200
@@ -53,6 +53,7 @@
 #include <asm/hw_irq.h>
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
+#include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <xen/interface/xen.h>
@@ -150,7 +151,7 @@ ENTRY(ftrace_graph_caller)
 END(ftrace_graph_caller)
 
 GLOBAL(return_to_handler)
-	subq  $80, %rsp
+	subq  $24, %rsp
 
 	/* Save the return values */
 	movq %rax, (%rsp)
@@ -159,10 +160,10 @@ GLOBAL(return_to_handler)
 
 	call ftrace_return_to_handler
 
-	movq %rax, 72(%rsp)
+	movq %rax, 16(%rsp)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
-	addq $72, %rsp
+	addq $16, %rsp
 	retq
 #endif
 
@@ -549,20 +550,13 @@ sysret_signal:
 	bt $TIF_SYSCALL_AUDIT,%edx
 	jc sysret_audit
 #endif
-	/* edx:	work flags (arg3) */
-	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
-	xorl %esi,%esi # oldset -> arg2
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call do_notify_resume
-	RESTORE_TOP_OF_STACK %r11
-	RESTORE_REST
-	movl $_TIF_WORK_MASK,%edi
-	/* Use IRET because user could have changed frame. This
-	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
+	/*
+	 * We have a signal, or exit tracing or single-step.
+	 * These all wind up with the iret return path anyway,
+	 * so just join that path right now.
+	 */
+	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+	jmp int_check_syscall_exit_work
 
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -671,6 +665,7 @@ int_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+int_check_syscall_exit_work:
 	SAVE_REST
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -917,7 +912,7 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 apicinterrupt LOCAL_PENDING_VECTOR \
 	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
--- 12.2.orig/arch/x86/kernel/head-xen.c	2011-08-09 11:02:22.000000000 +0200
+++ 12.2/arch/x86/kernel/head-xen.c	2011-09-07 15:59:12.000000000 +0200
@@ -57,9 +57,9 @@ void __init reserve_ebda_region(void)
 #else /* CONFIG_XEN */
 #include <linux/module.h>
 #include <asm/fixmap.h>
+#include <asm/mc146818rtc.h>
 #include <asm/pgtable.h>
 #include <asm/sections.h>
-#include <asm/setup_arch.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/memory.h>
 
@@ -151,9 +151,13 @@ void __init xen_start_kernel(void)
 				     virt_to_machine(empty_zero_page),
 				     PAGE_KERNEL_RO);
 
+	if (is_initial_xendomain()) {
+		x86_platform.get_wallclock = mach_get_cmos_time;
+		x86_platform.set_wallclock = mach_set_rtc_mmss;
+	}
 }
 
-void __init machine_specific_arch_setup(void)
+void __init xen_arch_setup(void)
 {
 	int ret;
 	static const struct callback_register __initconst event = {
--- 12.2.orig/arch/x86/kernel/head32-xen.c	2011-05-09 11:39:22.000000000 +0200
+++ 12.2/arch/x86/kernel/head32-xen.c	2011-05-09 11:40:41.000000000 +0200
@@ -10,11 +10,25 @@
 #include <linux/start_kernel.h>
 
 #include <asm/setup.h>
-#include <asm/setup_arch.h>
 #include <asm/sections.h>
 #include <asm/e820.h>
 #include <asm/pgtable.h>
 #include <asm/trampoline.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+
+static void __init i386_default_early_setup(void)
+{
+	/* Initialize 32bit specific setup functions */
+	if (is_initial_xendomain())
+		x86_init.resources.probe_roms = probe_roms;
+	x86_init.resources.reserve_resources = i386_reserve_resources;
+#ifndef CONFIG_XEN
+	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
+
+	reserve_ebda_region();
+#endif
+}
 
 void __init i386_start_kernel(void)
 {
@@ -48,7 +62,16 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_ebda_region();
+
+	/* Call the subarch specific early setup function */
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_MRST:
+		x86_mrst_early_setup();
+		break;
+	default:
+		i386_default_early_setup();
+		break;
+	}
 #else
 	{
 		int max_cmdline;
@@ -59,6 +82,7 @@ void __init i386_start_kernel(void)
 		boot_command_line[max_cmdline-1] = '\0';
 	}
 
+	i386_default_early_setup();
 	xen_start_kernel();
 #endif
 
--- 12.2.orig/arch/x86/kernel/head64-xen.c	2011-08-09 10:59:59.000000000 +0200
+++ 12.2/arch/x86/kernel/head64-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -20,15 +20,14 @@
 #include <asm/proto.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
-#include <asm/setup_arch.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/e820.h>
-#include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
+#include <asm/bios_ebda.h>
 
 #ifndef CONFIG_XEN
 static void __init zap_identity_mappings(void)
--- 12.2.orig/arch/x86/kernel/head_32-xen.S	2011-08-09 11:03:22.000000000 +0200
+++ 12.2/arch/x86/kernel/head_32-xen.S	2011-08-09 11:03:50.000000000 +0200
@@ -31,7 +31,7 @@
 #define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
-.section .text.head,"ax",@progbits
+__HEAD
 #define VIRT_ENTRY_OFFSET 0x0
 .org VIRT_ENTRY_OFFSET
 ENTRY(startup_32)
@@ -68,7 +68,6 @@ ENTRY(startup_32)
 	 */
 	movl $per_cpu__gdt_page,%eax
 	movl $per_cpu__stack_canary,%ecx
-	subl $20, %ecx
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -121,7 +120,7 @@ ENTRY(hypercall_page)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_fixmap)
 	.fill 1024,4,0
--- 12.2.orig/arch/x86/kernel/head_64-xen.S	2011-08-09 11:03:31.000000000 +0200
+++ 12.2/arch/x86/kernel/head_64-xen.S	2011-08-09 11:03:49.000000000 +0200
@@ -24,7 +24,7 @@
 #include <xen/interface/elfnote.h>
 #include <xen/interface/features.h>
 
-	.section .text.head, "ax", @progbits
+	__HEAD
 	.code64
 	.globl startup_64
 startup_64:
@@ -52,7 +52,7 @@ startup_64:
 
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
-	phys_##name = . - .text.head; \
+	phys_##name = . - .head.text; \
 ENTRY(name)
 
 NEXT_PAGE(init_level4_pgt)
@@ -105,7 +105,7 @@ NEXT_PAGE(hypercall_page)
 
 #undef NEXT_PAGE
 
-	.section .bss.page_aligned, "aw", @nobits
+	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
 	.skip PAGE_SIZE
--- 12.2.orig/arch/x86/kernel/irq-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/kernel/irq-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct 
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s: ", prec, "CNT");
+	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
-	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "  Performance monitoring interrupts\n");
 	seq_printf(p, "%*s: ", prec, "PND");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
@@ -117,7 +117,7 @@ static int show_other_interrupts(struct 
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -219,7 +219,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	sum += per_cpu(mce_exception_count, cpu);
 	sum += per_cpu(mce_poll_count, cpu);
 #endif
--- 12.2.orig/arch/x86/kernel/ldt-xen.c	2011-02-01 14:42:26.000000000 +0100
+++ 12.2/arch/x86/kernel/ldt-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -70,8 +70,8 @@ static int alloc_ldt(mm_context_t *pc, i
 				    XENFEAT_writable_descriptor_tables);
 		load_LDT(pc);
 #ifdef CONFIG_SMP
-		if (!cpus_equal(current->mm->cpu_vm_mask,
-				cpumask_of_cpu(smp_processor_id())))
+		if (!cpumask_equal(mm_cpumask(current->mm),
+				   cpumask_of(smp_processor_id())))
 			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #endif
--- 12.2.orig/arch/x86/kernel/microcode_core-xen.c	2011-12-01 15:33:41.000000000 +0100
+++ 12.2/arch/x86/kernel/microcode_core-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -97,8 +97,8 @@ static ssize_t microcode_write(struct fi
 {
 	ssize_t ret = -EINVAL;
 
-	if ((len >> PAGE_SHIFT) > num_physpages) {
-		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+	if ((len >> PAGE_SHIFT) > totalram_pages) {
+		pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
 		return ret;
  	}
 
@@ -121,7 +121,7 @@ static const struct file_operations micr
 static struct miscdevice microcode_dev = {
 	.minor			= MICROCODE_MINOR,
 	.name			= "microcode",
-	.devnode		= "cpu/microcode",
+	.nodename		= "cpu/microcode",
 	.fops			= &microcode_fops,
 };
 
--- 12.2.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -51,6 +51,13 @@ static int __init mpf_checksum(unsigned 
 	return sum & 0xFF;
 }
 
+#ifndef CONFIG_XEN
+int __init default_mpc_apic_id(struct mpc_cpu *m)
+{
+	return m->apicid;
+}
+#endif
+
 static void __init MP_processor_info(struct mpc_cpu *m)
 {
 #ifndef CONFIG_XEN
@@ -62,10 +69,7 @@ static void __init MP_processor_info(str
 		return;
 	}
 
-	if (x86_quirks->mpc_apic_id)
-		apicid = x86_quirks->mpc_apic_id(m);
-	else
-		apicid = m->apicid;
+	apicid = x86_init.mpparse.mpc_apic_id(m);
 
 	if (m->cpuflag & CPU_BOOTPROCESSOR) {
 		bootup_cpu = " (Bootup-CPU)";
@@ -80,16 +84,18 @@ static void __init MP_processor_info(str
 }
 
 #ifdef CONFIG_X86_IO_APIC
-static void __init MP_bus_info(struct mpc_bus *m)
+void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
 {
-	char str[7];
 	memcpy(str, m->bustype, 6);
 	str[6] = 0;
+	apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+}
 
-	if (x86_quirks->mpc_oem_bus_info)
-		x86_quirks->mpc_oem_bus_info(m, str);
-	else
-		apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+static void __init MP_bus_info(struct mpc_bus *m)
+{
+	char str[7];
+
+	x86_init.mpparse.mpc_oem_bus_info(m, str);
 
 #if MAX_MP_BUSSES < 256
 	if (m->busid >= MAX_MP_BUSSES) {
@@ -106,8 +112,8 @@ static void __init MP_bus_info(struct mp
 		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-		if (x86_quirks->mpc_oem_pci_bus)
-			x86_quirks->mpc_oem_pci_bus(m);
+		if (x86_init.mpparse.mpc_oem_pci_bus)
+			x86_init.mpparse.mpc_oem_pci_bus(m);
 
 		clear_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -303,6 +309,8 @@ static void __init smp_dump_mptable(stru
 			1, mpc, mpc->length, 1);
 }
 
+void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+
 static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 {
 	char str[16];
@@ -326,16 +334,13 @@ static int __init smp_read_mpc(struct mp
 	if (early)
 		return 1;
 
-	if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
-		struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
-		x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
-	}
+	if (mpc->oemptr)
+		x86_init.mpparse.smp_read_mpc_oem(mpc);
 
 	/*
 	 *      Now process the configuration blocks.
 	 */
-	if (x86_quirks->mpc_record)
-		*x86_quirks->mpc_record = 0;
+	x86_init.mpparse.mpc_record(0);
 
 	while (count < mpc->length) {
 		switch (*mpt) {
@@ -367,8 +372,7 @@ static int __init smp_read_mpc(struct mp
 			count = mpc->length;
 			break;
 		}
-		if (x86_quirks->mpc_record)
-			(*x86_quirks->mpc_record)++;
+		x86_init.mpparse.mpc_record(1);
 	}
 
 #ifdef CONFIG_X86_BIGSMP
@@ -496,11 +500,11 @@ static void __init construct_ioapic_tabl
 		MP_bus_info(&bus);
 	}
 
-	ioapic.type = MP_IOAPIC;
-	ioapic.apicid = 2;
-	ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-	ioapic.flags = MPC_APIC_USABLE;
-	ioapic.apicaddr = 0xFEC00000;
+	ioapic.type	= MP_IOAPIC;
+	ioapic.apicid	= 2;
+	ioapic.apicver	= mpc_default_type > 4 ? 0x10 : 0x01;
+	ioapic.flags	= MPC_APIC_USABLE;
+	ioapic.apicaddr	= IO_APIC_DEFAULT_PHYS_BASE;
 	MP_ioapic_info(&ioapic);
 
 	/*
@@ -624,18 +628,18 @@ static int __init check_physptr(struct m
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
-#ifndef CONFIG_XEN
-static void __init __get_smp_config(unsigned int early)
-#else
-void __init get_smp_config(void)
-#define early 0
-#endif
+void __init default_get_smp_config(unsigned int early)
 {
 	struct mpf_intel *mpf = mpf_found;
 
 	if (!mpf)
 		return;
 
+#ifdef CONFIG_XEN
+	BUG_ON(early);
+#define early 0
+#endif
+
 	if (acpi_lapic && early)
 		return;
 
@@ -646,11 +650,6 @@ void __init get_smp_config(void)
 	if (acpi_lapic && acpi_ioapic)
 		return;
 
-	if (x86_quirks->mach_get_smp_config) {
-		if (x86_quirks->mach_get_smp_config(early))
-			return;
-	}
-
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
 	       mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
@@ -695,16 +694,6 @@ void __init get_smp_config(void)
 }
 
 #ifndef CONFIG_XEN
-void __init early_get_smp_config(void)
-{
-	__get_smp_config(1);
-}
-
-void __init get_smp_config(void)
-{
-	__get_smp_config(0);
-}
-
 static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
@@ -775,16 +764,12 @@ static int __init smp_scan_config(unsign
 	return 0;
 }
 
-static void __init __find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(unsigned int reserve)
 {
 #ifndef CONFIG_XEN
 	unsigned int address;
 #endif
 
-	if (x86_quirks->mach_find_smp_config) {
-		if (x86_quirks->mach_find_smp_config(reserve))
-			return;
-	}
 	/*
 	 * FIXME: Linux assumes you have 640K of base ram..
 	 * this continues the error...
@@ -821,16 +806,6 @@ static void __init __find_smp_config(uns
 #endif
 }
 
-void __init early_find_smp_config(void)
-{
-	__find_smp_config(0);
-}
-
-void __init find_smp_config(void)
-{
-	__find_smp_config(1);
-}
-
 #ifdef CONFIG_X86_IO_APIC
 static u8 __initdata irq_used[MAX_IRQ_SOURCES];
 
--- 12.2.orig/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:30:36.000000000 +0200
+++ 12.2/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:30:59.000000000 +0200
@@ -2,6 +2,7 @@
 #include <linux/dma-debug.h>
 #include <linux/bootmem.h>
 #include <linux/pci.h>
+#include <linux/kmemleak.h>
 
 #include <asm/proto.h>
 #include <asm/dma.h>
@@ -29,18 +30,23 @@ int no_iommu __initdata;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
-int iommu_pass_through;
+/*
+ * This variable becomes 1 if iommu=pt is passed on the kernel command line.
+ * If this variable is 1, IOMMU implementations do no DMA translation for
+ * devices and allow every device to access to whole physical memory. This is
+ * useful if a user want to use an IOMMU only for KVM device assignment to
+ * guests and not for driver dma translation.
+ */
+int iommu_pass_through __read_mostly;
 #endif
 
 dma_addr_t bad_dma_address __read_mostly = 0;
 EXPORT_SYMBOL(bad_dma_address);
 
-/* Dummy device used for NULL arguments (normally ISA). Better would
-   be probably a smaller DMA mask, but this is bug-to-bug compatible
-   to older i386. */
+/* Dummy device used for NULL arguments (normally ISA). */
 struct device x86_dma_fallback_dev = {
 	.init_name = "fallback device",
-	.coherent_dma_mask = DMA_BIT_MASK(32),
+	.coherent_dma_mask = ISA_DMA_BIT_MASK,
 	.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
 };
 EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -86,6 +92,11 @@ void __init dma32_reserve_bootmem(void)
 	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
+	/*
+	 * Kmemleak should not scan this block as it may not be mapped via the
+	 * kernel direct mapping.
+	 */
+	kmemleak_ignore(dma32_bootmem_ptr);
 	if (dma32_bootmem_ptr)
 		dma32_bootmem_size = size;
 	else
@@ -176,7 +187,7 @@ again:
 
 #ifndef CONFIG_XEN
 	addr = page_to_phys(page);
-	if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+	if (addr + size > dma_mask) {
 		__free_pages(page, order);
 
 		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
@@ -265,10 +276,8 @@ static __init int iommu_setup(char *p)
 			swiotlb = 1;
 #endif
 #ifndef CONFIG_XEN
-		if (!strncmp(p, "pt", 2)) {
+		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
-			return 1;
-		}
 
 		gart_parse_options(p);
 #endif
@@ -381,7 +390,7 @@ void pci_iommu_shutdown(void)
 	amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
 
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
--- 12.2.orig/arch/x86/kernel/pci-nommu-xen.c	2012-04-04 14:08:52.000000000 +0200
+++ 12.2/arch/x86/kernel/pci-nommu-xen.c	2012-04-04 14:09:25.000000000 +0200
@@ -34,7 +34,7 @@ gnttab_map_sg(struct device *hwdev, stru
 		sg->dma_address =
 			gnttab_dma_map_page(sg_page(sg)) + sg->offset;
 		sg->dma_length  = sg->length;
-		IOMMU_BUG_ON(address_needs_mapping(
+		IOMMU_BUG_ON(!dma_capable(
 			hwdev, sg->dma_address, sg->length));
 		IOMMU_BUG_ON(range_straddles_page_boundary(
 			page_to_pseudophys(sg_page(sg)) + sg->offset,
@@ -67,7 +67,7 @@ gnttab_map_page(struct device *dev, stru
 	dma = gnttab_dma_map_page(page) + offset;
 	IOMMU_BUG_ON(range_straddles_page_boundary(page_to_pseudophys(page) +
 						   offset, size));
-	IOMMU_BUG_ON(address_needs_mapping(dev, dma, size));
+	IOMMU_BUG_ON(!dma_capable(dev, dma, size));
 
 	return dma;
 }
@@ -79,19 +79,36 @@ gnttab_unmap_page(struct device *dev, dm
 	gnttab_dma_unmap_page(dma_addr);
 }
 
+static void nommu_sync_single_for_device(struct device *dev,
+			dma_addr_t addr, size_t size,
+			enum dma_data_direction dir)
+{
+	flush_write_buffers();
+}
+
+
+static void nommu_sync_sg_for_device(struct device *dev,
+			struct scatterlist *sg, int nelems,
+			enum dma_data_direction dir)
+{
+	flush_write_buffers();
+}
+
 static int nommu_dma_supported(struct device *hwdev, u64 mask)
 {
 	return 1;
 }
 
 struct dma_map_ops nommu_dma_ops = {
-	.alloc_coherent	= dma_generic_alloc_coherent,
-	.free_coherent	= dma_generic_free_coherent,
-	.map_page	= gnttab_map_page,
-	.unmap_page	= gnttab_unmap_page,
-	.map_sg		= gnttab_map_sg,
-	.unmap_sg	= gnttab_unmap_sg,
-	.dma_supported	= nommu_dma_supported,
+	.alloc_coherent		= dma_generic_alloc_coherent,
+	.free_coherent		= dma_generic_free_coherent,
+	.map_page		= gnttab_map_page,
+	.unmap_page		= gnttab_unmap_page,
+	.map_sg			= gnttab_map_sg,
+	.unmap_sg		= gnttab_unmap_sg,
+	.sync_single_for_device = nommu_sync_single_for_device,
+	.sync_sg_for_device	= nommu_sync_sg_for_device,
+	.dma_supported		= nommu_dma_supported,
 };
 
 void __init no_iommu_init(void)
--- 12.2.orig/arch/x86/kernel/process-xen.c	2011-03-03 16:07:25.000000000 +0100
+++ 12.2/arch/x86/kernel/process-xen.c	2011-03-03 16:07:49.000000000 +0100
@@ -9,7 +9,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -26,9 +26,6 @@ EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
-DEFINE_TRACE(power_start);
-DEFINE_TRACE(power_end);
-
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
@@ -256,9 +253,7 @@ EXPORT_SYMBOL(pm_idle);
  */
 void xen_idle(void)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, 1);
+	trace_power_start(POWER_CSTATE, 1);
 	current_thread_info()->status &= ~TS_POLLING;
 	/*
 	 * TS_POLLING-cleared state must be visible before we
@@ -271,7 +266,6 @@ void xen_idle(void)
 	else
 		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
-	trace_power_end(&it);
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);
@@ -325,9 +319,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+	trace_power_start(POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
@@ -337,15 +329,13 @@ void mwait_idle_with_hints(unsigned long
 		if (!need_resched())
 			__mwait(ax, cx);
 	}
-	trace_power_end(&it);
 }
 
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
-	struct power_trace it;
 	if (!need_resched()) {
-		trace_power_start(&it, POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1);
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -355,7 +345,6 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
-		trace_power_end(&it);
 	} else
 		local_irq_enable();
 }
@@ -368,13 +357,11 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, 0);
+	trace_power_start(POWER_CSTATE, 0);
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
-	trace_power_end(&it);
+	trace_power_end(0);
 }
 
 #ifndef CONFIG_XEN
@@ -527,10 +514,8 @@ void __init init_c1e_mask(void)
 {
 #ifndef CONFIG_XEN
 	/* If we're using c1e_idle, we need to allocate c1e_mask. */
-	if (pm_idle == c1e_idle) {
-		alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
-		cpumask_clear(c1e_mask);
-	}
+	if (pm_idle == c1e_idle)
+		zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 #endif
 }
 
--- 12.2.orig/arch/x86/kernel/process_32-xen.c	2012-02-29 14:15:29.000000000 +0100
+++ 12.2/arch/x86/kernel/process_32-xen.c	2012-02-29 14:15:56.000000000 +0100
@@ -64,9 +64,6 @@
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 /*
  * Return saved PC of a blocked thread.
  */
@@ -360,6 +357,7 @@ __switch_to(struct task_struct *prev_p, 
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
+	bool preload_fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -373,15 +371,24 @@ __switch_to(struct task_struct *prev_p, 
 	/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
 
 	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
+	/*
 	 * This is basically '__unlazy_fpu', except that we queue a
 	 * multicall to indicate FPU task switch, rather than
 	 * synchronously trapping to Xen.
 	 */
 	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		mcl->op      = __HYPERVISOR_fpu_taskswitch;
-		mcl->args[0] = 1;
-		mcl++;
+		if (!preload_fpu) {
+			mcl->op      = __HYPERVISOR_fpu_taskswitch;
+			mcl->args[0] = 1;
+			mcl++;
+		}
 	}
 #if 0 /* lazy fpu sanity check */
 	else BUG_ON(!(read_cr0() & 8));
@@ -427,6 +434,14 @@ __switch_to(struct task_struct *prev_p, 
 		mcl++;
 	}
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu) {
+		mcl->op      = __HYPERVISOR_fpu_taskswitch;
+		mcl->args[0] = 0;
+		mcl++;
+	}
+
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
 		set_xen_guest_handle(iobmp_op.bitmap,
 				     (char *)next->io_bitmap_ptr);
@@ -451,7 +466,7 @@ __switch_to(struct task_struct *prev_p, 
 		BUG();
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -470,15 +485,8 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
--- 12.2.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:37:47.000000000 +0100
+++ 12.2/arch/x86/kernel/process_64-xen.c	2011-02-02 08:37:59.000000000 +0100
@@ -60,9 +60,6 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
@@ -395,6 +392,7 @@ __switch_to(struct task_struct *prev_p, 
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
+	bool preload_fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -405,8 +403,15 @@ __switch_to(struct task_struct *prev_p, 
 #endif
 	multicall_entry_t _mcl[8], *mcl = _mcl;
 
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -418,12 +423,21 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		mcl->op      = __HYPERVISOR_fpu_taskswitch;
-		mcl->args[0] = 1;
-		mcl++;
+		if (!preload_fpu) {
+			mcl->op      = __HYPERVISOR_fpu_taskswitch;
+			mcl->args[0] = 1;
+			mcl++;
+		}
 	} else
 		prev_p->fpu_counter = 0;
 
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu) {
+		mcl->op      = __HYPERVISOR_fpu_taskswitch;
+		mcl->args[0] = 0;
+		mcl++;
+	}
+
 	/*
 	 * Reload sp0.
 	 * This is load_sp0(tss, next) with a multicall.
@@ -541,15 +555,12 @@ __switch_to(struct task_struct *prev_p, 
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
 	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 	return prev_p;
 }
 
--- 12.2.orig/arch/x86/kernel/setup-xen.c	2012-06-06 14:02:35.000000000 +0200
+++ 12.2/arch/x86/kernel/setup-xen.c	2012-06-06 14:03:17.000000000 +0200
@@ -27,6 +27,7 @@
 #include <linux/screen_info.h>
 #include <linux/ioport.h>
 #include <linux/acpi.h>
+#include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
@@ -66,6 +67,7 @@
 
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
+#include <linux/tboot.h>
 
 #include <video/edid.h>
 
@@ -138,10 +140,6 @@ start_info_t *xen_start_info;
 EXPORT_SYMBOL(xen_start_info);
 #endif
 
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
-
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -164,9 +162,9 @@ int default_cpu_present_to_apicid(int mp
 	return __default_cpu_present_to_apicid(mps_cpu);
 }
 
-int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+int default_check_phys_apicid_present(int phys_apicid)
 {
-	return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+	return __default_check_phys_apicid_present(phys_apicid);
 }
 #endif
 
@@ -203,13 +201,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-static struct resource video_ram_resource = {
-	.name	= "Video RAM area",
-	.start	= 0xa0000,
-	.end	= 0xbffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = { .wp_works_ok = 1, .hard_math = 1 };
 /* common cpu data for all cpus */
@@ -670,7 +661,7 @@ static struct resource standard_io_resou
 		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
 };
 
-static void __init reserve_standard_io_resources(void)
+void __init reserve_standard_io_resources(void)
 {
 	int i;
 
@@ -706,10 +697,6 @@ static int __init setup_elfcorehdr(char 
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
@@ -742,6 +729,13 @@ static struct dmi_system_id __initdata b
 		},
 	},
 	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "Phoenix/MSC BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
+		},
+	},
+	{
 	/*
 	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
 	 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
@@ -868,7 +862,7 @@ void __init setup_arch(char **cmdline_p)
 	copy_edid();
 #endif /* CONFIG_XEN */
 
-	ARCH_SETUP
+	x86_init.oem.arch_setup();
 
 	setup_memory_map();
 	parse_setup_data();
@@ -909,6 +903,16 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Must call this twice: Once just to detect whether hardware doesn't
+	 * support NX (so that the early EHCI debug console setup can safely
+	 * call set_fixmap(), and then again after parsing early parameters to
+	 * honor the respective command line option.
+	 */
+	check_efer();
+#endif
+
 	parse_early_param();
 
 #ifdef CONFIG_X86_64
@@ -948,12 +952,9 @@ void __init setup_arch(char **cmdline_p)
 	 * VMware detection requires dmi to be available, so this
 	 * needs to be done after dmi_scan_machine, for the BP.
 	 */
-	init_hypervisor(&boot_cpu_data);
+	init_hypervisor_platform();
 
-#ifdef CONFIG_X86_32
-	if (is_initial_xendomain())
-		probe_roms();
-#endif
+	x86_init.resources.probe_roms();
 
 #ifndef CONFIG_XEN
 	/* after parse_early_param, so could debug it */
@@ -1106,10 +1107,11 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-	xen_pagetable_setup_start(swapper_pg_dir);
+	x86_init.paging.pagetable_setup_start(swapper_pg_dir);
 	paging_init();
-	xen_pagetable_setup_done(swapper_pg_dir);
-	paravirt_post_allocator_init();
+	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+
+	tboot_probe();
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
@@ -1196,13 +1198,13 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_init();
 
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+	sfi_init();
+
 	/*
 	 * get boot-time SMP configuration:
 	 */
 	if (smp_found_config)
 		get_smp_config();
-#endif
 
 	prefill_possible_map();
 
@@ -1226,11 +1228,7 @@ void __init setup_arch(char **cmdline_p)
 		e820_reserve_resources();
 #endif
 
-#ifdef CONFIG_X86_32
-	if (is_initial_xendomain())
-		request_resource(&iomem_resource, &video_ram_resource);
-#endif
-	reserve_standard_io_resources();
+	x86_init.resources.reserve_resources();
 
 #ifdef CONFIG_XEN
 	if (is_initial_xendomain())
@@ -1251,80 +1249,25 @@ void __init setup_arch(char **cmdline_p)
 		conswitchp = &vga_con;
 #endif
 #endif
+	x86_init.oem.banner();
 }
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
-/**
- * x86_quirk_intr_init - post gate setup interrupt initialisation
- *
- * Description:
- *	Fill in any interrupts that may have been left out by the general
- *	init_IRQ() routine.  interrupts having to do with the machine rather
- *	than the devices on the I/O bus (like APIC interrupts in intel MP
- *	systems) are started here.
- **/
-void __init x86_quirk_intr_init(void)
-{
-	if (x86_quirks->arch_intr_init) {
-		if (x86_quirks->arch_intr_init())
-			return;
-	}
-}
-
-/**
- * x86_quirk_trap_init - initialise system specific traps
- *
- * Description:
- *	Called as the final act of trap_init().  Used in VISWS to initialise
- *	the various board specific APIC traps.
- **/
-void __init x86_quirk_trap_init(void)
-{
-	if (x86_quirks->arch_trap_init) {
-		if (x86_quirks->arch_trap_init())
-			return;
-	}
-}
+#ifdef CONFIG_X86_32
 
-static struct irqaction irq0  = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
-	.name = "timer"
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-/**
- * x86_quirk_pre_time_init - do any specific initialisations before.
- *
- **/
-void __init x86_quirk_pre_time_init(void)
+void __init i386_reserve_resources(void)
 {
-	if (x86_quirks->arch_pre_time_init)
-		x86_quirks->arch_pre_time_init();
+	if (is_initial_xendomain())
+		request_resource(&iomem_resource, &video_ram_resource);
+	reserve_standard_io_resources();
 }
 
-/**
- * x86_quirk_time_init - do any specific initialisations for the system timer.
- *
- * Description:
- *	Must plug the system timer interrupt source at HZ into the IRQ listed
- *	in irq_vectors.h:TIMER_IRQ
- **/
-void __init x86_quirk_time_init(void)
-{
-	if (x86_quirks->arch_time_init) {
-		/*
-		 * A nonzero return code does not mean failure, it means
-		 * that the architecture quirk does not want any
-		 * generic (timer) setup to be performed after this:
-		 */
-		if (x86_quirks->arch_time_init())
-			return;
-	}
-
-	irq0.mask = cpumask_of_cpu(0);
-	setup_irq(0, &irq0);
-}
 #endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_XEN
--- 12.2.orig/arch/x86/platform/sfi/sfi.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/platform/sfi/sfi.c	2011-02-02 08:45:00.000000000 +0100
@@ -32,6 +32,7 @@
 #include <asm/apic.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
+#ifndef CONFIG_XEN
 static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 
 /* All CPUs enumerated by SFI must be present and enabled */
@@ -47,6 +48,9 @@ static void __cpuinit mp_sfi_register_la
 
 	generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
 }
+#else
+#define mp_sfi_register_lapic(id)
+#endif
 
 static int __init sfi_parse_cpus(struct sfi_table_header *table)
 {
@@ -86,9 +90,12 @@ static int __init sfi_parse_ioapic(struc
 		pentry++;
 	}
 
+#ifndef CONFIG_XEN
 	WARN(pic_mode, KERN_WARNING
 		"SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
 	pic_mode = 0;
+#endif
+
 	return 0;
 }
 #endif /* CONFIG_X86_IO_APIC */
--- 12.2.orig/arch/x86/kernel/time-xen.c	2012-02-10 13:28:24.000000000 +0100
+++ 12.2/arch/x86/kernel/time-xen.c	2012-02-10 13:28:49.000000000 +0100
@@ -1,31 +1,12 @@
 /*
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Copyright (c) 1991,1992,1995  Linus Torvalds
+ *  Copyright (c) 1994  Alan Modra
+ *  Copyright (c) 1995  Markus Kuhn
+ *  Copyright (c) 1996  Ingo Molnar
+ *  Copyright (c) 1998  Andrea Arcangeli
+ *  Copyright (c) 2002,2006  Vojtech Pavlik
+ *  Copyright (c) 2003  Andi Kleen
  *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02    Alan Modra
- *	fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26    Markus Kuhn
- *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- *      precision CMOS clock update
- * 1996-05-03    Ingo Molnar
- *      fixed time warps in do_[slow|fast]_gettimeoffset()
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- * 1998-09-05    (Various)
- *	More robust do_fast_gettimeoffset() algorithm implemented
- *	(works with APM, Cyrix 6x86MX and Centaur C6),
- *	monotonic gettimeofday() with fast_get_timeoffset(),
- *	drift-proof precision TSC calibration on boot
- *	(C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
- *	Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
- *	ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
- * 1998-12-16    Andrea Arcangeli
- *	Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
- *	because was not accounting lost_ticks.
- * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
- *	Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- *	serialize accesses to xtime/lost_ticks).
  */
 
 #include <linux/init.h>
@@ -38,6 +19,7 @@
 #include <linux/cpufreq.h>
 #include <linux/clocksource.h>
 
+#include <asm/vsyscall.h>
 #include <asm/delay.h>
 #include <asm/time.h>
 #include <asm/timer.h>
@@ -51,7 +33,6 @@ DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
 #ifdef CONFIG_X86_64
-#include <asm/vsyscall.h>
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
 
@@ -99,20 +80,6 @@ static DECLARE_WORK(clock_was_set_work, 
  */
 #define clobber_induction_variable(v) asm ( "" : "+r" (v) )
 
-static inline void __normalize_time(time_t *sec, s64 *nsec)
-{
-	while (*nsec >= NSEC_PER_SEC) {
-		clobber_induction_variable(*nsec);
-		(*nsec) -= NSEC_PER_SEC;
-		(*sec)++;
-	}
-	while (*nsec < 0) {
-		clobber_induction_variable(*nsec);
-		(*nsec) += NSEC_PER_SEC;
-		(*sec)--;
-	}
-}
-
 /* Does this guest OS track Xen time, or set its wall clock independently? */
 static int independent_wallclock = 0;
 static int __init __independent_wallclock(char *str)
@@ -272,7 +239,7 @@ static inline int time_values_up_to_date
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
 int xen_update_wallclock(const struct timespec *tv)
 {
-	time_t sec;
+	struct timespec now;
 	s64 nsec;
 	unsigned int cpu;
 	struct shadow_time_info *shadow;
@@ -281,11 +248,6 @@ int xen_update_wallclock(const struct ti
 	if (!is_initial_xendomain() || independent_wallclock)
 		return -EPERM;
 
-	if (!tv) {
-		mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
-		return 0;
-	}
-
 	cpu = get_cpu();
 	shadow = &per_cpu(shadow_time, cpu);
 
@@ -300,12 +262,11 @@ int xen_update_wallclock(const struct ti
 			break;
 		get_time_values_from_xen(cpu);
 	}
-	sec = tv->tv_sec;
-	__normalize_time(&sec, &nsec);
+	set_normalized_timespec(&now, tv->tv_sec, nsec);
 
 	op.cmd = XENPF_settime;
-	op.u.settime.secs        = sec;
-	op.u.settime.nsecs       = nsec;
+	op.u.settime.secs        = now.tv_sec;
+	op.u.settime.nsecs       = now.tv_nsec;
 	op.u.settime.system_time = shadow->system_timestamp;
 	WARN_ON(HYPERVISOR_platform_op(&op));
 	update_wallclock(false);
@@ -319,8 +280,7 @@ static void sync_xen_wallclock(unsigned 
 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
 static void sync_xen_wallclock(unsigned long dummy)
 {
-	time_t sec;
-	s64 nsec;
+	struct timespec now;
 	struct xen_platform_op op;
 
 	BUG_ON(!is_initial_xendomain());
@@ -329,13 +289,11 @@ static void sync_xen_wallclock(unsigned 
 
 	write_seqlock_irq(&xtime_lock);
 
-	sec  = xtime.tv_sec;
-	nsec = xtime.tv_nsec;
-	__normalize_time(&sec, &nsec);
+	set_normalized_timespec(&now, xtime.tv_sec, xtime.tv_nsec);
 
 	op.cmd = XENPF_settime;
-	op.u.settime.secs        = sec;
-	op.u.settime.nsecs       = nsec;
+	op.u.settime.secs        = now.tv_sec;
+	op.u.settime.nsecs       = now.tv_nsec;
 	op.u.settime.system_time = processed_system_time;
 	WARN_ON(HYPERVISOR_platform_op(&op));
 
@@ -389,6 +347,18 @@ unsigned long xen_read_wallclock(void)
 	return delta;
 }
 
+int xen_write_wallclock(unsigned long now)
+{
+	if (!is_initial_xendomain() || independent_wallclock)
+		return 0;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+	mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
+#endif
+
+	return mach_set_rtc_mmss(now);
+}
+
 /*
  * Runstate accounting
  */
@@ -448,38 +418,33 @@ unsigned long profile_pc(struct pt_regs 
 {
 	unsigned long pc = instruction_pointer(regs);
 
-#if defined(CONFIG_SMP) || defined(__x86_64__)
 	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
-# ifdef CONFIG_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
 		return *(unsigned long *)(regs->bp + sizeof(long));
-# else
-#  ifdef __i386__
-		unsigned long *sp = (unsigned long *)&regs->sp;
-#  else
-		unsigned long *sp = (unsigned long *)regs->sp;
-#  endif
-
-		/* Return address is either directly at stack pointer
-		   or above a saved flags. Eflags has bits 22-31 zero,
-		   kernel addresses don't. */
+#else
+		unsigned long *sp =
+			(unsigned long *)kernel_stack_pointer(regs);
+
+		/*
+		 * Return address is either directly at stack pointer
+		 * or above a saved flags. Eflags has bits 22-31 zero,
+		 * kernel addresses don't.
+		 */
 		if (sp[0] >> 22)
 			return sp[0];
 		if (sp[1] >> 22)
 			return sp[1];
-# endif
-	}
 #endif
+	}
 
 	return pc;
 }
 EXPORT_SYMBOL(profile_pc);
 
 /*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
+ * Default timer interrupt handler
  */
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	s64 delta, delta_cpu, stolen, blocked;
 	unsigned int i, cpu = smp_processor_id();
@@ -605,8 +570,7 @@ irqreturn_t timer_interrupt(int irq, voi
 
 	/* Local timer processing (see update_process_times()). */
 	run_local_timers();
-	if (rcu_pending(cpu))
-		rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
+	rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
 	printk_tick();
 	scheduler_tick();
 	run_posix_cpu_timers(current);
--- 12.2.orig/arch/x86/kernel/traps-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/kernel/traps-xen.c	2012-06-20 12:16:30.000000000 +0200
@@ -14,7 +14,6 @@
 #include <linux/spinlock.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -59,12 +58,12 @@
 #include <asm/mach_traps.h>
 
 #ifdef CONFIG_X86_64
+#include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
-#include <asm/traps.h>
 
 asmlinkage int system_call(void);
 
@@ -74,11 +73,9 @@ char ignore_fpu_irq;
 #ifndef CONFIG_X86_NO_IDT
 /*
  * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
  */
-gate_desc idt_table[256]
-	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
 #endif
 #endif
 
@@ -780,27 +777,6 @@ do_spurious_interrupt_bug(struct pt_regs
 #endif
 }
 
-#ifdef CONFIG_X86_32
-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
-{
-	struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
-	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
-	unsigned long new_kesp = kesp - base;
-	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
-	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
-
-	/* Set up base for espfix segment */
-	desc &= 0x00f0ff0000000000ULL;
-	desc |=	((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
-		((((__u64)base) << 32) & 0xff00000000000000ULL) |
-		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
-		(lim_pages & 0xffff);
-	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
-
-	return new_kesp;
-}
-#endif
-
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
@@ -811,6 +787,28 @@ asmlinkage void __attribute__((weak)) sm
 #endif /* CONFIG_XEN */
 
 /*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -841,17 +839,7 @@ asmlinkage void math_state_restore(void)
 	}
 
 	/* NB. 'clts' is done for us by Xen during virtual trap. */
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 
 #ifndef CONFIG_MATH_EMULATION
@@ -966,6 +954,8 @@ void __init trap_init(void)
 	 * Should be a barrier for any external CPU state:
 	 */
 	cpu_init();
+
+	x86_init.irqs.trap_init();
 }
 
 void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
@@ -977,4 +967,7 @@ void __cpuinit smp_trap_init(trap_info_t
 		trap_ctxt[t->vector].cs = t->cs;
 		trap_ctxt[t->vector].address = t->address;
 	}
+	TI_SET_IF(trap_ctxt + NMI_VECTOR, 1);
+	trap_ctxt[NMI_VECTOR].cs = __KERNEL_CS;
+	trap_ctxt[NMI_VECTOR].address = (unsigned long)nmi;
 }
--- 12.2.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wa
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
 	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
@@ -227,19 +228,11 @@ static long __vsyscall(3) venosys_1(void
 }
 
 #ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-		       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
 static ctl_table kernel_table2[] = {
 	{ .procname = "vsyscall64",
 	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
 	  .mode = 0644,
-	  .proc_handler = vsyscall_sysctl_change },
+	  .proc_handler = proc_dointvec },
 	{}
 };
 
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/kernel/x86_init-xen.c	2011-07-11 12:20:38.000000000 +0200
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#include <linux/bitmap.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+
+void __cpuinit x86_init_noop(void) { }
+void __init x86_init_uint_noop(unsigned int unused) { }
+void __init x86_init_pgd_noop(pgd_t *unused) { }
+
+/*
+ * The platform setup functions are preset with the default functions
+ * for standard PC hardware.
+ */
+struct x86_init_ops x86_init __initdata = {
+
+	.resources = {
+		.probe_roms		= x86_init_noop,
+		.reserve_resources	= reserve_standard_io_resources,
+		.memory_setup		= default_machine_specific_memory_setup,
+	},
+
+	.mpparse = {
+		.mpc_record		= x86_init_uint_noop,
+		.setup_ioapic_ids	= x86_init_noop,
+		.mpc_apic_id		= NULL,
+		.smp_read_mpc_oem	= default_smp_read_mpc_oem,
+		.mpc_oem_bus_info	= default_mpc_oem_bus_info,
+		.find_smp_config	= default_find_smp_config,
+		.get_smp_config		= default_get_smp_config,
+	},
+
+	.irqs = {
+		.pre_vector_init	= NULL,
+		.intr_init		= NULL,
+		.trap_init		= x86_init_noop,
+	},
+
+	.oem = {
+		.arch_setup		= xen_arch_setup,
+		.banner			= x86_init_noop,
+	},
+
+	.paging = {
+		.pagetable_setup_start	= x86_init_pgd_noop,
+		.pagetable_setup_done	= x86_init_pgd_noop,
+	},
+
+	.timers = {
+		.setup_percpu_clockev	= NULL,
+		.tsc_pre_init		= x86_init_noop,
+		.timer_init		= x86_init_noop,
+	},
+};
+
+struct x86_platform_ops x86_platform = {
+	.calibrate_tsc			= NULL,
+	.get_wallclock			= xen_read_wallclock,
+	.set_wallclock			= xen_write_wallclock,
+};
--- 12.2.orig/arch/x86/mm/fault-xen.c	2011-08-15 11:03:45.000000000 +0200
+++ 12.2/arch/x86/mm/fault-xen.c	2011-08-15 11:03:59.000000000 +0200
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
-#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+#include <linux/perf_event.h>		/* perf_sw_event		*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -168,6 +168,7 @@ force_sig_info_fault(int si_signo, int s
 	info.si_errno	= 0;
 	info.si_code	= si_code;
 	info.si_addr	= (void __user *)address;
+	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -303,27 +304,25 @@ check_v8086_mode(struct pt_regs *regs, u
 		tsk->thread.screen_bitmap |= 1 << bit;
 }
 
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
 {
-	__typeof__(pte_val(__pte(0))) page;
+	return pfn < max_low_pfn;
+}
 
-	page = read_cr3();
-	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+	pgd_t *base = __va(read_cr3());
+	pgd_t *pgd = &base[pgd_index(address)];
+	pmd_t *pmd;
+	pte_t *pte;
 
 #ifdef CONFIG_X86_PAE
-	printk("*pdpt = %016Lx ", page);
-	if ((page & _PAGE_PRESENT)
-	    && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn) {
-		page = mfn_to_pfn(page >> PAGE_SHIFT);
-		page <<= PAGE_SHIFT;
-		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-							& (PTRS_PER_PMD - 1)];
-		printk(KERN_CONT "*pde = %016Lx ", page);
-		page &= ~_PAGE_NX;
-	}
-#else
-	printk("*pde = %08lx ", page);
+	printk("*pdpt = %016Lx ", __pgd_val(*pgd));
+	if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+		goto out;
 #endif
+	pmd = pmd_offset(pud_offset(pgd, address), address);
+	printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)__pmd_val(*pmd));
 
 	/*
 	 * We must not directly access the pte in the highpte
@@ -331,17 +330,12 @@ static void dump_pagetable(unsigned long
 	 * And let's rather not kmap-atomic the pte, just in case
 	 * it's allocated already:
 	 */
-	if ((page & _PAGE_PRESENT)
-	    && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn
-	    && !(page & _PAGE_PSE)) {
-
-		page = mfn_to_pfn(page >> PAGE_SHIFT);
-		page <<= PAGE_SHIFT;
-		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-							& (PTRS_PER_PTE - 1)];
-		printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-	}
+	if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+		goto out;
 
+	pte = pte_offset_kernel(pmd, address);
+	printk(KERN_CONT "*pte = %0*Lx ", sizeof(*pte) * 2, (u64)__pte_val(*pte));
+out:
 	printk(KERN_CONT "\n");
 }
 
@@ -472,16 +466,12 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-	pgd_t *pgd;
+	pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+	pgd_t *pgd = base + pgd_index(address);
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	pgd = (pgd_t *)read_cr3();
-
-	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
-	pgd += pgd_index(address);
 	if (bad_address(pgd))
 		goto bad;
 
@@ -822,10 +812,12 @@ out_of_memory(struct pt_regs *regs, unsi
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+	  unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	int code = BUS_ADRERR;
 
 	up_read(&mm->mmap_sem);
 
@@ -841,7 +833,15 @@ do_sigbus(struct pt_regs *regs, unsigned
 	tsk->thread.error_code	= error_code;
 	tsk->thread.trap_no	= 14;
 
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+	if (fault & VM_FAULT_HWPOISON) {
+		printk(KERN_ERR
+	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+			tsk->comm, tsk->pid, address);
+		code = BUS_MCEERR_AR;
+	}
+#endif
+	force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
@@ -851,8 +851,8 @@ mm_fault_error(struct pt_regs *regs, uns
 	if (fault & VM_FAULT_OOM) {
 		out_of_memory(regs, error_code, address);
 	} else {
-		if (fault & VM_FAULT_SIGBUS)
-			do_sigbus(regs, error_code, address);
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+			do_sigbus(regs, error_code, address, fault);
 		else
 			BUG();
 	}
@@ -1066,7 +1066,7 @@ do_page_fault(struct pt_regs *regs, unsi
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1163,11 +1163,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
--- 12.2.orig/arch/x86/mm/highmem_32-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/highmem_32-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
  * no global lock is needed and because the kmap code must perform a global TLB
  * invalidation when the kmap pool wraps.
  *
- * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
 void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
@@ -149,9 +149,7 @@ EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kmap_atomic_prot);
-#ifdef CONFIG_HIGHPTE
 EXPORT_SYMBOL(kmap_atomic_to_page);
-#endif
 EXPORT_SYMBOL(clear_highpage);
 EXPORT_SYMBOL(copy_highpage);
 
--- 12.2.orig/arch/x86/mm/hypervisor.c	2011-03-23 09:59:29.000000000 +0100
+++ 12.2/arch/x86/mm/hypervisor.c	2012-05-31 14:45:03.000000000 +0200
@@ -45,6 +45,9 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <linux/highmem.h>
+#ifdef CONFIG_X86_32
+#include <linux/bootmem.h> /* for max_pfn */
+#endif
 
 EXPORT_SYMBOL(hypercall_page);
 
@@ -1020,6 +1023,15 @@ bool hypervisor_oom(void)
 	return false;//temp
 }
 
+int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+			  void *arg, int (*func)(unsigned long, unsigned long,
+						 void *))
+{
+	return start_pfn < max_pfn && nr_pages
+	       ? func(start_pfn, min(max_pfn - start_pfn, nr_pages), arg)
+	       : -1;
+}
+
 int write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
 {
 	maddr_t mach_lp = arbitrary_virt_to_machine(ldt + entry);
--- 12.2.orig/arch/x86/mm/init-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/init-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -36,69 +36,6 @@ extern unsigned long extend_init_mapping
 extern void xen_finish_init_mapping(void);
 #endif
 
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
--- 12.2.orig/arch/x86/mm/init_32-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/init_32-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -87,7 +87,7 @@ static pmd_t * __init one_md_table_init(
 #ifdef CONFIG_X86_PAE
 	if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
 		if (after_bootmem)
-			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
 		else
 			pmd_table = (pmd_t *)alloc_low_page();
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -124,7 +124,7 @@ static pte_t * __init one_page_table_ini
 #endif
 			if (!page_table)
 				page_table =
-				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+				(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
 		} else
 			page_table = (pte_t *)alloc_low_page();
 
@@ -914,8 +914,6 @@ static void __init test_wp_bit(void)
 	}
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc;
-
 void __init mem_init(void)
 {
 	int codesize, reservedpages, datasize, initsize;
@@ -949,13 +947,9 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
-
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
--- 12.2.orig/arch/x86/mm/init_64-xen.c	2011-06-30 17:04:34.000000000 +0200
+++ 12.2/arch/x86/mm/init_64-xen.c	2011-06-30 17:06:27.000000000 +0200
@@ -900,8 +900,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
-			 kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -937,17 +936,12 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
-	kclist_add(&kcore_kernel, &_stext, _end - _stext);
-	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
-				 VSYSCALL_END - VSYSCALL_START);
+			 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
 		absent_pages << (PAGE_SHIFT-10),
--- 12.2.orig/arch/x86/mm/iomap_32-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/iomap_32-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 
-int is_io_mapping_possible(resource_size_t base, unsigned long size)
+static int is_io_mapping_possible(resource_size_t base, unsigned long size)
 {
 #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
 	/* There is no way to map greater than 1 << 32 address without PAE */
@@ -31,7 +31,30 @@ int is_io_mapping_possible(resource_size
 #endif
 	return 1;
 }
-EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
+{
+	unsigned long flag = _PAGE_CACHE_WC;
+	int ret;
+
+	if (!is_io_mapping_possible(base, size))
+		return -EINVAL;
+
+	ret = io_reserve_memtype(base, base + size, &flag);
+	if (ret)
+		return ret;
+
+	*prot = __pgprot(__PAGE_KERNEL | flag);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_create_wc);
+
+void
+iomap_free(resource_size_t base, unsigned long size)
+{
+	io_free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(iomap_free);
 
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 {
--- 12.2.orig/arch/x86/mm/ioremap-xen.c	2011-02-07 15:41:25.000000000 +0100
+++ 12.2/arch/x86/mm/ioremap-xen.c	2011-02-07 15:41:38.000000000 +0100
@@ -23,81 +23,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
 
-static inline int phys_addr_valid(resource_size_t addr)
-{
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-	return !(addr >> boot_cpu_data.x86_phys_bits);
-#else
-	return 1;
-#endif
-}
-
-#ifdef CONFIG_X86_64
-
-#define phys_base 0
-
-unsigned long __phys_addr(unsigned long x)
-{
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
-		x += phys_base;
-	} else {
-		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
-		x -= PAGE_OFFSET;
-		VIRTUAL_BUG_ON(!phys_addr_valid(x));
-	}
-	return x;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-bool __virt_addr_valid(unsigned long x)
-{
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		if (x >= KERNEL_IMAGE_SIZE)
-			return false;
-		x += phys_base;
-	} else {
-		if (x < PAGE_OFFSET)
-			return false;
-		x -= PAGE_OFFSET;
-		if (!phys_addr_valid(x))
-			return false;
-	}
-
-	return pfn_valid(x >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#undef phys_base
-
-#else
-
-#ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
-{
-	/* VMALLOC_* aren't constants  */
-	VIRTUAL_BUG_ON(x < PAGE_OFFSET);
-	VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
-	return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-#endif
-
-bool __virt_addr_valid(unsigned long x)
-{
-	if (x < PAGE_OFFSET)
-		return false;
-	if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
-		return false;
-	if (x >= FIXADDR_START)
-		return false;
-	return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#endif
+#include "physaddr.h"
 
 static int direct_remap_area_pte_fn(pte_t *pte,
 				    struct page *pmd_page,
@@ -388,30 +314,19 @@ static void __iomem *__ioremap_caller(re
 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
 						prot_val, &new_prot_val);
 	if (retval) {
-		pr_debug("Warning: reserve_memtype returned %d\n", retval);
+		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
 		return NULL;
 	}
 
 	if (prot_val != new_prot_val) {
-		/*
-		 * Do not fallback to certain memory types with certain
-		 * requested type:
-		 * - request is uc-, return cannot be write-back
-		 * - request is uc-, return cannot be write-combine
-		 * - request is write-combine, return cannot be write-back
-		 */
-		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
-		     (new_prot_val == _PAGE_CACHE_WB ||
-		      new_prot_val == _PAGE_CACHE_WC)) ||
-		    (prot_val == _PAGE_CACHE_WC &&
-		     new_prot_val == _PAGE_CACHE_WB)) {
-			pr_debug(
+		if (!is_new_memtype_allowed(phys_addr, size,
+					    prot_val, new_prot_val)) {
+			printk(KERN_ERR
 		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
 				(unsigned long long)phys_addr,
 				(unsigned long long)(phys_addr + size),
 				prot_val, new_prot_val);
-			free_memtype(phys_addr, phys_addr + size);
-			return NULL;
+			goto err_free_memtype;
 		}
 		prot_val = new_prot_val;
 	}
@@ -437,27 +352,26 @@ static void __iomem *__ioremap_caller(re
 	 */
 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
 	if (!area)
-		return NULL;
+		goto err_free_memtype;
 	area->phys_addr = phys_addr;
 	vaddr = (unsigned long) area->addr;
 
-	if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+	if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+		goto err_free_area;
 
 	if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
-				     size, prot, domid)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+				     size, prot, domid))
+		goto err_free_area;
 
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
 	return ret_addr;
+err_free_area:
+	free_vm_area(area);
+err_free_memtype:
+	free_memtype(phys_addr, phys_addr + size);
+	return NULL;
 }
 
 /**
--- 12.2.orig/arch/x86/mm/pageattr-xen.c	2011-03-23 09:59:25.000000000 +0100
+++ 12.2/arch/x86/mm/pageattr-xen.c	2011-03-23 10:00:03.000000000 +0100
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pfn.h>
+#include <linux/percpu.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -143,6 +144,7 @@ void clflush_cache_range(void *vaddr, un
 
 	mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
@@ -729,7 +731,7 @@ static int cpa_process_alias(struct cpa_
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -787,24 +789,6 @@ static int cpa_process_alias(struct cpa_
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified.  Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -865,6 +849,7 @@ static int change_page_attr_set_clr(unsi
 {
 	struct cpa_data cpa;
 	int ret, cache, checkalias;
+	unsigned long baddr = 0;
 
 	/*
 	 * Check, if we are requested to change a not supported
@@ -896,6 +881,11 @@ static int change_page_attr_set_clr(unsi
 			 */
 			WARN_ON_ONCE(1);
 		}
+		/*
+		 * Save address for cache flush. *addr is modified in the call
+		 * to __change_page_attr_set_clr() below.
+		 */
+		baddr = *addr;
 	}
 
 	/* Must avoid aliasing mappings in the highmem code */
@@ -943,7 +933,7 @@ static int change_page_attr_set_clr(unsi
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
 		} else
-			cpa_flush_range(*addr, numpages, cache);
+			cpa_flush_range(baddr, numpages, cache);
 	} else
 		cpa_flush_all(cache);
 
--- 12.2.orig/arch/x86/mm/pat-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/pat-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -15,6 +15,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
@@ -80,6 +81,7 @@ enum {
 void pat_init(void)
 {
 	u64 pat;
+	bool boot_cpu = !boot_pat_state;
 
 	if (!pat_enabled)
 		return;
@@ -131,8 +133,10 @@ void pat_init(void)
 	if (!boot_pat_state)
 		boot_pat_state = pat;
 #endif
-	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
-	       smp_processor_id(), boot_pat_state, pat);
+
+	if (boot_cpu)
+		printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+		       smp_processor_id(), boot_pat_state, pat);
 }
 
 #undef PAT
@@ -160,11 +164,10 @@ static char *cattr_name(unsigned long fl
  * areas). All the aliases have the same cache attributes of course.
  * Zero attributes are represented as holes.
  *
- * Currently the data structure is a list because the number of mappings
- * are expected to be relatively small. If this should be a problem
- * it could be changed to a rbtree or similar.
+ * The data structure is a list that is also organized as an rbtree
+ * sorted on the start address of memtype range.
  *
- * memtype_lock protects the whole list.
+ * memtype_lock protects both the linear list and rbtree.
  */
 
 struct memtype {
@@ -172,11 +175,53 @@ struct memtype {
 	u64			end;
 	unsigned long		type;
 	struct list_head	nd;
+	struct rb_node		rb;
 };
 
+static struct rb_root memtype_rbroot = RB_ROOT;
 static LIST_HEAD(memtype_list);
 static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
 
+static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
+{
+	struct rb_node *node = root->rb_node;
+	struct memtype *last_lower = NULL;
+
+	while (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+
+		if (data->start < start) {
+			last_lower = data;
+			node = node->rb_right;
+		} else if (data->start > start) {
+			node = node->rb_left;
+		} else
+			return data;
+	}
+
+	/* Will return NULL if there is no entry with its start <= start */
+	return last_lower;
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
+{
+	struct rb_node **new = &(root->rb_node);
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct memtype *this = container_of(*new, struct memtype, rb);
+
+		parent = *new;
+		if (data->start <= this->start)
+			new = &((*new)->rb_left);
+		else if (data->start > this->start)
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->rb, parent, new);
+	rb_insert_color(&data->rb, root);
+}
+
 static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end);
 static inline u8 _mtrr_type_lookup(u64 start, u64 end)
 {
@@ -240,9 +285,6 @@ chk_conflict(struct memtype *new, struct
 	return -EBUSY;
 }
 
-static struct memtype *cached_entry;
-static u64 cached_start;
-
 static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
 {
 	int ram_page = 0, not_rampage = 0;
@@ -271,69 +313,65 @@ static int pat_pagerange_is_ram(resource
 }
 
 /*
- * For RAM pages, mark the pages as non WB memory type using
- * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
- * set_memory_wc() on a RAM page at a time before marking it as WB again.
- * This is ok, because only one driver will be owning the page and
- * doing set_memory_*() calls.
- *
- * For now, we use PageNonWB to track that the RAM page is being mapped
- * as non WB. In future, we will have to use one more flag
- * (or some other mechanism in page_struct) to distinguish between
- * UC and WC mapping.
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * Here we do two pass:
+ * - Find the memtype of all the pages in the range, look for any conflicts
+ * - In case of no conflicts, set the new memtype for pages in the range
+ *
+ * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
 {
 	struct page *page;
-	unsigned long mfn, end_mfn;
+	unsigned long mfn;
+
+	if (req_type == _PAGE_CACHE_UC) {
+		/* We do not support strong UC */
+		WARN_ON_ONCE(1);
+		req_type = _PAGE_CACHE_UC_MINUS;
+	}
 
 	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
-		unsigned long pfn = mfn_to_local_pfn(mfn);
+		unsigned long type, pfn = mfn_to_local_pfn(mfn);
 
 		BUG_ON(!pfn_valid(pfn));
 		page = pfn_to_page(pfn);
-		if (page_mapped(page) || PageNonWB(page))
-			goto out;
+		type = get_page_memtype(page);
+		if (type != -1) {
+			printk(KERN_INFO "reserve_ram_pages_type failed "
+				"0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
+				start, end, type, req_type);
+			if (new_type)
+				*new_type = type;
 
-		SetPageNonWB(page);
+			return -EBUSY;
+		}
 	}
-	return 0;
 
-out:
-	end_mfn = mfn;
-	for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
+	if (new_type)
+		*new_type = req_type;
+
+	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
 		page = pfn_to_page(mfn_to_local_pfn(mfn));
-		ClearPageNonWB(page);
+		set_page_memtype(page, req_type);
 	}
-
-	return -EINVAL;
+	return 0;
 }
 
 static int free_ram_pages_type(u64 start, u64 end)
 {
 	struct page *page;
-	unsigned long mfn, end_mfn;
+	unsigned long mfn;
 
 	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
 		unsigned long pfn = mfn_to_local_pfn(mfn);
 
 		BUG_ON(!pfn_valid(pfn));
 		page = pfn_to_page(pfn);
-		if (page_mapped(page) || !PageNonWB(page))
-			goto out;
-
-		ClearPageNonWB(page);
+		set_page_memtype(page, -1);
 	}
 	return 0;
-
-out:
-	end_mfn = mfn;
-	for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
-		page = pfn_to_page(mfn_to_local_pfn(mfn));
-		SetPageNonWB(page);
-	}
-	return -EINVAL;
 }
 
 /*
@@ -367,6 +405,8 @@ int reserve_memtype(u64 start, u64 end, 
 		if (new_type) {
 			if (req_type == -1)
 				*new_type = _PAGE_CACHE_WB;
+			else if (req_type == _PAGE_CACHE_WC)
+				*new_type = _PAGE_CACHE_UC_MINUS;
 			else
 				*new_type = req_type & _PAGE_CACHE_MASK;
 		}
@@ -392,11 +432,16 @@ int reserve_memtype(u64 start, u64 end, 
 		*new_type = actual_type;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return reserve_ram_pages_type(start, end, req_type,
-					      new_type);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = reserve_ram_pages_type(start, end, req_type, new_type);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -408,17 +453,11 @@ int reserve_memtype(u64 start, u64 end, 
 
 	spin_lock(&memtype_lock);
 
-	if (cached_entry && start >= cached_start)
-		entry = cached_entry;
-	else
-		entry = list_entry(&memtype_list, struct memtype, nd);
-
 	/* Search for existing mapping that overlaps the current range */
 	where = NULL;
-	list_for_each_entry_continue(entry, &memtype_list, nd) {
+	list_for_each_entry(entry, &memtype_list, nd) {
 		if (end <= entry->start) {
 			where = entry->nd.prev;
-			cached_entry = list_entry(where, struct memtype, nd);
 			break;
 		} else if (start <= entry->start) { /* end > entry->start */
 			err = chk_conflict(new, entry, new_type);
@@ -426,8 +465,6 @@ int reserve_memtype(u64 start, u64 end, 
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
 				where = entry->nd.prev;
-				cached_entry = list_entry(where,
-							struct memtype, nd);
 			}
 			break;
 		} else if (start < entry->end) { /* start > entry->start */
@@ -435,8 +472,6 @@ int reserve_memtype(u64 start, u64 end, 
 			if (!err) {
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
-				cached_entry = list_entry(entry->nd.prev,
-							struct memtype, nd);
 
 				/*
 				 * Move to right position in the linked
@@ -464,13 +499,13 @@ int reserve_memtype(u64 start, u64 end, 
 		return err;
 	}
 
-	cached_start = start;
-
 	if (where)
 		list_add(&new->nd, where);
 	else
 		list_add_tail(&new->nd, &memtype_list);
 
+	memtype_rb_insert(&memtype_rbroot, new);
+
 	spin_unlock(&memtype_lock);
 
 	dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -482,7 +517,7 @@ int reserve_memtype(u64 start, u64 end, 
 
 int free_memtype(u64 start, u64 end)
 {
-	struct memtype *entry;
+	struct memtype *entry, *saved_entry;
 	int err = -EINVAL;
 	int is_range_ram;
 
@@ -494,23 +529,58 @@ int free_memtype(u64 start, u64 end)
 		return 0;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return free_ram_pages_type(start, end);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = free_ram_pages_type(start, end);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	spin_lock(&memtype_lock);
-	list_for_each_entry(entry, &memtype_list, nd) {
+
+	entry = memtype_rb_search(&memtype_rbroot, start);
+	if (unlikely(entry == NULL))
+		goto unlock_ret;
+
+	/*
+	 * Saved entry points to an entry with start same or less than what
+	 * we searched for. Now go through the list in both directions to look
+	 * for the entry that matches with both start and end, with list stored
+	 * in sorted start address
+	 */
+	saved_entry = entry;
+	list_for_each_entry_from(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
-			if (cached_entry == entry || cached_start == start)
-				cached_entry = NULL;
+			rb_erase(&entry->rb, &memtype_rbroot);
+			list_del(&entry->nd);
+			kfree(entry);
+			err = 0;
+			break;
+		} else if (entry->start > start) {
+			break;
+		}
+	}
+
+	if (!err)
+		goto unlock_ret;
 
+	entry = saved_entry;
+	list_for_each_entry_reverse(entry, &memtype_list, nd) {
+		if (entry->start == start && entry->end == end) {
+			rb_erase(&entry->rb, &memtype_rbroot);
 			list_del(&entry->nd);
 			kfree(entry);
 			err = 0;
 			break;
+		} else if (entry->start < start) {
+			break;
 		}
 	}
+unlock_ret:
 	spin_unlock(&memtype_lock);
 
 	if (err) {
@@ -524,6 +594,103 @@ int free_memtype(u64 start, u64 end)
 }
 
 
+#ifndef CONFIG_XEN
+/**
+ * lookup_memtype - Looksup the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
+ * _PAGE_CACHE_UC
+ */
+static unsigned long lookup_memtype(u64 paddr)
+{
+	int rettype = _PAGE_CACHE_WB;
+	struct memtype *entry;
+
+	if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+		return rettype;
+
+	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+		struct page *page;
+		spin_lock(&memtype_lock);
+		page = pfn_to_page(paddr >> PAGE_SHIFT);
+		rettype = get_page_memtype(page);
+		spin_unlock(&memtype_lock);
+		/*
+		 * -1 from get_page_memtype() implies RAM page is in its
+		 * default state and not reserved, and hence of type WB
+		 */
+		if (rettype == -1)
+			rettype = _PAGE_CACHE_WB;
+
+		return rettype;
+	}
+
+	spin_lock(&memtype_lock);
+
+	entry = memtype_rb_search(&memtype_rbroot, paddr);
+	if (entry != NULL)
+		rettype = entry->type;
+	else
+		rettype = _PAGE_CACHE_UC_MINUS;
+
+	spin_unlock(&memtype_lock);
+	return rettype;
+}
+#endif
+
+/**
+ * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: A pointer to memtype, with requested type. On success, requested
+ * or any other compatible type that was available for the region is returned
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+			unsigned long *type)
+{
+	resource_size_t size = end - start;
+	unsigned long req_type = *type;
+	unsigned long new_type;
+	int ret;
+
+	WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+	ret = reserve_memtype(start, end, req_type, &new_type);
+	if (ret)
+		goto out_err;
+
+	if (!is_new_memtype_allowed(start, size, req_type, new_type))
+		goto out_free;
+
+	if (kernel_map_sync_memtype(start, size, new_type) < 0)
+		goto out_free;
+
+	*type = new_type;
+	return 0;
+
+out_free:
+	free_memtype(start, end);
+	ret = -EBUSY;
+out_err:
+	return ret;
+}
+
+/**
+ * io_free_memtype - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void io_free_memtype(resource_size_t start, resource_size_t end)
+{
+	free_memtype(start, end);
+}
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
 				unsigned long size, pgprot_t vma_prot)
 {
@@ -605,9 +772,6 @@ int phys_mem_access_prot_allowed(struct 
  */
 int kernel_map_sync_memtype(u64 ma, unsigned long size, unsigned long flags)
 {
-	if (!pat_enabled)
-		return 0;
-
 	return ioremap_check_change_attr(ma >> PAGE_SHIFT, size, flags);
 }
 
@@ -628,11 +792,29 @@ static int reserve_pfn_range(u64 paddr, 
 	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
 	/*
-	 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
-	 * behavior with RAM pages by returning success.
+	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
+	 * track of number of mappings of RAM pages. We can assert that
+	 * the type requested matches the type of first page in the range.
 	 */
-	if (is_ram != 0)
+	if (is_ram) {
+		if (!pat_enabled)
+			return 0;
+
+		flags = lookup_memtype(paddr);
+		if (want_flags != flags) {
+			printk(KERN_WARNING
+			"%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+				current->comm, current->pid,
+				cattr_name(want_flags),
+				(unsigned long long)paddr,
+				(unsigned long long)(paddr + size),
+				cattr_name(flags));
+			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
+					      (~_PAGE_CACHE_MASK)) |
+					     flags);
+		}
 		return 0;
+	}
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -694,14 +876,6 @@ int track_pfn_vma_copy(struct vm_area_st
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
@@ -729,23 +903,24 @@ int track_pfn_vma_copy(struct vm_area_st
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
+	unsigned long flags;
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* reserve the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		return reserve_pfn_range(paddr, vma_size, prot, 0);
 	}
 
+	if (!pat_enabled)
+		return 0;
+
+	/* for vm_insert_pfn and friends, we set prot based on lookup */
+	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
 	return 0;
 }
 
@@ -760,14 +935,6 @@ void untrack_pfn_vma(struct vm_area_stru
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* free the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -844,7 +1011,7 @@ static int memtype_seq_show(struct seq_f
 	return 0;
 }
 
-static struct seq_operations memtype_seq_ops = {
+static const struct seq_operations memtype_seq_ops = {
 	.start = memtype_seq_start,
 	.next  = memtype_seq_next,
 	.stop  = memtype_seq_stop,
--- 12.2.orig/arch/x86/mm/pgtable-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/arch/x86/mm/pgtable-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -695,8 +695,7 @@ int ptep_set_access_flags(struct vm_area
 		if (likely(vma->vm_mm == current->mm)) {
 			if (HYPERVISOR_update_va_mapping(address,
 				entry,
-				uvm_multi(vma->vm_mm->cpu_vm_mask) |
-					UVMF_INVLPG))
+				uvm_multi(mm_cpumask(vma->vm_mm))|UVMF_INVLPG))
 				BUG();
 		} else {
 			xen_l1_entry_update(ptep, entry);
--- 12.2.orig/arch/x86/mm/physaddr.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/arch/x86/mm/physaddr.c	2011-02-01 14:54:13.000000000 +0100
@@ -8,6 +8,10 @@
 
 #ifdef CONFIG_X86_64
 
+#ifdef CONFIG_XEN
+#define phys_base 0
+#endif
+
 unsigned long __phys_addr(unsigned long x)
 {
 	if (x >= __START_KERNEL_map) {
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 12.2/drivers/acpi/acpi_pad-xen.c	2012-06-01 11:18:36.000000000 +0200
@@ -0,0 +1,235 @@
+/*
+ * acpi_pad-xen.c - Xen pad interface
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *    Author: Liu, Jinsong <jinsong.liu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <asm/hypervisor.h>
+
+#define ACPI_PROCESSOR_AGGREGATOR_CLASS	"processor_aggregator"
+#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Xen Processor Aggregator"
+#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
+static DEFINE_MUTEX(xen_cpu_lock);
+
+static int xen_acpi_pad_idle_cpus(unsigned int num_cpus)
+{
+	struct xen_platform_op op;
+
+	op.cmd = XENPF_core_parking;
+	op.u.core_parking.type = XEN_CORE_PARKING_SET;
+	op.u.core_parking.idle_nums = num_cpus;
+
+	return HYPERVISOR_platform_op(&op);
+}
+
+static long xen_acpi_pad_idle_cpus_num(void)
+{
+	struct xen_platform_op op;
+
+	op.cmd = XENPF_core_parking;
+	op.u.core_parking.type = XEN_CORE_PARKING_GET;
+
+	return (long)HYPERVISOR_platform_op(&op)
+	       ?: op.u.core_parking.idle_nums;
+}
+
+static ssize_t acpi_pad_idlecpus_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long num;
+	int err;
+
+	if (strict_strtoul(buf, 0, &num))
+		return -EINVAL;
+	mutex_lock(&xen_cpu_lock);
+	err = xen_acpi_pad_idle_cpus(num);
+	mutex_unlock(&xen_cpu_lock);
+	return err ?: count;
+}
+
+static ssize_t acpi_pad_idlecpus_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	long num = xen_acpi_pad_idle_cpus_num();
+
+	return num < 0 ? num : snprintf(buf, PAGE_SIZE, "%ld\n", num);
+}
+static DEVICE_ATTR(idlecpus, S_IRUGO|S_IWUSR,
+	acpi_pad_idlecpus_show,
+	acpi_pad_idlecpus_store);
+
+static int acpi_pad_add_sysfs(struct acpi_device *device)
+{
+	return device_create_file(&device->dev, &dev_attr_idlecpus);
+}
+
+static void acpi_pad_remove_sysfs(struct acpi_device *device)
+{
+	device_remove_file(&device->dev, &dev_attr_idlecpus);
+}
+
+/*
+ * Query firmware how many CPUs should be idle
+ * return -1 on failure
+ */
+static int acpi_pad_pur(acpi_handle handle)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *package;
+	int num = -1;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
+		return num;
+
+	if (!buffer.length || !buffer.pointer)
+		return num;
+
+	package = buffer.pointer;
+
+	if (package->type == ACPI_TYPE_PACKAGE &&
+		package->package.count == 2 &&
+		package->package.elements[0].integer.value == 1) /* rev 1 */
+
+		num = package->package.elements[1].integer.value;
+
+	kfree(buffer.pointer);
+	return num;
+}
+
+/* Notify firmware how many CPUs are idle */
+static void acpi_pad_ost(acpi_handle handle, int stat,
+	uint32_t idle_cpus)
+{
+	union acpi_object params[3] = {
+		{.type = ACPI_TYPE_INTEGER,},
+		{.type = ACPI_TYPE_INTEGER,},
+		{.type = ACPI_TYPE_BUFFER,},
+	};
+	struct acpi_object_list arg_list = {3, params};
+
+	params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
+	params[1].integer.value =  stat;
+	params[2].buffer.length = 4;
+	params[2].buffer.pointer = (void *)&idle_cpus;
+	acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
+}
+
+static void acpi_pad_handle_notify(acpi_handle handle)
+{
+	int num_cpus;
+	long idle_cpus;
+
+	mutex_lock(&xen_cpu_lock);
+	num_cpus = acpi_pad_pur(handle);
+	if (num_cpus < 0) {
+		mutex_unlock(&xen_cpu_lock);
+		return;
+	}
+
+	idle_cpus = xen_acpi_pad_idle_cpus(num_cpus)
+		    ?: xen_acpi_pad_idle_cpus_num();
+	if (idle_cpus >= 0)
+		acpi_pad_ost(handle, 0, idle_cpus);
+	mutex_unlock(&xen_cpu_lock);
+}
+
+static void acpi_pad_notify(acpi_handle handle, u32 event,
+	void *data)
+{
+	switch (event) {
+	case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
+		acpi_pad_handle_notify(handle);
+		break;
+	default:
+		printk(KERN_WARNING "Unsupported event [0x%x]\n", event);
+		break;
+	}
+}
+
+static int acpi_pad_add(struct acpi_device *device)
+{
+	acpi_status status;
+
+	strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);
+
+	if (acpi_pad_add_sysfs(device))
+		return -ENODEV;
+
+	status = acpi_install_notify_handler(device->handle,
+		ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
+	if (ACPI_FAILURE(status)) {
+		acpi_pad_remove_sysfs(device);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int acpi_pad_remove(struct acpi_device *device,
+	int type)
+{
+	mutex_lock(&xen_cpu_lock);
+	xen_acpi_pad_idle_cpus(0);
+	mutex_unlock(&xen_cpu_lock);
+
+	acpi_remove_notify_handler(device->handle,
+		ACPI_DEVICE_NOTIFY, acpi_pad_notify);
+	acpi_pad_remove_sysfs(device);
+	return 0;
+}
+
+static const struct acpi_device_id pad_device_ids[] = {
+	{"ACPI000C", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, pad_device_ids);
+
+static struct acpi_driver acpi_pad_driver = {
+	.name = "processor_aggregator",
+	.class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
+	.ids = pad_device_ids,
+	.ops = {
+		.add = acpi_pad_add,
+		.remove = acpi_pad_remove,
+	},
+};
+
+static int __init acpi_pad_init(void)
+{
+	if (xen_acpi_pad_idle_cpus_num() < 0)
+		return -ENODEV;
+	return acpi_bus_register_driver(&acpi_pad_driver);
+}
+
+static void __exit acpi_pad_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_pad_driver);
+}
+
+module_init(acpi_pad_init);
+module_exit(acpi_pad_exit);
+MODULE_AUTHOR("Liu, Jinsong <jinsong.liu@intel.com>");
+MODULE_DESCRIPTION("ACPI Processor Aggregator Driver");
+MODULE_LICENSE("GPL");
--- 12.2.orig/drivers/acpi/processor_driver.c	2012-02-08 12:13:33.000000000 +0100
+++ 12.2/drivers/acpi/processor_driver.c	2012-05-23 13:34:56.000000000 +0200
@@ -507,7 +507,7 @@ static __ref int acpi_processor_start(st
 
 	result = processor_extcntl_prepare(pr);
 	if (result)
-		goto end;
+		goto err_power_exit;
 
 	pr->cdev = thermal_cooling_device_register("Processor", device,
 						   &processor_cooling_ops);
--- 12.2.orig/drivers/char/agp/agp.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/agp.h	2011-02-01 14:54:13.000000000 +0100
@@ -31,6 +31,10 @@
 
 #include <asm/agp.h>	/* for flush_agp_cache() */
 
+#ifndef virt_to_gart
+#define virt_to_gart virt_to_phys
+#endif
+
 #define PFX "agpgart: "
 
 //#define AGP_DEBUG 1
--- 12.2.orig/drivers/char/agp/amd-k7-agp.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/amd-k7-agp.c	2011-02-17 10:18:42.000000000 +0100
@@ -142,7 +142,7 @@ static int amd_create_gatt_table(struct 
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Get the address for the gart region.
 	 * This is a bus address even on the alpha, b/c its
@@ -155,7 +155,7 @@ static int amd_create_gatt_table(struct 
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
-		writel(virt_to_phys(amd_irongate_private.gatt_pages[i]->real) | 1,
+		writel(virt_to_gart(amd_irongate_private.gatt_pages[i]->real) | 1,
 			page_dir.remapped+GET_PAGE_DIR_OFF(addr));
 		readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr));	/* PCI Posting. */
 	}
--- 12.2.orig/drivers/char/agp/amd64-agp.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/amd64-agp.c	2011-02-01 14:54:13.000000000 +0100
@@ -178,7 +178,7 @@ static const struct aper_size_info_32 am
 
 static int amd_8151_configure(void)
 {
-	unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
+	unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
 	int i;
 
 	if (!amd_nb_has_feature(AMD_NB_GART))
@@ -583,7 +583,7 @@ static void __devexit agp_amd64_remove(s
 {
 	struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
 
-	release_mem_region(virt_to_phys(bridge->gatt_table_real),
+	release_mem_region(virt_to_gart(bridge->gatt_table_real),
 			   amd64_aperture_sizes[bridge->aperture_size_idx].size);
 	agp_remove_bridge(bridge);
 	agp_put_bridge(bridge);
--- 12.2.orig/drivers/char/agp/ati-agp.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/ati-agp.c	2011-02-01 14:54:13.000000000 +0100
@@ -361,7 +361,7 @@ static int ati_create_gatt_table(struct 
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *) page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Write out the size register */
 	current_size = A_SIZE_LVL2(agp_bridge->current_size);
@@ -391,7 +391,7 @@ static int ati_create_gatt_table(struct 
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
-		writel(virt_to_phys(ati_generic_private.gatt_pages[i]->real) | 1,
+		writel(virt_to_gart(ati_generic_private.gatt_pages[i]->real) | 1,
 			page_dir.remapped+GET_PAGE_DIR_OFF(addr));
 		readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr));	/* PCI Posting. */
 	}
--- 12.2.orig/drivers/char/agp/efficeon-agp.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/efficeon-agp.c	2011-02-01 14:54:13.000000000 +0100
@@ -227,7 +227,7 @@ static int efficeon_create_gatt_table(st
 
 		efficeon_private.l1_table[index] = page;
 
-		value = virt_to_phys((unsigned long *)page) | pati | present | index;
+		value = virt_to_gart((unsigned long *)page) | pati | present | index;
 
 		pci_write_config_dword(agp_bridge->dev,
 			EFFICEON_ATTPAGE, value);
--- 12.2.orig/drivers/char/agp/generic.c	2011-04-28 11:13:24.000000000 +0200
+++ 12.2/drivers/char/agp/generic.c	2011-04-28 11:14:07.000000000 +0200
@@ -960,7 +960,7 @@ int agp_generic_create_gatt_table(struct
 
 	bridge->gatt_table = (void *)table;
 #else
-	bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
+	bridge->gatt_table = ioremap_nocache(virt_to_gart(table),
 					(PAGE_SIZE * (1 << page_order)));
 	bridge->driver->cache_flush();
 #endif
@@ -973,7 +973,7 @@ int agp_generic_create_gatt_table(struct
 
 		return -ENOMEM;
 	}
-	bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real);
+	bridge->gatt_bus_addr = virt_to_gart(bridge->gatt_table_real);
 
 	/* AK: bogus, should encode addresses > 4GB */
 	for (i = 0; i < num_entries; i++) {
--- 12.2.orig/drivers/char/agp/sworks-agp.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/char/agp/sworks-agp.c	2011-02-01 14:54:13.000000000 +0100
@@ -155,7 +155,7 @@ static int serverworks_create_gatt_table
 	/* Create a fake scratch directory */
 	for (i = 0; i < 1024; i++) {
 		writel(agp_bridge->scratch_page, serverworks_private.scratch_dir.remapped+i);
-		writel(virt_to_phys(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
+		writel(virt_to_gart(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
 	}
 
 	retval = serverworks_create_gatt_pages(value->num_entries / 1024);
@@ -167,7 +167,7 @@ static int serverworks_create_gatt_table
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Get the address for the gart region.
 	 * This is a bus address even on the alpha, b/c its
@@ -179,7 +179,7 @@ static int serverworks_create_gatt_table
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++)
-		writel(virt_to_phys(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
+		writel(virt_to_gart(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
 
 	return 0;
 }
--- 12.2.orig/drivers/dma/ioat/dma.h	2012-04-10 16:46:27.000000000 +0200
+++ 12.2/drivers/dma/ioat/dma.h	2012-04-10 17:01:09.000000000 +0200
@@ -339,8 +339,6 @@ __ioat_dca_init(struct pci_dev *pdev, vo
 	return NULL;
 }
 #define ioat_dca_init __ioat_dca_init
-#define ioat2_dca_init __ioat_dca_init
-#define ioat3_dca_init __ioat_dca_init
 #endif
 
 #endif /* IOATDMA_H */
--- 12.2.orig/drivers/dma/ioat/dma_v2.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/dma/ioat/dma_v2.h	2011-02-01 14:54:13.000000000 +0100
@@ -176,4 +176,10 @@ int ioat2_quiesce(struct ioat_chan_commo
 int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
 extern struct kobj_type ioat2_ktype;
 extern struct kmem_cache *ioat2_cache;
+
+#ifdef CONFIG_XEN
+#define ioat2_dca_init __ioat_dca_init
+#define ioat3_dca_init __ioat_dca_init
+#endif
+
 #endif /* IOATDMA_V2_H */
--- 12.2.orig/drivers/dma/ioat/hw.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/dma/ioat/hw.h	2011-02-01 14:54:13.000000000 +0100
@@ -39,7 +39,11 @@
 #define IOAT_VER_3_0            0x30    /* Version 3.0 */
 #define IOAT_VER_3_2            0x32    /* Version 3.2 */
 
+#ifndef CONFIG_XEN
 int system_has_dca_enabled(struct pci_dev *pdev);
+#else
+static inline int system_has_dca_enabled(struct pci_dev *pdev) { return 0; }
+#endif
 
 struct ioat_dma_descriptor {
 	uint32_t	size;
--- 12.2.orig/drivers/gpu/drm/radeon/radeon_device.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/gpu/drm/radeon/radeon_device.c	2012-04-10 17:01:29.000000000 +0200
@@ -448,6 +448,18 @@ int radeon_dummy_page_init(struct radeon
 	rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
 	if (rdev->dummy_page.page == NULL)
 		return -ENOMEM;
+#ifdef CONFIG_XEN
+	{
+		int ret = xen_limit_pages_to_max_mfn(rdev->dummy_page.page,
+						     0, 32);
+
+		if (!ret)
+			clear_page(page_address(rdev->dummy_page.page));
+		else
+			dev_warn(rdev->dev,
+				 "Error restricting dummy page: %d\n", ret);
+	}
+#endif
 	rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
 					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 	if (pci_dma_mapping_error(rdev->pdev, rdev->dummy_page.addr)) {
--- 12.2.orig/drivers/hwmon/coretemp-xen.c	2011-02-01 14:38:38.000000000 +0100
+++ 12.2/drivers/hwmon/coretemp-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -162,17 +162,26 @@ static int adjust_tjmax(struct coretemp_
 	/* The 100C is default for both mobile and non mobile CPUs */
 
 	int tjmax = 100000;
-	int ismobile = 1;
+	int tjmax_ee = 85000;
+	int usemsr_ee = 1;
 	int err;
 	u32 eax, edx;
 
 	/* Early chips have no MSR for TjMax */
 
 	if ((c->x86_model == 0xf) && (c->x86_mask < 4)) {
-		ismobile = 0;
+		usemsr_ee = 0;
 	}
 
-	if ((c->x86_model > 0xe) && (ismobile)) {
+	/* Atoms seems to have TjMax at 90C */
+
+	if (c->x86_model == 0x1c) {
+		usemsr_ee = 0;
+		tjmax = 90000;
+	}
+
+	if ((c->x86_model > 0xe) && (usemsr_ee)) {
+		u8 platform_id;
 
 		/* Now we can detect the mobile CPU using Intel provided table
 		   http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
@@ -184,13 +193,29 @@ static int adjust_tjmax(struct coretemp_
 			dev_warn(dev,
 				 "Unable to access MSR 0x17, assuming desktop"
 				 " CPU\n");
-			ismobile = 0;
-		} else if (!(eax & 0x10000000)) {
-			ismobile = 0;
+			usemsr_ee = 0;
+		} else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
+			/* Trust bit 28 up to Penryn, I could not find any
+			   documentation on that; if you happen to know
+			   someone at Intel please ask */
+			usemsr_ee = 0;
+		} else {
+			/* Platform ID bits 52:50 (EDX starts at bit 32) */
+			platform_id = (edx >> 18) & 0x7;
+
+			/* Mobile Penryn CPU seems to be platform ID 7 or 5
+			  (guesswork) */
+			if ((c->x86_model == 0x17) &&
+			    ((platform_id == 5) || (platform_id == 7))) {
+				/* If MSR EE bit is set, set it to 90 degrees C,
+				   otherwise 105 degrees C */
+				tjmax_ee = 90000;
+				tjmax = 105000;
+			}
 		}
 	}
 
-	if (ismobile) {
+	if (usemsr_ee) {
 
 		err = rdmsr_safe_on_pcpu(id, 0xee, &eax, &edx);
 		if (err < 0) {
@@ -198,9 +223,11 @@ static int adjust_tjmax(struct coretemp_
 				 "Unable to access MSR 0xEE, for Tjmax, left"
 				 " at default");
 		} else if (eax & 0x40000000) {
-			tjmax = 85000;
+			tjmax = tjmax_ee;
 		}
-	} else {
+	/* if we dont use msr EE it means we are desktop CPU (with exeception
+	   of Atom) */
+	} else if (tjmax == 100000) {
 		dev_warn(dev, "Using relative temperature scale!\n");
 	}
 
@@ -246,9 +273,9 @@ static int coretemp_probe(struct platfor
 	data->tjmax = adjust_tjmax(data, pdev->id, &pdev->dev);
 
 	/* read the still undocumented IA32_TEMPERATURE_TARGET it exists
-	   on older CPUs but not in this register */
+	   on older CPUs but not in this register, Atoms don't have it either */
 
-	if (data->x86_model > 0xe) {
+	if ((data->x86_model > 0xe) && (data->x86_model != 0x1c)) {
 		err = rdmsr_safe_on_pcpu(pdev->id, 0x1a2, &eax, &edx);
 		if (err < 0) {
 			dev_warn(&pdev->dev, "Unable to read"
@@ -360,11 +387,15 @@ static int coretemp_device_add(unsigned 
 	if (err)
 		goto exit_entry_free;
 
-	/* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */
+	/* check if family 6, models 0xe (Pentium M DC),
+	  0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm),
+	  0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom),
+	  0x1e (Lynnfield) */
 	if (info.x86 != 0x6 ||
 	    !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf) ||
 		(pdev_entry->x86_model == 0x16) || (pdev_entry->x86_model == 0x17) ||
-		(pdev_entry->x86_model == 0x1A))) {
+		(pdev_entry->x86_model == 0x1a) || (pdev_entry->x86_model == 0x1c) ||
+		(pdev_entry->x86_model == 0x1e))) {
 
 		/* supported CPU not found, but report the unknown
 		   family 6 CPU */
--- 12.2.orig/drivers/net/Kconfig	2012-02-08 11:35:09.000000000 +0100
+++ 12.2/drivers/net/Kconfig	2012-02-08 12:16:22.000000000 +0100
@@ -336,7 +336,7 @@ config XEN_NETDEV_BACKEND
 
 config VMXNET3
 	tristate "VMware VMXNET3 ethernet driver"
-	depends on PCI && INET
+	depends on PCI && INET && !XEN
 	help
 	  This driver supports VMware's vmxnet3 virtual ethernet NIC.
 	  To compile this driver as a module, choose M here: the
--- 12.2.orig/drivers/pci/msi-xen.c	2012-04-04 11:03:28.000000000 +0200
+++ 12.2/drivers/pci/msi-xen.c	2012-04-04 11:03:37.000000000 +0200
@@ -16,12 +16,11 @@
 #include <linux/proc_fs.h>
 #include <linux/msi.h>
 #include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/io.h>
 
 #include <xen/evtchn.h>
 
-#include <asm/errno.h>
-#include <asm/io.h>
-
 #include "pci.h"
 #include "msi.h"
 
@@ -470,7 +469,7 @@ static int msix_capability_init(struct p
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 0, else return an error code.
  **/
-static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
+static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
 {
 	struct pci_bus *bus;
 	int ret;
@@ -487,8 +486,9 @@ static int pci_msi_check_device(struct p
 	if (nvec < 1)
 		return -ERANGE;
 
-	/* Any bridge which does NOT route MSI transactions from it's
-	 * secondary bus to it's primary bus must set NO_MSI flag on
+	/*
+	 * Any bridge which does NOT route MSI transactions from its
+	 * secondary bus to its primary bus must set NO_MSI flag on
 	 * the secondary pci_bus.
 	 * We expect only arch-specific PCI host bus controller driver
 	 * or quirks for specific PCI bridges to be setting NO_MSI.
@@ -606,7 +606,7 @@ void pci_msi_shutdown(struct pci_dev *de
 	dev->msi_enabled = 0;
 }
 
-void pci_disable_msi(struct pci_dev* dev)
+void pci_disable_msi(struct pci_dev *dev)
 {
 	pci_msi_shutdown(dev);
 }
@@ -646,14 +646,14 @@ int pci_msix_table_size(struct pci_dev *
  **/
 extern int pci_frontend_enable_msix(struct pci_dev *dev,
 		struct msix_entry *entries, int nvec);
-int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 {
 	int status, nr_entries;
 	int i, j, temp;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
 	if (!entries)
- 		return -EINVAL;
+		return -EINVAL;
 
 	if (!is_initial_xendomain()) {
 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
@@ -730,7 +730,7 @@ int pci_enable_msix(struct pci_dev* dev,
 EXPORT_SYMBOL(pci_enable_msix);
 
 extern void pci_frontend_disable_msix(struct pci_dev* dev);
-void pci_msix_shutdown(struct pci_dev* dev)
+void pci_msix_shutdown(struct pci_dev *dev)
 {
 	if (!pci_msi_enable || !dev || !dev->msix_enabled)
 		return;
@@ -751,7 +751,8 @@ void pci_msix_shutdown(struct pci_dev* d
 	}
 	dev->msix_enabled = 0;
 }
-void pci_disable_msix(struct pci_dev* dev)
+
+void pci_disable_msix(struct pci_dev *dev)
 {
 	pci_msix_shutdown(dev);
 }
@@ -766,14 +767,14 @@ EXPORT_SYMBOL(pci_disable_msix);
  * allocated for this device function, are reclaimed to unused state,
  * which may be used later on.
  **/
-void msi_remove_pci_irq_vectors(struct pci_dev* dev)
+void msi_remove_pci_irq_vectors(struct pci_dev *dev)
 {
 	unsigned long flags;
 	struct msi_dev_list *msi_dev_entry;
 	struct msi_pirq_entry *pirq_entry, *tmp;
 
 	if (!pci_msi_enable || !dev)
- 		return;
+		return;
 
 	msi_dev_entry = get_msi_dev_pirq_list(dev);
 
--- 12.2.orig/drivers/pci/probe.c	2012-04-10 16:51:44.000000000 +0200
+++ 12.2/drivers/pci/probe.c	2012-04-10 17:01:36.000000000 +0200
@@ -1429,13 +1429,20 @@ int pci_scan_slot(struct pci_bus *bus, i
 		return 0; /* Already scanned the entire slot */
 
 	dev = pci_scan_single_device(bus, devfn);
-	if (!dev)
+	if (!dev) {
+#ifdef pcibios_scan_all_fns
+		if (!pcibios_scan_all_fns(bus, devfn))
+#endif
 		return 0;
-	if (!dev->is_added)
+	} else if (!dev->is_added)
 		nr++;
 
 	if (pci_ari_enabled(bus))
 		next_fn = next_ari_fn;
+#ifdef pcibios_scan_all_fns
+	else if (pcibios_scan_all_fns(bus, devfn))
+		next_fn = next_trad_fn;
+#endif
 	else if (dev->multifunction)
 		next_fn = next_trad_fn;
 
--- 12.2.orig/drivers/sfi/sfi_core.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/sfi/sfi_core.c	2011-02-01 14:54:13.000000000 +0100
@@ -486,6 +486,11 @@ void __init sfi_init(void)
 	if (!acpi_disabled)
 		disable_sfi();
 
+#ifdef CONFIG_XEN
+	if (!is_initial_xendomain())
+		disable_sfi();
+#endif
+
 	if (sfi_disabled)
 		return;
 
--- 12.2.orig/drivers/hv/Kconfig	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/drivers/hv/Kconfig	2012-02-08 12:16:15.000000000 +0100
@@ -2,7 +2,7 @@ menu "Microsoft Hyper-V guest support"
 
 config HYPERV
 	tristate "Microsoft Hyper-V client drivers"
-	depends on X86 && ACPI && PCI
+	depends on X86 && ACPI && PCI && !XEN
 	help
 	  Select this option to run Linux as a Hyper-V client operating
 	  system.
--- 12.2.orig/drivers/xen/Kconfig	2012-04-03 13:15:22.000000000 +0200
+++ 12.2/drivers/xen/Kconfig	2012-02-17 14:34:14.000000000 +0100
@@ -22,6 +22,7 @@ config XEN_UNPRIVILEGED_GUEST
 	select PM
 	select PM_SLEEP
 	select PM_SLEEP_SMP if SMP
+	select PM_RUNTIME if PCI
 	select SUSPEND
 
 config XEN_PRIVCMD
@@ -339,6 +340,10 @@ config HAVE_IRQ_IGNORE_UNHANDLED
 config NO_IDLE_HZ
 	def_bool y
 
+config ARCH_HAS_WALK_MEMORY
+	def_bool y
+	depends on X86
+
 config XEN_SMPBOOT
 	def_bool y
 	depends on SMP && !PPC_XEN
--- 12.2.orig/drivers/xen/Makefile	2011-11-03 12:27:36.000000000 +0100
+++ 12.2/drivers/xen/Makefile	2011-02-01 14:54:13.000000000 +0100
@@ -10,6 +10,11 @@ obj-$(CONFIG_XEN)		+= char/
 
 xen-backend-$(CONFIG_XEN_BACKEND)	:= util.o
 
+nostackp := $(call cc-option, -fno-stack-protector)
+ifeq ($(CONFIG_PARAVIRT_XEN),y)
+CFLAGS_features.o			:= $(nostackp)
+endif
+
 obj-$(CONFIG_XEN)			+= features.o $(xen-backend-y) $(xen-backend-m)
 obj-$(CONFIG_HOTPLUG_CPU)		+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
--- 12.2.orig/drivers/xen/balloon/balloon.c	2012-06-06 14:02:30.000000000 +0200
+++ 12.2/drivers/xen/balloon/balloon.c	2012-06-06 14:03:00.000000000 +0200
@@ -72,6 +72,11 @@ static DEFINE_MUTEX(balloon_mutex);
  */
 DEFINE_SPINLOCK(balloon_lock);
 
+#ifndef MODULE
+#include <linux/pagevec.h>
+static struct pagevec free_pagevec;
+#endif
+
 struct balloon_stats balloon_stats;
 
 /* We increase/decrease in batches which fit in a page */
@@ -192,14 +197,27 @@ static struct page *balloon_next_page(st
 static inline void balloon_free_page(struct page *page)
 {
 #ifndef MODULE
-	if (put_page_testzero(page))
-		free_cold_page(page);
+	if (put_page_testzero(page) && !pagevec_add(&free_pagevec, page)) {
+		__pagevec_free(&free_pagevec);
+		pagevec_reinit(&free_pagevec);
+	}
 #else
-	/* free_cold_page() is not being exported. */
+	/* pagevec interface is not being exported. */
 	__free_page(page);
 #endif
 }
 
+static inline void balloon_free_and_unlock(unsigned long flags)
+{
+#ifndef MODULE
+	if (pagevec_count(&free_pagevec)) {
+		__pagevec_free(&free_pagevec);
+		pagevec_reinit(&free_pagevec);
+	}
+#endif
+	balloon_unlock(flags);
+}
+
 static void balloon_alarm(unsigned long unused)
 {
 	schedule_work(&balloon_worker);
@@ -311,7 +329,7 @@ static int increase_reservation(unsigned
 	totalram_pages = bs.current_pages - totalram_bias;
 
  out:
-	balloon_unlock(flags);
+	balloon_free_and_unlock(flags);
 
 #ifndef MODULE
 	setup_per_zone_wmarks();
@@ -548,6 +566,7 @@ static int __init balloon_init(void)
 	IPRINTK("Initialising balloon driver.\n");
 
 #ifdef CONFIG_XEN
+	pagevec_init(&free_pagevec, true);
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 	totalram_pages   = bs.current_pages;
 #else 
@@ -705,8 +724,8 @@ struct page **alloc_empty_pages_and_page
 		}
 
 		if (ret != 0) {
-			balloon_unlock(flags);
 			balloon_free_page(page);
+			balloon_free_and_unlock(flags);
 			goto err;
 		}
 
--- 12.2.orig/drivers/xen/blkfront/vbd.c	2012-03-12 16:15:59.000000000 +0100
+++ 12.2/drivers/xen/blkfront/vbd.c	2012-03-12 13:50:03.000000000 +0100
@@ -126,7 +126,7 @@ static struct xlbd_major_info *major_inf
 
 #define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
 
-static struct block_device_operations xlvbd_block_fops =
+static const struct block_device_operations xlvbd_block_fops =
 {
 	.owner = THIS_MODULE,
 	.open = blkif_open,
--- 12.2.orig/drivers/xen/blktap/blktap.c	2012-05-23 13:34:28.000000000 +0200
+++ 12.2/drivers/xen/blktap/blktap.c	2012-05-23 13:35:02.000000000 +0200
@@ -276,13 +276,13 @@ static inline unsigned int OFFSET_TO_SEG
     } while(0)
 
 
-static char *blktap_nodename(struct device *dev)
+static char *blktap_devnode(struct device *dev, mode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "xen/blktap%u", MINOR(dev->devt));
 }
 
 static struct device_type blktap_type = {
-	.nodename = blktap_nodename
+	.devnode = blktap_devnode
 };
 
 /******************************************************************
@@ -1730,7 +1730,7 @@ static int __init blkif_init(void)
 	tap_blkif_xenbus_init();
 
 	/* Dynamically allocate a major for this device */
-	ret = register_chrdev(0, "blktap", &blktap_fops);
+	ret = __register_chrdev(0, 0, MAX_TAP_DEV, "blktap", &blktap_fops);
 
 	if (ret < 0) {
 		WPRINTK("Couldn't register /dev/xen/blktap\n");
--- 12.2.orig/drivers/xen/blktap2/control.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/drivers/xen/blktap2/control.c	2011-02-01 14:54:13.000000000 +0100
@@ -154,7 +154,7 @@ static const struct file_operations blkt
 static struct miscdevice blktap_misc = {
 	.minor    = MISC_DYNAMIC_MINOR,
 	.name     = "blktap-control",
-	.devnode  = BLKTAP2_DEV_DIR "control",
+	.nodename = BLKTAP2_DEV_DIR "control",
 	.fops     = &blktap_control_file_operations,
 };
 
--- 12.2.orig/drivers/xen/blktap2/device.c	2012-02-16 13:29:12.000000000 +0100
+++ 12.2/drivers/xen/blktap2/device.c	2012-02-16 13:32:37.000000000 +0100
@@ -119,7 +119,7 @@ blktap_device_ioctl(struct block_device 
 	return 0;
 }
 
-static struct block_device_operations blktap_device_file_operations = {
+static const struct block_device_operations blktap_device_file_operations = {
 	.owner     = THIS_MODULE,
 	.open      = blktap_device_open,
 	.release   = blktap_device_release,
@@ -1069,7 +1069,7 @@ blktap_device_destroy(struct blktap *tap
 	return 0;
 }
 
-static char *blktap_nodename(struct gendisk *gd)
+static char *blktap_devnode(struct gendisk *gd, mode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
 			 gd->first_minor);
@@ -1111,7 +1111,7 @@ blktap_device_create(struct blktap *tap)
 
 	gd->major = blktap_device_major;
 	gd->first_minor = minor;
-	gd->nodename = blktap_nodename;
+	gd->devnode = blktap_devnode;
 	gd->fops = &blktap_device_file_operations;
 	gd->private_data = dev;
 
--- 12.2.orig/drivers/xen/blktap2/sysfs.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/drivers/xen/blktap2/sysfs.c	2011-02-01 14:54:13.000000000 +0100
@@ -436,7 +436,7 @@ blktap_sysfs_free(void)
 	class_destroy(class);
 }
 
-static char *blktap_nodename(struct device *dev)
+static char *blktap_devnode(struct device *dev, mode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
 			 MINOR(dev->devt));
@@ -455,7 +455,7 @@ blktap_sysfs_init(void)
 	if (IS_ERR(cls))
 		return PTR_ERR(cls);
 
-	cls->nodename = blktap_nodename;
+	cls->devnode = blktap_devnode;
 
 	err = class_create_file(cls, &class_attr_verbosity);
 	if (!err) {
--- 12.2.orig/drivers/xen/blktap2-new/ring.c	2011-02-24 15:14:47.000000000 +0100
+++ 12.2/drivers/xen/blktap2-new/ring.c	2011-02-24 14:19:13.000000000 +0100
@@ -8,7 +8,6 @@
 #include "blktap.h"
 
 int blktap_ring_major;
-static struct cdev blktap_ring_cdev;
 
  /* 
   * BLKTAP - immediately before the mmap area,
@@ -511,26 +510,16 @@ blktap_ring_debug(struct blktap *tap, ch
 int __init
 blktap_ring_init(void)
 {
-	dev_t dev = 0;
 	int err;
 
-	cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
-	blktap_ring_cdev.owner = THIS_MODULE;
-
-	err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
+	err = __register_chrdev(0, 0, MAX_BLKTAP_DEVICE, "blktap2",
+				&blktap_ring_file_operations);
 	if (err < 0) {
 		BTERR("error registering ring devices: %d\n", err);
 		return err;
 	}
 
-	err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
-	if (err) {
-		BTERR("error adding ring device: %d\n", err);
-		unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
-		return err;
-	}
-
-	blktap_ring_major = MAJOR(dev);
+	blktap_ring_major = err;
 	BTINFO("blktap ring major: %d\n", blktap_ring_major);
 
 	return 0;
@@ -542,9 +531,8 @@ blktap_ring_exit(void)
 	if (!blktap_ring_major)
 		return;
 
-	cdev_del(&blktap_ring_cdev);
-	unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
-				 MAX_BLKTAP_DEVICE);
+	__unregister_chrdev(blktap_ring_major, 0, MAX_BLKTAP_DEVICE,
+			    "blktap2");
 
 	blktap_ring_major = 0;
 }
--- 12.2.orig/drivers/xen/core/evtchn.c	2012-06-06 14:02:11.000000000 +0200
+++ 12.2/drivers/xen/core/evtchn.c	2011-02-01 14:54:13.000000000 +0100
@@ -143,13 +143,13 @@ unsigned int irq_from_evtchn(unsigned in
 EXPORT_SYMBOL_GPL(irq_from_evtchn);
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+DEFINE_PER_CPU(int[NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
+DEFINE_PER_CPU(int[NR_IPIS], ipi_to_irq) = {[0 ... NR_IPIS-1] = -1};
 
 #ifdef CONFIG_SMP
 
--- 12.2.orig/drivers/xen/core/reboot.c	2011-02-01 14:38:38.000000000 +0100
+++ 12.2/drivers/xen/core/reboot.c	2011-02-01 14:54:13.000000000 +0100
@@ -82,7 +82,7 @@ static int xen_suspend(void *__unused)
 	int err, old_state;
 
 	daemonize("suspend");
-	err = set_cpus_allowed(current, cpumask_of_cpu(0));
+	err = set_cpus_allowed_ptr(current, cpumask_of(0));
 	if (err) {
 		pr_err("Xen suspend can't run on CPU0 (%d)\n", err);
 		goto fail;
--- 12.2.orig/drivers/xen/evtchn.c	2011-04-13 13:55:08.000000000 +0200
+++ 12.2/drivers/xen/evtchn.c	2011-02-01 14:54:13.000000000 +0100
@@ -530,7 +530,7 @@ static const struct file_operations evtc
 static struct miscdevice evtchn_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
 	.name         = "xen/evtchn",
-	.devnode      = "xen/evtchn",
+	.nodename     = "xen/evtchn",
 	.fops         = &evtchn_fops,
 };
 static int __init evtchn_init(void)
--- 12.2.orig/drivers/xen/gntdev/gntdev.c	2012-05-23 13:34:31.000000000 +0200
+++ 12.2/drivers/xen/gntdev/gntdev.c	2012-05-23 13:34:59.000000000 +0200
@@ -356,7 +356,7 @@ nomem_out:
 static struct miscdevice gntdev_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
 	.name         = GNTDEV_NAME,
-	.devnode      = "xen/" GNTDEV_NAME,
+	.nodename     = "xen/" GNTDEV_NAME,
 	.fops         = &gntdev_fops,
 };
 
--- 12.2.orig/drivers/xen/netback/interface.c	2012-01-24 11:32:49.000000000 +0100
+++ 12.2/drivers/xen/netback/interface.c	2012-01-24 14:11:36.000000000 +0100
@@ -209,7 +209,7 @@ static void netbk_get_strings(struct net
 	}
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_drvinfo = netbk_get_drvinfo,
 
--- 12.2.orig/drivers/xen/netback/loopback.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/drivers/xen/netback/loopback.c	2011-03-01 11:52:41.000000000 +0100
@@ -136,7 +136,7 @@ static int loopback_start_xmit(struct sk
 	if (!skb_remove_foreign_references(skb)) {
 		dev->stats.tx_dropped++;
 		dev_kfree_skb(skb);
-		return 0;
+		return NETDEV_TX_OK;
 	}
 
 	dst_release(skb_dst(skb));
@@ -162,7 +162,7 @@ static int loopback_start_xmit(struct sk
 
 	netif_rx(skb);
 
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
@@ -172,7 +172,7 @@ static void get_drvinfo(struct net_devic
 		 loopback_priv(dev)->loop_idx);
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_drvinfo = get_drvinfo,
 
--- 12.2.orig/drivers/xen/netback/netback.c	2012-06-06 14:01:11.000000000 +0200
+++ 12.2/drivers/xen/netback/netback.c	2011-04-11 15:05:22.000000000 +0200
@@ -351,12 +351,12 @@ int netif_be_start_xmit(struct sk_buff *
 	skb_queue_tail(&rx_queue, skb);
 	tasklet_schedule(&net_rx_tasklet);
 
-	return 0;
+	return NETDEV_TX_OK;
 
  drop:
 	dev->stats.tx_dropped++;
 	dev_kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 #if 0
--- 12.2.orig/drivers/xen/netfront/netfront.c	2012-03-12 13:49:33.000000000 +0100
+++ 12.2/drivers/xen/netfront/netfront.c	2012-06-06 14:03:04.000000000 +0200
@@ -951,7 +951,7 @@ static int network_start_xmit(struct sk_
  	if (np->accel_vif_state.hooks && 
  	    np->accel_vif_state.hooks->start_xmit(skb, dev)) { 
  		/* Fast path has sent this packet */ 
- 		return 0; 
+ 		return NETDEV_TX_OK;
  	} 
 
 	frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
@@ -1037,12 +1037,12 @@ static int network_start_xmit(struct sk_
 
 	spin_unlock_irq(&np->tx_lock);
 
-	return 0;
+	return NETDEV_TX_OK;
 
  drop:
 	dev->stats.tx_dropped++;
 	dev_kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static irqreturn_t netif_int(int irq, void *dev_id)
@@ -1909,7 +1909,7 @@ static void netif_uninit(struct net_devi
 	gnttab_free_grant_references(np->gref_rx_head);
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_drvinfo = netfront_get_drvinfo,
 	.get_tx_csum = ethtool_op_get_tx_csum,
--- 12.2.orig/drivers/xen/sfc_netback/accel_fwd.c	2011-01-31 17:56:27.000000000 +0100
+++ 12.2/drivers/xen/sfc_netback/accel_fwd.c	2011-02-01 14:54:13.000000000 +0100
@@ -181,11 +181,10 @@ int netback_accel_fwd_add(const __u8 *ma
 	unsigned long flags;
 	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
 	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
-	DECLARE_MAC_BUF(buf);
 
 	BUG_ON(fwd_priv == NULL);
 
-	DPRINTK("Adding mac %s\n", print_mac(buf, mac));
+	DPRINTK("Adding mac %pM\n", mac);
        
 	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
 	
@@ -200,8 +199,7 @@ int netback_accel_fwd_add(const __u8 *ma
 	if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
 			       (cuckoo_hash_key *)(&key), &rc) != 0) {
 		spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
-		EPRINTK("MAC address %s already accelerated.\n",
-			print_mac(buf, mac));
+		EPRINTK("MAC address %pM already accelerated.\n", mac);
 		return -EEXIST;
 	}
 
@@ -236,9 +234,8 @@ void netback_accel_fwd_remove(const __u8
 	unsigned long flags;
 	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
 	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
-	DECLARE_MAC_BUF(buf);
 
-	DPRINTK("Removing mac %s\n", print_mac(buf, mac));
+	DPRINTK("Removing mac %pM\n", mac);
 
 	BUG_ON(fwd_priv == NULL);
 
@@ -396,16 +393,14 @@ void netback_accel_tx_packet(struct sk_b
 
 	if (is_broadcast_ether_addr(skb_mac_header(skb))
 	    && packet_is_arp_reply(skb)) {
-		DECLARE_MAC_BUF(buf);
-
 		/*
 		 * update our fast path forwarding to reflect this
 		 * gratuitous ARP
 		 */ 
 		mac = skb_mac_header(skb)+ETH_ALEN;
 
-		DPRINTK("%s: found gratuitous ARP for %s\n",
-			__FUNCTION__, print_mac(buf, mac));
+		DPRINTK("%s: found gratuitous ARP for %pM\n",
+			__FUNCTION__, mac);
 
 		spin_lock_irqsave(&fwd_set->fwd_lock, flags);
 		/*
--- 12.2.orig/drivers/xen/sfc_netback/accel_msg.c	2011-01-31 17:56:27.000000000 +0100
+++ 12.2/drivers/xen/sfc_netback/accel_msg.c	2011-02-01 14:54:13.000000000 +0100
@@ -57,11 +57,10 @@ static void netback_accel_msg_tx_localma
 {
 	unsigned long lock_state;
 	struct net_accel_msg *msg;
-	DECLARE_MAC_BUF(buf);
 
 	BUG_ON(bend == NULL || mac == NULL);
 
-	VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac));
+	VPRINTK("Sending local mac message: %pM\n", mac);
 	
 	msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
 				       &lock_state);
--- 12.2.orig/drivers/xen/sfc_netfront/accel_msg.c	2011-02-01 14:44:12.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel_msg.c	2011-02-01 14:54:13.000000000 +0100
@@ -327,10 +327,8 @@ static int vnic_process_localmac_msg(net
 	cuckoo_hash_mac_key key;
 
 	if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
-		DECLARE_MAC_BUF(buf);
-
-		DPRINTK("MAC has moved, could be local: %s\n",
-			print_mac(buf, msg->u.localmac.mac));
+		DPRINTK("MAC has moved, could be local: %pM\n",
+			msg->u.localmac.mac);
 		key = cuckoo_mac_to_key(msg->u.localmac.mac);
 		spin_lock_irqsave(&vnic->table_lock, flags);
 		/* Try to remove it, not a big deal if not there */
--- 12.2.orig/drivers/xen/sfc_netfront/accel_vi.c	2011-06-30 16:33:02.000000000 +0200
+++ 12.2/drivers/xen/sfc_netfront/accel_vi.c	2011-06-30 17:09:25.000000000 +0200
@@ -645,10 +645,7 @@ netfront_accel_vi_tx_post(netfront_accel
 					  (cuckoo_hash_key *)(&key), &value);
 
 	if (!try_fastpath) {
-		DECLARE_MAC_BUF(buf);
-
-		VPRINTK("try fast path false for mac: %s\n",
-			print_mac(buf, skb->data));
+		VPRINTK("try fast path false for mac: %pM\n", skb->data);
 		
 		return NETFRONT_ACCEL_STATUS_CANT;
 	}
@@ -774,10 +771,9 @@ static void  netfront_accel_vi_rx_comple
 	if (compare_ether_addr(skb->data, vnic->mac)) {
 		struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
 		u16 port;
-		DECLARE_MAC_BUF(buf);
 
-		DPRINTK("%s: saw wrong MAC address %s\n",
-			__FUNCTION__, print_mac(buf, skb->data));
+		DPRINTK("%s: saw wrong MAC address %pM\n",
+			__FUNCTION__, skb->data);
 
 		if (ip->protocol == IPPROTO_TCP) {
 			struct tcphdr *tcp = (struct tcphdr *)
--- 12.2.orig/drivers/xen/xenbus/xenbus_dev.c	2011-06-30 16:41:18.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_dev.c	2011-02-01 14:54:13.000000000 +0100
@@ -36,6 +36,7 @@
 #include <linux/errno.h>
 #include <linux/uio.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/fs.h>
 #include <linux/poll.h>
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:09:32.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_probe.c	2011-06-10 12:05:59.000000000 +0200
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/proc_fs.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
--- 12.2.orig/fs/proc/kcore.c	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/fs/proc/kcore.c	2011-04-13 13:58:56.000000000 +0200
@@ -130,7 +130,7 @@ static void __kcore_update_ram(struct li
 }
 
 
-#ifdef CONFIG_HIGHMEM
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_XEN)
 /*
  * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
  * because memory hole is not as big as !HIGHMEM case.
@@ -146,7 +146,11 @@ static int kcore_update_ram(void)
 	if (!ent)
 		return -ENOMEM;
 	ent->addr = (unsigned long)__va(0);
+#ifdef CONFIG_HIGHMEM
 	ent->size = max_low_pfn << PAGE_SHIFT;
+#else
+	ent->size = max_pfn << PAGE_SHIFT;
+#endif
 	ent->type = KCORE_RAM;
 	list_add(&ent->list, &head);
 	__kcore_update_ram(&head);
--- 12.2.orig/include/linux/nmi.h	2012-06-20 12:12:05.000000000 +0200
+++ 12.2/include/linux/nmi.h	2011-02-16 09:06:03.000000000 +0100
@@ -18,6 +18,9 @@
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
 #else
+#ifdef CONFIG_XEN
+#include <asm/nmi.h>
+#endif
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
--- 12.2.orig/lib/swiotlb-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ 12.2/lib/swiotlb-xen.c	2011-02-01 14:54:13.000000000 +0100
@@ -119,79 +119,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
-	void *start = alloc_bootmem_pages(size);
-	unsigned int i;
-	int rc;
-
-	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
-	for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) {
-		do {
-			rc = xen_create_contiguous_region(
-				(unsigned long)start + (i << IO_TLB_SHIFT),
-				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
-				dma_bits);
-		} while (rc && dma_bits++ < max_dma_bits);
-		if (rc) {
-			if (i == 0)
-				panic("No suitable physical memory available for SWIOTLB buffer!\n"
-				      "Use dom0_mem Xen boot parameter to reserve\n"
-				      "some DMA memory (e.g., dom0_mem=-128M).\n");
-			io_tlb_nslabs = i;
-			i <<= IO_TLB_SHIFT;
-			free_bootmem(__pa(start + i), size - i);
-			size = i;
-			for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
-				unsigned int bits = fls64(virt_to_bus(start + i - 1));
-
-				if (bits > dma_bits)
-					dma_bits = bits;
-			}
-			break;
-		}
-	}
-
-	return start;
-}
-
-#ifndef CONFIG_XEN
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
-	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-#endif
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
-	return phys_to_machine(paddr);
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
-	return machine_to_phys(baddr);
-}
-
+/* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
 {
-	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
-	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
-}
-
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
-					       dma_addr_t addr, size_t size)
-{
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return 0;
+	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
 static void swiotlb_print_info(unsigned long bytes)
@@ -224,10 +156,35 @@ swiotlb_init_with_default_size(size_t de
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
+	io_tlb_start = alloc_bootmem_pages(bytes);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer!\n");
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+	for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) {
+		do {
+			rc = xen_create_contiguous_region(
+				(unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
+				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
+				dma_bits);
+		} while (rc && dma_bits++ < max_dma_bits);
+		if (rc) {
+			if (i == 0)
+				panic("No suitable physical memory available for SWIOTLB buffer!\n"
+				      "Use dom0_mem Xen boot parameter to reserve\n"
+				      "some DMA memory (e.g., dom0_mem=-128M).\n");
+			io_tlb_nslabs = i;
+			i <<= IO_TLB_SHIFT;
+			free_bootmem(__pa(io_tlb_start + i), bytes - i);
+			bytes = i;
+			for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
+				unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));
+
+				if (bits > dma_bits)
+					dma_bits = bits;
+			}
+			break;
+		}
+	}
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
@@ -291,13 +248,10 @@ static inline int range_needs_mapping(ph
 static int is_swiotlb_buffer(dma_addr_t addr)
 {
 	unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
-	char *va = pfn_valid(pfn) ? __va(pfn << PAGE_SHIFT) : NULL;
+	phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 
-#ifdef CONFIG_HIGHMEM
-	if (pfn >= highstart_pfn)
-		return 0;
-#endif
-	return va >= io_tlb_start && va < io_tlb_end;
+	return paddr >= virt_to_phys(io_tlb_start) &&
+		paddr < virt_to_phys(io_tlb_end);
 }
 
 /*
@@ -537,12 +491,15 @@ swiotlb_full(struct device *dev, size_t 
 	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
 	       "device %s\n", size, dev ? dev_name(dev) : "?");
 
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Random memory would be DMAed\n");
-	}
+	if (size <= io_tlb_overflow || !do_panic)
+		return;
+
+	if (dir == DMA_BIDIRECTIONAL)
+		panic("DMA: Random memory could be DMA accessed\n");
+	if (dir == DMA_FROM_DEVICE)
+		panic("DMA: Random memory could be DMA written\n");
+	if (dir == DMA_TO_DEVICE)
+		panic("DMA: Random memory could be DMA read\n");
 }
 
 /*
@@ -568,7 +525,7 @@ dma_addr_t swiotlb_map_page(struct devic
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(dev, dev_addr, size) &&
+	if (dma_capable(dev, dev_addr, size) &&
 	    !range_needs_mapping(phys, size))
 		return dev_addr;
 
@@ -598,12 +555,12 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			 size_t size, int dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(dev_addr)) {
-		do_unmap_single(hwdev, dma_addr, size, dir);
+		do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
 		return;
 	}
 
@@ -632,12 +589,12 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr, size, dir, target);
+		sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
 }
 
 void
@@ -718,8 +675,8 @@ swiotlb_map_sg_attrs(struct device *hwde
 		phys_addr_t paddr = page_to_pseudophys(sg_page(sg))
 				   + sg->offset;
 
-		if (range_needs_mapping(paddr, sg->length)
-		    || address_needs_mapping(hwdev, dev_addr, sg->length)) {
+		if (range_needs_mapping(paddr, sg->length) ||
+		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			void *map;
 
 			gnttab_dma_unmap_page(dev_addr);