From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.33
Patch-mainline: 2.6.33

 This patch contains the differences between 2.6.32 and 2.6.33.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.33" by xen-port-patches.py

--- head.orig/arch/ia64/include/asm/xen/hypervisor.h	2011-02-01 14:44:12.000000000 +0100
+++ head/arch/ia64/include/asm/xen/hypervisor.h	2011-02-01 14:55:46.000000000 +0100
@@ -34,11 +34,11 @@
 #define _ASM_IA64_XEN_HYPERVISOR_H
 
 #include <linux/err.h>
+#include <xen/xen.h>
 #ifdef CONFIG_PARAVIRT_XEN
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>	/* to compile feature.c */
 #include <xen/features.h>		/* to comiple xen-netfront.c */
-#include <xen/xen.h>
 #include <asm/xen/hypercall.h>
 
 extern struct shared_info *HYPERVISOR_shared_info;
--- head.orig/arch/x86/Kconfig	2012-04-10 17:00:47.000000000 +0200
+++ head/arch/x86/Kconfig	2012-02-08 12:19:19.000000000 +0100
@@ -21,7 +21,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
-	select HAVE_PERF_EVENTS if !XEN
+	select HAVE_PERF_EVENTS
 	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
@@ -54,7 +54,7 @@ config X86
 	select HAVE_KERNEL_BZIP2 if !XEN
 	select HAVE_KERNEL_LZMA if !XEN
 	select HAVE_KERNEL_XZ
-	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_LZO if !XEN
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
--- head.orig/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/ia32/ia32entry-xen.S	2011-02-01 14:55:46.000000000 +0100
@@ -534,7 +534,7 @@ ia32_sys_call_table:
 	.quad compat_sys_writev
 	.quad sys_getsid
 	.quad sys_fdatasync
-	.quad sys32_sysctl	/* sysctl */
+	.quad compat_sys_sysctl	/* sysctl */
 	.quad sys_mlock		/* 150 */
 	.quad sys_munlock
 	.quad sys_mlockall
@@ -577,7 +577,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall		/* streams2 */
 	.quad stub32_vfork            /* 190 */
 	.quad compat_sys_getrlimit
-	.quad sys32_mmap2
+	.quad sys_mmap_pgoff
 	.quad sys32_truncate64
 	.quad sys32_ftruncate64
 	.quad sys32_stat64		/* 195 */
@@ -722,4 +722,5 @@ ia32_sys_call_table:
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 	.quad sys_perf_event_open
+	.quad compat_sys_recvmmsg
 ia32_syscall_end:
--- head.orig/arch/x86/include/asm/hw_irq.h	2011-09-07 15:58:21.000000000 +0200
+++ head/arch/x86/include/asm/hw_irq.h	2011-09-07 16:01:06.000000000 +0200
@@ -108,6 +108,7 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
+#ifndef CONFIG_XEN
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -123,6 +124,9 @@ struct irq_cfg {
 	struct irq_2_iommu	irq_2_iommu;
 #endif
 };
+#else
+struct irq_cfg;
+#endif
 
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/include/mach-xen/asm/percpu.h	2011-12-23 10:46:09.000000000 +0100
@@ -0,0 +1,24 @@
+#ifndef _ASM_X86_XEN_PERCPU_H
+#define _ASM_X86_XEN_PERCPU_H
+
+#include_next <asm/percpu.h>
+
+#define this_vcpu_read_1 this_cpu_read_1
+#define this_vcpu_read_2 this_cpu_read_2
+#define this_vcpu_read_4 this_cpu_read_4
+
+#ifdef CONFIG_64BIT
+# define this_vcpu_read_8 this_cpu_read_8
+#else
+# define this_vcpu_read_8(pcp) ({ \
+	typeof(pcp) res__; \
+	__asm__ ("movl %%ebx,%%eax\n" \
+		 "movl %%ecx,%%edx\n" \
+		 "cmpxchg8b " __percpu_arg(1) \
+		 : "=&A" (res__) : "m" (pcp)); \
+	res__; })
+#endif
+
+#define this_vcpu_read(pcp) __pcpu_size_call_return(this_vcpu_read_, pcp)
+
+#endif /* _ASM_X86_XEN_PERCPU_H */
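A note on the 32-bit this_vcpu_read_8() above: reading a 64-bit per-CPU
value with two 32-bit loads could be torn by an interrupt that updates it
in between, so the macro uses cmpxchg8b purely as a single-instruction
64-bit load. cmpxchg8b compares edx:eax with the memory operand and, on
mismatch, loads the current value into edx:eax; by pre-loading eax from
ebx and edx from ecx, both outcomes leave edx:eax holding the current
value and the memory contents unchanged. A hypothetical usage sketch (the
variable name is illustrative, not part of this patch):

	DEFINE_PER_CPU(u64, xen_runstate_ns);

	static u64 sample_runstate(void)
	{
		/* one uninterruptible 8-byte read, even on 32-bit x86 */
		return this_vcpu_read(xen_runstate_ns);
	}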
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/include/mach-xen/asm/perf_event.h	2011-02-01 14:55:46.000000000 +0100
@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_PERF_EVENT_H
+#define _ASM_X86_PERF_EVENT_H
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT	(1UL << 3)
+
+#endif
+
+static inline void init_hw_perf_events(void) {}
+
+#endif /* _ASM_X86_PERF_EVENT_H */
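The empty init_hw_perf_events() above is what allows the Kconfig hunk to
drop "if !XEN" from HAVE_PERF_EVENTS while the cpu/Makefile hunk below
adds perf_event.o to the disabled objects: the boot path still compiles.
Caller-side sketch (body elided; the actual call site is visible in the
common-xen.c hunk further down):

	void __init identify_boot_cpu(void)
	{
		/* ... boot-CPU identification elided ... */
		init_hw_perf_events();	/* inline no-op under CONFIG_XEN */
	}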
--- head.orig/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:00:14.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable.h	2011-03-23 10:00:38.000000000 +0100
@@ -16,6 +16,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/x86_init.h>
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
@@ -264,9 +266,9 @@ static inline int is_new_memtype_allowed
 					 unsigned long new_flags)
 {
 	/*
-	 * PAT type is always WB for ISA. So no need to check.
+	 * PAT type is always WB for untracked ranges, so no need to check.
 	 */
-	if (is_ISA_range(paddr, paddr + size - 1))
+	if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
 		return 1;
 
 	/*
--- head.orig/arch/x86/include/mach-xen/asm/processor.h	2012-05-23 09:35:22.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:46:54.000000000 +0100
@@ -31,6 +31,7 @@ struct mm_struct;
 #include <linux/init.h>
 #include <xen/interface/physdev.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -191,7 +192,7 @@ static inline void xen_cpuid(unsigned in
 			     unsigned int *ecx, unsigned int *edx)
 {
 	/* ecx is often an input as well as an output. */
-	asm(XEN_CPUID
+	asm volatile(XEN_CPUID
 	    : "=a" (*eax),
 	      "=b" (*ebx),
 	      "=c" (*ecx),
@@ -440,6 +441,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -460,13 +463,12 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg0;
-	unsigned long		debugreg1;
-	unsigned long		debugreg2;
-	unsigned long		debugreg3;
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long           debugreg6;
+	/* Keep track of the exact dr7 value set by the user */
+	unsigned long           ptrace_dr7;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
--- head.orig/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:28:14.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:28:39.000000000 +0200
@@ -49,14 +49,14 @@
 int xen_spinlock_init(unsigned int cpu);
 void xen_spinlock_cleanup(unsigned int cpu);
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-unsigned int xen_spin_adjust(const raw_spinlock_t *, unsigned int token);
+unsigned int xen_spin_adjust(const arch_spinlock_t *, unsigned int token);
 #else
 #define xen_spin_adjust(lock, token) (token)
 #define xen_spin_wait(l, t, f) xen_spin_wait(l, t)
 #endif
-bool xen_spin_wait(raw_spinlock_t *, unsigned int *token,
+bool xen_spin_wait(arch_spinlock_t *, unsigned int *token,
 		   unsigned int flags);
-void xen_spin_kick(const raw_spinlock_t *, unsigned int token);
+void xen_spin_kick(const arch_spinlock_t *, unsigned int token);
 
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
@@ -106,7 +106,7 @@ void xen_spin_kick(const raw_spinlock_t 
 	    : \
 	    : "memory", "cc")
 
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp, new;
 
@@ -169,7 +169,7 @@ static __always_inline int __ticket_spin
 		    : "memory", "cc"); \
 	} while (0)
 
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -193,7 +193,7 @@ static __always_inline int __ticket_spin
 #endif
 
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int token, count;
 	unsigned int flags = __raw_local_irq_save();
@@ -215,7 +215,7 @@ static __always_inline void __ticket_spi
 #define __ticket_spin_lock(lock) __ticket_spin_lock_flags(lock, -1)
 #endif
 
-static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void __ticket_spin_lock_flags(arch_spinlock_t *lock,
 						     unsigned long flags)
 {
 	unsigned int token, count;
@@ -231,7 +231,7 @@ static __always_inline void __ticket_spi
 	} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
 }
 
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned int token;
 	bool kick;
@@ -247,38 +247,38 @@ static __always_inline void __ticket_spi
 #undef __ticket_spin_unlock_body
 #endif
 
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
 }
 
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }
 
-#define __raw_spin(n) __ticket_spin_##n
+#define __arch_spin(n) __ticket_spin_##n
 
 #else /* TICKET_SHIFT */
 
 static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
 static inline void xen_spinlock_cleanup(unsigned int cpu) {}
 
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+static inline int __byte_spin_is_locked(arch_spinlock_t *lock)
 {
 	return lock->lock != 0;
 }
 
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+static inline int __byte_spin_is_contended(arch_spinlock_t *lock)
 {
 	return lock->spinners != 0;
 }
 
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
+static inline void __byte_spin_lock(arch_spinlock_t *lock)
 {
 	s8 val = 1;
 
@@ -297,7 +297,7 @@ static inline void __byte_spin_lock(raw_
 
 #define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
 
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+static inline int __byte_spin_trylock(arch_spinlock_t *lock)
 {
 	u8 old = 1;
 
@@ -307,53 +307,53 @@ static inline int __byte_spin_trylock(ra
 	return old == 0;
 }
 
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+static inline void __byte_spin_unlock(arch_spinlock_t *lock)
 {
 	smp_wmb();
 	lock->lock = 0;
 }
 
-#define __raw_spin(n) __byte_spin_##n
+#define __arch_spin(n) __byte_spin_##n
 
 #endif /* TICKET_SHIFT */
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	return __raw_spin(is_locked)(lock);
+	return __arch_spin(is_locked)(lock);
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	return __raw_spin(is_contended)(lock);
+	return __arch_spin(is_contended)(lock);
 }
-#define __raw_spin_is_contended	__raw_spin_is_contended
+#define arch_spin_is_contended	arch_spin_is_contended
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-	__raw_spin(lock)(lock);
+	__arch_spin(lock)(lock);
 }
 
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
-	return __raw_spin(trylock)(lock);
+	return __arch_spin(trylock)(lock);
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	__raw_spin(unlock)(lock);
+	__arch_spin(unlock)(lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
-	__raw_spin(lock_flags)(lock, flags);
+	__arch_spin(lock_flags)(lock, flags);
 }
 
-#undef __raw_spin
+#undef __arch_spin
 
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
+	while (arch_spin_is_locked(lock))
 		cpu_relax();
 }
 
@@ -375,7 +375,7 @@ static inline void __raw_spin_unlock_wai
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
 	return (int)(lock)->lock > 0;
 }
@@ -384,12 +384,12 @@ static inline int __raw_read_can_lock(ra
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
 	return (lock)->lock == RW_LOCK_BIAS;
 }
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
 		     "jns 1f\n"
@@ -398,7 +398,7 @@ static inline void __raw_read_lock(raw_r
 		     ::LOCK_PTR_REG (rw) : "memory");
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
 		     "jz 1f\n"
@@ -407,7 +407,7 @@ static inline void __raw_write_lock(raw_
 		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -417,7 +417,7 @@ static inline int __raw_read_trylock(raw
 	return 0;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -427,23 +427,23 @@ static inline int __raw_write_trylock(ra
 	return 0;
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "addl %1, %0"
 		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 /* The {read|write|spin}_lock() on x86 are full memory barriers. */
 static inline void smp_mb__after_lock(void) { }
--- head.orig/arch/x86/include/mach-xen/asm/spinlock_types.h	2012-01-31 17:31:06.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/spinlock_types.h	2012-01-31 18:04:48.000000000 +0100
@@ -41,14 +41,14 @@ typedef union {
 #endif
 #endif /* def CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING */
 	};
-} raw_spinlock_t;
+} arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
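These renames track the 2.6.33 core locking split: the architecture
back-end now implements arch_spin_*() on arch_spinlock_t, and the old
raw_spinlock_t name moves one level up to the core wrapper. A simplified
sketch of the resulting layering (core-kernel side, not part of this
patch):

	typedef struct raw_spinlock {
		arch_spinlock_t raw_lock;	/* e.g. the Xen ticket lock above */
		/* lockdep/debug fields elided */
	} raw_spinlock_t;

	static inline void do_raw_spin_lock(raw_spinlock_t *lock)
	{
		arch_spin_lock(&lock->raw_lock);	/* reaches __ticket_spin_lock() here */
	}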
--- head.orig/arch/x86/include/mach-xen/asm/swiotlb.h	2011-01-31 18:07:35.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/swiotlb.h	2011-02-01 14:55:46.000000000 +0100
@@ -1,4 +1,6 @@
 #include_next <asm/swiotlb.h>
 
+#define pci_swiotlb_detect() 1
+
 dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
 				   int dir);
--- head.orig/arch/x86/kernel/apic/Makefile	2012-02-08 12:11:33.000000000 +0100
+++ head/arch/x86/kernel/apic/Makefile	2012-02-08 12:23:16.000000000 +0100
@@ -30,4 +30,4 @@ obj-$(CONFIG_XEN)		+= nmi.o
 
 probe_64-$(CONFIG_XEN)		:= probe_32.o
 
-disabled-obj-$(CONFIG_XEN)	:= apic_flat_$(BITS).o
+disabled-obj-$(CONFIG_XEN)	:= apic_%.o
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -150,20 +150,6 @@ static struct irq_pin_list *get_one_free
 	return pin;
 }
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
-	struct irq_pin_list *irq_2_pin;
-	cpumask_var_t domain;
-	cpumask_var_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[] = {
@@ -219,7 +205,7 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg = NULL;
 	struct irq_desc *desc;
@@ -371,7 +357,7 @@ void arch_free_chip_data(struct irq_desc
 /* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
@@ -594,23 +580,41 @@ static void __init replace_pin_at_irq_no
 	add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+				 int mask_and, int mask_or,
+				 void (*final)(struct irq_pin_list *entry))
+{
+	unsigned int reg, pin;
+
+	pin = entry->pin;
+	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+	reg &= mask_and;
+	reg |= mask_or;
+	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+	if (final)
+		final(entry);
+}
+
 static void io_apic_modify_irq(struct irq_cfg *cfg,
 			       int mask_and, int mask_or,
 			       void (*final)(struct irq_pin_list *entry))
 {
-	int pin;
 	struct irq_pin_list *entry;
 
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		unsigned int reg;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-		reg &= mask_and;
-		reg |= mask_or;
-		io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-		if (final)
-			final(entry);
-	}
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+			     IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
 static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -634,18 +638,6 @@ static void __mask_IO_APIC_irq(struct ir
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-			IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
 	struct irq_cfg *cfg = desc->chip_data;
@@ -1225,7 +1217,7 @@ __assign_irq_vector(int irq, struct irq_
 	int cpu, err;
 	cpumask_var_t tmp_mask;
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+	if (cfg->move_in_progress)
 		return -EBUSY;
 
 	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1289,8 +1281,7 @@ next:
 	return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -1668,9 +1659,6 @@ __apicdebuginit(void) print_IO_APIC(void
 	struct irq_desc *desc;
 	unsigned int irq;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (i = 0; i < nr_ioapics; i++)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1777,9 +1765,6 @@ __apicdebuginit(void) print_APIC_field(i
 {
 	int i;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG);
 
 	for (i = 0; i < 8; i++)
@@ -1793,9 +1778,6 @@ __apicdebuginit(void) print_local_APIC(v
 	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
@@ -1893,13 +1875,19 @@ __apicdebuginit(void) print_local_APIC(v
 	printk("\n");
 }
 
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
 {
 	int cpu;
 
+	if (!maxcpu)
+		return;
+
 	preempt_disable();
-	for_each_online_cpu(cpu)
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
 		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
 	preempt_enable();
 }
 
@@ -1908,7 +1896,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+	if (!nr_legacy_irqs)
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1935,21 +1923,41 @@ __apicdebuginit(void) print_PIC(void)
 	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
 {
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
 	print_PIC();
 
 	/* don't print out if apic is not there */
 	if (!cpu_has_apic && !apic_from_smp_config())
 		return 0;
 
-	print_all_local_APICs();
+	print_local_APICs(show_lapic);
 	print_IO_APIC();
 
 	return 0;
 }
 
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
 
 
 /* Where if anywhere is the i8259 connect in external int mode */
@@ -2106,7 +2114,7 @@ void __init setup_ioapic_ids_from_mpc(vo
 	 * This is broken; anything with a real cpu count has to
 	 * circumvent this idiocy regardless.
 	 */
-	phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
 
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
@@ -2133,7 +2141,7 @@ void __init setup_ioapic_ids_from_mpc(vo
 		 * system must have a unique ID or we get lots of nice
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
-		if (apic->check_apicid_used(phys_id_present_map,
+		if (apic->check_apicid_used(&phys_id_present_map,
 					mp_ioapics[apic_id].apicid)) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 				apic_id, mp_ioapics[apic_id].apicid);
@@ -2148,7 +2156,7 @@ void __init setup_ioapic_ids_from_mpc(vo
 			mp_ioapics[apic_id].apicid = i;
 		} else {
 			physid_mask_t tmp;
-			tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+			apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
 					mp_ioapics[apic_id].apicid);
@@ -2303,20 +2311,16 @@ static int ioapic_retrigger_irq(unsigned
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
 	cpumask_var_t cleanup_mask;
 
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
 		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
 			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
 	} else {
 		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		free_cpumask_var(cleanup_mask);
 	}
@@ -2347,31 +2351,30 @@ static void __target_IO_APIC_irq(unsigne
 	}
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
 /*
  * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
  * leaves desc->affinity untouched.
  */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+		  unsigned int *dest_id)
 {
 	struct irq_cfg *cfg;
 	unsigned int irq;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
+		return -1;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return BAD_APICID;
+		return -1;
 
 	cpumask_copy(desc->affinity, mask);
 
-	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+	*dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+	return 0;
 }
 
 static int
@@ -2387,12 +2390,11 @@ set_ioapic_affinity_irq_desc(struct irq_
 	cfg = desc->chip_data;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	dest = set_desc_affinity(desc, mask);
-	if (dest != BAD_APICID) {
+	ret = set_desc_affinity(desc, mask, &dest);
+	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
 		__target_IO_APIC_irq(irq, dest, cfg);
-		ret = 0;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -2507,8 +2509,13 @@ asmlinkage void smp_irq_move_cleanup_int
 			continue;
 
 		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
 			goto unlock;
 
 		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
@@ -2527,29 +2534,40 @@ asmlinkage void smp_irq_move_cleanup_int
 			goto unlock;
 		}
 		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
 unlock:
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 	}
 
 	irq_exit();
 }
 
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
 {
 	struct irq_desc *desc = *descp;
 	struct irq_cfg *cfg = desc->chip_data;
-	unsigned vector, me;
+	unsigned me;
 
 	if (likely(!cfg->move_in_progress))
 		return;
 
-	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
 	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
 }
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+	__irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+
+	__irq_complete_move(&desc, cfg->vector);
+}
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
@@ -2565,6 +2583,59 @@ static void ack_apic_edge(unsigned int i
 
 atomic_t irq_mis_count;
 
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		if (mp_ioapics[entry->apic].apicver >= 0x20) {
+			/*
+			 * Intr-remapping uses pin number as the virtual vector
+			 * in the RTE. Actual vector is programmed in
+			 * intr-remapping table entry. Hence for the io-apic
+			 * EOI we use the pin number.
+			 */
+			if (irq_remapped(irq))
+				io_apic_eoi(entry->apic, entry->pin);
+			else
+				io_apic_eoi(entry->apic, cfg->vector);
+		} else {
+			__mask_and_edge_IO_APIC_irq(entry);
+			__unmask_and_level_IO_APIC_irq(entry);
+		}
+	}
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ack_apic_level(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2600,6 +2671,19 @@ static void ack_apic_level(unsigned int 
 	 * level-triggered interrupt.  We mask the source for the time of the
 	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
 	 * The idea is from Manfred Spraul.  --macro
+	 *
+	 * Also in the case when cpu goes offline, fixup_irqs() will forward
+	 * any unhandled interrupt on the offlined cpu to the new cpu
+	 * destination that is handling the corresponding interrupt. This
+	 * interrupt forwarding is done via IPI's. Hence, in this case also
+	 * level-triggered io-apic interrupt will be seen as an edge
+	 * interrupt in the IRR. And we can't rely on the cpu's EOI
+	 * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+	 * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+	 * supporting EOI register, we do an explicit EOI to clear the
+	 * remote IRR and on IO-APIC's which don't have an EOI register,
+	 * we use the above logic (mask+edge followed by unmask+level) from
+	 * Manfred Spraul to clear the remote IRR.
 	 */
 	cfg = desc->chip_data;
 	i = cfg->vector;
@@ -2611,6 +2695,19 @@ static void ack_apic_level(unsigned int 
 	 */
 	ack_APIC_irq();
 
+	/*
+	 * Tail end of clearing remote IRR bit (either by delivering the EOI
+	 * message via io-apic EOI register write or simulating it using
+	 * mask+edge followed by unnask+level logic) manually when the
+	 * level triggered interrupt is seen as the edge triggered interrupt
+	 * at the cpu.
+	 */
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+
+		eoi_ioapic_irq(desc);
+	}
+
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2644,41 +2741,9 @@ static void ack_apic_level(unsigned int 
 			move_masked_irq(irq);
 		unmask_IO_APIC_irq_desc(desc);
 	}
-
-	/* Tail end of version 0x11 I/O APIC bug workaround */
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(cfg);
-		__unmask_and_level_IO_APIC_irq(cfg);
-		spin_unlock(&ioapic_lock);
-	}
 }
 
 #ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	struct irq_pin_list *entry;
-
-	for_each_irq_pin(entry, cfg->irq_2_pin)
-		io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ir_ack_apic_edge(unsigned int irq)
 {
 	ack_APIC_irq();
@@ -3256,6 +3321,7 @@ unsigned int create_irq_nr(unsigned int 
 			continue;
 
 		desc_new = move_irq_desc(desc_new, node);
+		cfg_new = desc_new->chip_data;
 
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
@@ -3311,7 +3377,8 @@ void destroy_irq(unsigned int irq)
  * MSI message composition
  */
 #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
 {
 	struct irq_cfg *cfg;
 	int err;
@@ -3345,7 +3412,10 @@ static int msi_compose_msg(struct pci_de
 		irte.dest_id = IRTE_DEST(dest);
 
 		/* Set source-id of interrupt request */
-		set_msi_sid(&irte, pdev);
+		if (pdev)
+			set_msi_sid(&irte, pdev);
+		else
+			set_hpet_sid(&irte, hpet_id);
 
 		modify_irte(irq, &irte);
 
@@ -3391,8 +3461,7 @@ static int set_msi_irq_affinity(unsigned
 	struct msi_msg msg;
 	unsigned int dest;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	cfg = desc->chip_data;
@@ -3424,8 +3493,7 @@ ir_set_msi_irq_affinity(unsigned int irq
 	if (get_irte(irq, &irte))
 		return -1;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	irte.vector = cfg->vector;
@@ -3510,7 +3578,7 @@ static int setup_msi_irq(struct pci_dev 
 	int ret;
 	struct msi_msg msg;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+	ret = msi_compose_msg(dev, irq, &msg, -1);
 	if (ret < 0)
 		return ret;
 
@@ -3607,8 +3675,7 @@ static int dmar_msi_set_affinity(unsigne
 	struct msi_msg msg;
 	unsigned int dest;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	cfg = desc->chip_data;
@@ -3643,7 +3710,7 @@ int arch_setup_dmar_msi(unsigned int irq
 	int ret;
 	struct msi_msg msg;
 
-	ret = msi_compose_msg(NULL, irq, &msg);
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
 	if (ret < 0)
 		return ret;
 	dmar_msi_write(irq, &msg);
@@ -3663,8 +3730,7 @@ static int hpet_msi_set_affinity(unsigne
 	struct msi_msg msg;
 	unsigned int dest;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	cfg = desc->chip_data;
@@ -3683,6 +3749,19 @@ static int hpet_msi_set_affinity(unsigne
 
 #endif /* CONFIG_SMP */
 
+static struct irq_chip ir_hpet_msi_type = {
+	.name = "IR-HPET_MSI",
+	.unmask = hpet_msi_unmask,
+	.mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+	.ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
 	.unmask = hpet_msi_unmask,
@@ -3694,20 +3773,36 @@ static struct irq_chip hpet_msi_type = {
 	.retrigger = ioapic_retrigger_irq,
 };
 
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
 	int ret;
 	struct msi_msg msg;
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	ret = msi_compose_msg(NULL, irq, &msg);
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_hpet_to_ir(id);
+		int index;
+
+		if (!iommu)
+			return -1;
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			return -1;
+	}
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
 	if (ret < 0)
 		return ret;
 
 	hpet_msi_write(irq, &msg);
 	desc->status |= IRQ_MOVE_PCNTXT;
-	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
-		"edge");
+	if (irq_remapped(irq))
+		set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+					      handle_edge_irq, "edge");
+	else
+		set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+					      handle_edge_irq, "edge");
 
 	return 0;
 }
@@ -3741,8 +3836,7 @@ static int set_ht_irq_affinity(unsigned 
 	struct irq_cfg *cfg;
 	unsigned int dest;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	cfg = desc->chip_data;
@@ -3808,75 +3902,6 @@ int arch_setup_ht_irq(unsigned int irq, 
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset)
-{
-	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_cfg *cfg;
-	int mmr_pnode;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long flags;
-	int err;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	cfg = irq_cfg(irq);
-
-	err = assign_irq_vector(irq, cfg, eligible_cpu);
-	if (err != 0)
-		return err;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-				      irq_name);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->vector		= cfg->vector;
-	entry->delivery_mode	= apic->irq_delivery_mode;
-	entry->dest_mode	= apic->irq_dest_mode;
-	entry->polarity		= 0;
-	entry->trigger		= 0;
-	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
-
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	int mmr_pnode;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->mask = 1;
-
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
@@ -4055,7 +4080,7 @@ int __init io_apic_get_unique_id(int ioa
 	 */
 
 	if (physids_empty(apic_id_map))
-		apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	reg_00.raw = io_apic_read(ioapic, 0);
@@ -4071,10 +4096,10 @@ int __init io_apic_get_unique_id(int ioa
 	 * Every APIC in a system must have a unique ID or we get lots of nice
 	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 	 */
-	if (apic->check_apicid_used(apic_id_map, apic_id)) {
+	if (apic->check_apicid_used(&apic_id_map, apic_id)) {
 
 		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!apic->check_apicid_used(apic_id_map, i))
+			if (!apic->check_apicid_used(&apic_id_map, i))
 				break;
 		}
 
@@ -4087,7 +4112,7 @@ int __init io_apic_get_unique_id(int ioa
 		apic_id = i;
 	}
 
-	tmp = apic->apicid_to_cpu_present(apic_id);
+	apic->apicid_to_cpu_present(apic_id, &tmp);
 	physids_or(apic_id_map, apic_id_map, tmp);
 
 	if (reg_00.bits.ID != apic_id) {
@@ -4218,7 +4243,7 @@ static struct resource * __init ioapic_s
 	for (i = 0; i < nr_ioapics; i++) {
 		res[i].name = mem;
 		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		sprintf(mem,  "IOAPIC %u", i);
+		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 	}
 
@@ -4252,18 +4277,17 @@ void __init ioapic_init_mappings(void)
 #ifdef CONFIG_X86_32
 fake_ioapic_page:
 #endif
-			ioapic_phys = (unsigned long)
-				alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %08lx (%08lx)\n",
-			    __fix_to_virt(idx), ioapic_phys);
+		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+			ioapic_phys);
 		idx++;
 
 		ioapic_res->start = ioapic_phys;
-		ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
 		ioapic_res++;
 	}
 }
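Much of the churn in this file follows set_desc_affinity() changing from
returning a destination APIC ID (with BAD_APICID on failure) to returning
a status and handing the ID back through a pointer. The new calling
convention, as a sketch (the chip-programming step is elided):

	static int example_set_affinity(unsigned int irq, const struct cpumask *mask)
	{
		struct irq_desc *desc = irq_to_desc(irq);
		unsigned int dest;

		if (set_desc_affinity(desc, mask, &dest))
			return -1;	/* offline mask or vector assignment failure */

		/* reprogram the interrupt chip with 'dest' here */
		return 0;
	}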
--- head.orig/arch/x86/kernel/cpu/Makefile	2012-02-16 13:32:56.000000000 +0100
+++ head/arch/x86/kernel/cpu/Makefile	2012-02-16 13:33:14.000000000 +0100
@@ -40,7 +40,8 @@ obj-$(CONFIG_MTRR)			+= mtrr/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
-disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o sched.o vmware.o
+disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o perf_event.o \
+			      sched.o vmware.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
--- head.orig/arch/x86/kernel/cpu/common-xen.c	2012-04-20 15:14:02.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c	2012-04-20 15:14:08.000000000 +0200
@@ -69,7 +69,7 @@ void __init setup_cpu_local_masks(void)
 static void __cpuinit default_init(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_64
-	display_cacheinfo(c);
+	cpu_detect_cache_sizes(c);
 #else
 	/* Not much we can do here... */
 	/* Check if at least it has cpuid */
@@ -414,7 +414,7 @@ static void __cpuinit get_model_name(str
 	}
 }
 
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ebx, ecx, edx, l2size;
 
@@ -422,8 +422,6 @@ void __cpuinit display_cacheinfo(struct 
 
 	if (n >= 0x80000005) {
 		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
-		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
-				edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
 		c->x86_cache_size = (ecx>>24) + (edx>>24);
 #ifdef CONFIG_X86_64
 		/* On K8 L1 TLB is inclusive, so don't count it */
@@ -453,9 +451,6 @@ void __cpuinit display_cacheinfo(struct 
 #endif
 
 	c->x86_cache_size = l2size;
-
-	printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
-			l2size, ecx & 0xFF);
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -463,6 +458,7 @@ void __cpuinit detect_ht(struct cpuinfo_
 #ifdef CONFIG_X86_HT
 	u32 eax, ebx, ecx, edx;
 	int index_msb, core_bits;
+	static bool printed;
 
 	if (!cpu_has(c, X86_FEATURE_HT))
 		return;
@@ -478,7 +474,7 @@ void __cpuinit detect_ht(struct cpuinfo_
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
-		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+		printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
 		goto out;
 	}
 
@@ -505,11 +501,12 @@ void __cpuinit detect_ht(struct cpuinfo_
 				       ((1 << core_bits) - 1);
 
 out:
-	if ((c->x86_max_cores * smp_num_siblings) > 1) {
+	if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 		       c->cpu_core_id);
+		printed = 1;
 	}
 #endif
 }
@@ -694,24 +691,31 @@ void __init early_cpu_init(void)
 	const struct cpu_dev *const *cdev;
 	int count = 0;
 
+#ifdef CONFIG_PROCESSOR_SELECT
 	printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
+
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
 		const struct cpu_dev *cpudev = *cdev;
-		unsigned int j;
 
 		if (count >= X86_VENDOR_NUM)
 			break;
 		cpu_devs[count] = cpudev;
 		count++;
 
-		for (j = 0; j < 2; j++) {
-			if (!cpudev->c_ident[j])
-				continue;
-			printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
-				cpudev->c_ident[j]);
+#ifdef CONFIG_PROCESSOR_SELECT
+		{
+			unsigned int j;
+
+			for (j = 0; j < 2; j++) {
+				if (!cpudev->c_ident[j])
+					continue;
+				printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+					cpudev->c_ident[j]);
+			}
 		}
+#endif
 	}
-
 	early_identify_cpu(&boot_cpu_data);
 }
 
@@ -878,10 +882,8 @@ static void __cpuinit identify_cpu(struc
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-#ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
-	mcheck_init(c);
-#endif
+	mcheck_cpu_init(c);
 
 	select_idle_routine(c);
 
@@ -913,6 +915,10 @@ void __init identify_boot_cpu(void)
 	init_hw_perf_events();
 }
 
+#ifdef CONFIG_XEN
+void set_perf_event_pending(void) {}
+#endif
+
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
 {
 	BUG_ON(c == &boot_cpu_data);
@@ -1163,7 +1169,7 @@ static void clear_all_debug_regs(void)
 void __cpuinit cpu_init(void)
 {
 #ifndef CONFIG_X86_NO_TSS
-	struct orig_ist *orig_ist;
+	struct orig_ist *oist;
 	struct tss_struct *t;
 	unsigned long v;
 	int i;
@@ -1177,7 +1183,7 @@ void __cpuinit cpu_init(void)
 		xen_switch_pt();
 #ifndef CONFIG_X86_NO_TSS
 	t = &per_cpu(init_tss, cpu);
-	orig_ist = &per_cpu(orig_ist, cpu);
+	oist = &per_cpu(orig_ist, cpu);
 #endif
 
 #ifdef CONFIG_NUMA
@@ -1191,7 +1197,7 @@ void __cpuinit cpu_init(void)
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
 		panic("CPU#%d already initialized!\n", cpu);
 
-	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+	pr_debug("Initializing CPU#%d\n", cpu);
 
 	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
@@ -1214,7 +1220,7 @@ void __cpuinit cpu_init(void)
 	wrmsrl(MSR_KERNEL_GS_BASE, 0);
 	barrier();
 
-	check_efer();
+	x86_configure_nx();
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (cpu != 0)
 		enable_x2apic();
@@ -1224,12 +1230,12 @@ void __cpuinit cpu_init(void)
 	/*
 	 * set up and load the per-CPU TSS
 	 */
-	if (!orig_ist->ist[0]) {
+	if (!oist->ist[0]) {
 		char *estacks = per_cpu(exception_stacks, cpu);
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
-			orig_ist->ist[v] = t->x86_tss.ist[v] =
+			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
 		}
 	}
--- head.orig/arch/x86/kernel/e820-xen.c	2011-09-23 15:55:50.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c	2011-09-23 15:55:57.000000000 +0200
@@ -779,7 +779,7 @@ core_initcall(e820_mark_nvs_memory);
 /*
  * Early reserved memory areas.
  */
-#define MAX_EARLY_RES 20
+#define MAX_EARLY_RES 32
 
 struct early_res {
 	u64 start, end;
@@ -788,7 +788,15 @@ struct early_res {
 };
 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
 #ifndef CONFIG_XEN
-	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
+	{ 0, PAGE_SIZE, "BIOS data page", 1 },	/* BIOS data page */
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
+	/*
+	 * But first pinch a few for the stack/trampoline stuff
+	 * FIXME: Don't need the extra page at 4K, but need to fix
+	 * trampoline before removing it. (see the GDT stuff)
+	 */
+	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
 #endif
 	{}
 };
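The trailing 1 in each new initializer entry presumably fills the
early_res overlap_ok member (the fields beyond start/end fall outside the
hunk context), letting later reserve_early() callers overlap these ranges
without tripping a panic. Expanded native-side table, as a sketch:

	static struct early_res early_res[MAX_EARLY_RES] __initdata = {
		/* start,     end,            name,              overlap_ok */
		{ 0,          PAGE_SIZE,      "BIOS data page",  1 },
		{ PAGE_SIZE,  2 * PAGE_SIZE,  "EX TRAMPOLINE",   1 },
		{}
	};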
--- head.orig/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:15:32.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S	2012-02-29 14:16:58.000000000 +0100
@@ -338,6 +338,10 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
+ * Interrupt exit functions should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
+/*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
  * go as quickly as possible which is why some of this is
@@ -387,6 +391,10 @@ need_resched:
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -539,10 +547,14 @@ ENTRY(ia32pv_sysenter_target)
 	.align 4
 	.long 1b,syscall_fault
 .previous
-	/* fall through */
+	jmp system_call
 	CFI_ENDPROC
 ENDPROC(ia32pv_sysenter_target)
 
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -775,26 +787,69 @@ syscall_badsys:
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 /*
  * System calls that need a pt_regs pointer.
  */
-#define PTREGSCALL(name) \
+#define PTREGSCALL0(name) \
 	ALIGN; \
 ptregs_##name: \
 	leal 4(%esp),%eax; \
 	jmp sys_##name;
 
-PTREGSCALL(iopl)
-PTREGSCALL(fork)
-PTREGSCALL(clone)
-PTREGSCALL(vfork)
-PTREGSCALL(execve)
-PTREGSCALL(sigaltstack)
-PTREGSCALL(sigreturn)
-PTREGSCALL(rt_sigreturn)
-PTREGSCALL(vm86)
-PTREGSCALL(vm86old)
+#define PTREGSCALL1(name) \
+	ALIGN; \
+ptregs_##name: \
+	leal 4(%esp),%edx; \
+	movl (PT_EBX+4)(%esp),%eax; \
+	jmp sys_##name;
+
+#define PTREGSCALL2(name) \
+	ALIGN; \
+ptregs_##name: \
+	leal 4(%esp),%ecx; \
+	movl (PT_ECX+4)(%esp),%edx; \
+	movl (PT_EBX+4)(%esp),%eax; \
+	jmp sys_##name;
+
+#define PTREGSCALL3(name) \
+	ALIGN; \
+ptregs_##name: \
+	leal 4(%esp),%eax; \
+	pushl %eax; \
+	movl PT_EDX(%eax),%ecx; \
+	movl PT_ECX(%eax),%edx; \
+	movl PT_EBX(%eax),%eax; \
+	call sys_##name; \
+	addl $4,%esp; \
+	ret
+
+PTREGSCALL1(iopl)
+PTREGSCALL0(fork)
+PTREGSCALL0(vfork)
+PTREGSCALL3(execve)
+PTREGSCALL2(sigaltstack)
+PTREGSCALL0(sigreturn)
+PTREGSCALL0(rt_sigreturn)
+PTREGSCALL2(vm86)
+PTREGSCALL1(vm86old)
+
+/* Clone is an oddball.  The 4th arg is in %edi */
+	ALIGN;
+ptregs_clone:
+	leal 4(%esp),%eax
+	pushl %eax
+	pushl PT_EDI(%eax)
+	movl PT_EDX(%eax),%ecx
+	movl PT_ECX(%eax),%edx
+	movl PT_EBX(%eax),%eax
+	call sys_clone
+	addl $8,%esp
+	ret
 
 #ifndef CONFIG_XEN
 .macro FIXUP_ESPFIX_STACK
@@ -885,6 +940,10 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
+/*
+ *  Irq entries should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -906,6 +965,8 @@ ENDPROC(name)
 #else
 #define UNWIND_ESPFIX_STACK
 
+	.pushsection .kprobes.text, "ax"
+
 # A note on the "critical region" in our callback handler.
 # We want to avoid stacking callback handlers due to events occurring
 # during handling of the last event. To do this, we keep events disabled
@@ -1207,16 +1268,16 @@ ENTRY(fixup_4gb_segment)
 	jmp error_code
 	CFI_ENDPROC
 END(fixup_4gb_segment)
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 ENTRY(kernel_thread_helper)
 	pushl $0		# fake return address for unwinder
 	CFI_STARTPROC
-	movl %edx,%eax
-	push %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	call *%ebx
-	push %eax
-	CFI_ADJUST_CFA_OFFSET 4
+	movl %edi,%eax
+	call *%esi
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
@@ -1317,17 +1378,14 @@ END(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
-	pushl $0
 	pushl %eax
-	pushl %ecx
 	pushl %edx
 	movl %ebp, %eax
 	call ftrace_return_to_handler
-	movl %eax, 0xc(%esp)
+	movl %eax, %ecx
 	popl %edx
-	popl %ecx
 	popl %eax
-	ret
+	jmp *%ecx
 #endif
 
 #ifdef TIF_CSTAR
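The PTREGSCALLn macros above pair with the 2.6.33 change that gave the
pt_regs-needing syscalls ordinary C prototypes: n register arguments
followed by the pt_regs pointer. A C-side sketch of a few of them (return
types simplified; sys_iopl's new shape also appears in the ioport-xen.c
hunk below):

	long sys_iopl(unsigned int level, struct pt_regs *regs);	/* PTREGSCALL1 */
	long sys_vfork(struct pt_regs *regs);				/* PTREGSCALL0 */
	long sys_clone(unsigned long clone_flags, unsigned long newsp,
		       void __user *parent_tid, void __user *child_tid,
		       struct pt_regs *regs);	/* 4th arg lives in %edi, hence the open-coded stub */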
--- head.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:39:43.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:39:49.000000000 +0200
@@ -160,11 +160,11 @@ GLOBAL(return_to_handler)
 
 	call ftrace_return_to_handler
 
-	movq %rax, 16(%rsp)
+	movq %rax, %rdi
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
-	addq $16, %rsp
-	retq
+	addq $24, %rsp
+	jmp *%rdi
 #endif
 
 
@@ -866,8 +866,8 @@ apicinterrupt UV_BAU_MESSAGE \
 #endif
 apicinterrupt LOCAL_TIMER_VECTOR \
 	apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt GENERIC_INTERRUPT_VECTOR \
-	generic_interrupt smp_generic_interrupt
+apicinterrupt X86_PLATFORM_IPI_VECTOR \
+	x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_SMP
 apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
@@ -1096,63 +1096,20 @@ zeroentry coprocessor_error do_coprocess
 errorentry alignment_check do_alignment_check
 zeroentry simd_coprocessor_error do_simd_coprocessor_error
 	
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- *	rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
-	CFI_STARTPROC
-	FAKE_STACK_FRAME $child_rip
-	SAVE_ALL
-
-	# rdi: flags, rsi: usp, rdx: will be &pt_regs
-	movq %rdx,%rdi
-	orq  kernel_thread_flags(%rip),%rdi
-	movq $-1, %rsi
-	movq %rsp, %rdx
-
-	xorl %r8d,%r8d
-	xorl %r9d,%r9d
-
-	# clone now
-	call do_fork
-	movq %rax,RAX(%rsp)
-	xorl %edi,%edi
-
-	/*
-	 * It isn't worth to check for reschedule here,
-	 * so internally to the x86_64 port you can rely on kernel_thread()
-	 * not to reschedule the child before returning, this avoids the need
-	 * of hacks for example to fork off the per-CPU idle tasks.
-	 * [Hopefully no generic code relies on the reschedule -AK]
-	 */
-	RESTORE_ALL
-	UNFAKE_STACK_FRAME
-	ret
-	CFI_ENDPROC
-END(kernel_thread)
-
-ENTRY(child_rip)
+ENTRY(kernel_thread_helper)
 	pushq $0		# fake return address
 	CFI_STARTPROC
 	/*
 	 * Here we are in the child and the registers are set as they were
 	 * at kernel_thread() invocation in the parent.
 	 */
-	movq %rdi, %rax
-	movq %rsi, %rdi
-	call *%rax
+	call *%rsi
 	# exit
 	mov %eax, %edi
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
-END(child_rip)
+END(kernel_thread_helper)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1332,12 +1289,17 @@ error_kernelspace:
 	leaq irq_return(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
-	movl %ecx,%ecx	/* zero extend */
-	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	movl %ecx,%eax	/* zero extend */
+	cmpq %rax,RIP+8(%rsp)
+	je bstep_iret
 	cmpq $gs_change,RIP+8(%rsp)
 	je error_swapgs
 	jmp error_sti
+
+bstep_iret:
+	/* Fix truncated RIP */
+	movq %rcx,RIP+8(%rsp)
+	jmp error_swapgs
 #endif
 END(error_entry)
 
--- head.orig/arch/x86/kernel/head-xen.c	2011-09-07 15:59:12.000000000 +0200
+++ head/arch/x86/kernel/head-xen.c	2011-09-07 16:01:12.000000000 +0200
@@ -1,5 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/pci.h>
 
 #include <asm/setup.h>
 #ifndef CONFIG_XEN
@@ -123,7 +124,7 @@ void __init xen_start_kernel(void)
 		__pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
 }
 #else
-	check_efer();
+	x86_configure_nx();
 	xen_init_pt();
 #endif
 
@@ -154,6 +155,8 @@ void __init xen_start_kernel(void)
 	if (is_initial_xendomain()) {
 		x86_platform.get_wallclock = mach_get_cmos_time;
 		x86_platform.set_wallclock = mach_set_rtc_mmss;
+
+		pci_request_acs();
 	}
 }
 
--- head.orig/arch/x86/kernel/head32-xen.c	2011-05-09 11:40:41.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -48,8 +48,6 @@ void __init i386_start_kernel(void)
 	BUG_ON(pte_index(hypervisor_virt_start));
 #endif
 
-	reserve_trampoline_memory();
-
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifndef CONFIG_XEN
--- head.orig/arch/x86/kernel/head64-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/head64-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -119,8 +119,6 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	reserve_trampoline_memory();
-
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 	/*
--- head.orig/arch/x86/kernel/head_64-xen.S	2011-08-09 11:03:49.000000000 +0200
+++ head/arch/x86/kernel/head_64-xen.S	2011-08-09 11:07:15.000000000 +0200
@@ -52,9 +52,9 @@ startup_64:
 
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
-	phys_##name = . - .head.text; \
 ENTRY(name)
 
+	__PAGE_ALIGNED_BSS
 NEXT_PAGE(init_level4_pgt)
 	.fill	512,8,0
         /*
@@ -82,7 +82,9 @@ NEXT_PAGE(level2_fixmap_pgt)
 NEXT_PAGE(level1_fixmap_pgt)
 	.fill	512,8,0
 
+	.previous
 NEXT_PAGE(hypercall_page)
+	phys_hypercall_page = . - .head.text
 	CFI_STARTPROC
 	.rept 0x1000 / 0x20
 	.skip 1 /* push %rcx */
--- head.orig/arch/x86/kernel/ioport-xen.c	2011-02-01 14:44:12.000000000 +0100
+++ head/arch/x86/kernel/ioport-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -75,8 +75,9 @@ asmlinkage long sys_ioperm(unsigned long
  * beyond the 0x3ff range: to get the full 65536 ports bitmapped
  * you'd need 8kB of bitmaps/process, which is a bit excessive.
  */
-static int do_iopl(unsigned int level, struct thread_struct *t)
+long sys_iopl(unsigned int level, struct pt_regs *regs)
 {
+	struct thread_struct *t = &current->thread;
 	unsigned int old = t->iopl >> 12;
 
 	if (level > 3)
@@ -86,27 +87,8 @@ static int do_iopl(unsigned int level, s
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-
-	return 0;
-}
-
-#ifdef CONFIG_X86_32
-long sys_iopl(struct pt_regs *regs)
-{
-	unsigned int level = regs->bx;
-#else
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
-#endif
-	struct thread_struct *t = &current->thread;
-	int rc;
-
-	rc = do_iopl(level, t);
-	if (rc < 0)
-		goto out;
-
 	t->iopl = level << 12;
 	set_iopl_mask(t->iopl);
-out:
-	return rc;
+
+	return 0;
 }
--- head.orig/arch/x86/kernel/irq-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/irq-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -19,7 +19,7 @@ atomic_t irq_err_count;
 
 #ifndef CONFIG_XEN
 /* Function pointer for generic interrupt vector handling */
-void (*generic_interrupt_extension)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) = NULL;
 #endif
 
 /*
@@ -77,10 +77,10 @@ static int show_other_interrupts(struct 
 	seq_printf(p, "  Performance pending work\n");
 #endif
 #ifndef CONFIG_XEN
-	if (generic_interrupt_extension) {
+	if (x86_platform_ipi_callback) {
 		seq_printf(p, "%*s: ", prec, "PLT");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
 		seq_printf(p, "  Platform interrupts\n");
 	}
 #endif
@@ -162,7 +162,7 @@ int show_interrupts(struct seq_file *p, 
 	if (!desc)
 		return 0;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	for_each_online_cpu(j)
 		any_count |= kstat_irqs_cpu(i, j);
 	action = desc->action;
@@ -183,7 +183,7 @@ int show_interrupts(struct seq_file *p, 
 
 	seq_putc(p, '\n');
 out:
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
@@ -201,8 +201,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 #ifndef CONFIG_XEN
-	if (generic_interrupt_extension)
-		sum += irq_stats(cpu)->generic_irqs;
+	if (x86_platform_ipi_callback)
+		sum += irq_stats(cpu)->x86_platform_ipis;
 #endif
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
@@ -271,9 +271,9 @@ unsigned int __irq_entry do_IRQ(struct p
 }
 
 /*
- * Handler for GENERIC_INTERRUPT_VECTOR.
+ * Handler for X86_PLATFORM_IPI_VECTOR.
  */
-void smp_generic_interrupt(struct pt_regs *regs)
+void smp_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -283,13 +283,95 @@ void smp_generic_interrupt(struct pt_reg
 
 	irq_enter();
 
-	inc_irq_stat(generic_irqs);
+	inc_irq_stat(x86_platform_ipis);
 
-	if (generic_interrupt_extension)
-		generic_interrupt_extension();
+	if (x86_platform_ipi_callback)
+		x86_platform_ipi_callback();
 
 	irq_exit();
 
 	set_irq_regs(old_regs);
 }
 #endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <xen/evtchn.h>
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
+{
+	unsigned int irq;
+	static int warned;
+	struct irq_desc *desc;
+	static DECLARE_BITMAP(irqs_used, NR_IRQS);
+
+	for_each_irq_desc(irq, desc) {
+		int break_affinity = 0;
+		int set_affinity = 1;
+		const struct cpumask *affinity;
+
+		if (!desc)
+			continue;
+		if (irq == 2)
+			continue;
+
+		/* interrupts are disabled at this point */
+		raw_spin_lock(&desc->lock);
+
+		affinity = desc->affinity;
+		if (!irq_has_action(irq) ||
+		    cpumask_subset(affinity, cpu_online_mask)) {
+			raw_spin_unlock(&desc->lock);
+			continue;
+		}
+
+		if (cpumask_test_cpu(smp_processor_id(), affinity))
+			__set_bit(irq, irqs_used);
+
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+			break_affinity = 1;
+			affinity = cpu_all_mask;
+		}
+
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
+			desc->chip->mask(irq);
+
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, affinity);
+		else if (desc->chip != &no_irq_chip && !(warned++))
+			set_affinity = 0;
+
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
+			desc->chip->unmask(irq);
+
+		raw_spin_unlock(&desc->lock);
+
+		if (break_affinity && set_affinity)
+			/*printk("Broke affinity for irq %i\n", irq)*/;
+		else if (!set_affinity)
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+	/*
+	 * We can remove mdelay() and then send spurious interrupts to
+	 * new cpu targets for all the irqs that were handled previously by
+	 * this cpu. While it works, I have seen spurious interrupt messages
+	 * (nothing wrong but still...).
+	 *
+	 * So for now, retain mdelay(1) and check the IRR and then send those
+	 * interrupts to new targets as this cpu is already offlined...
+	 */
+	mdelay(1);
+
+	for_each_irq_desc(irq, desc) {
+		if (!__test_and_clear_bit(irq, irqs_used))
+			continue;
+
+		if (xen_test_irq_pending(irq)) {
+			raw_spin_lock(&desc->lock);
+			if (desc->chip->retrigger)
+				desc->chip->retrigger(irq);
+			raw_spin_unlock(&desc->lock);
+		}
+	}
+}
+#endif
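
A user-space sketch, with hypothetical stand-ins, of the two-pass shape of fixup_irqs() above: the first pass (under the descriptor lock) records which IRQs the dying CPU was servicing, and the second pass, after the mdelay() grace period, retriggers any of them that are still pending so they fire on their new targets.

	#include <stdio.h>

	#define NR_IRQS_DEMO 16

	static int irqs_used[NR_IRQS_DEMO];	/* stand-in for the irqs_used bitmap */
	static int pending[NR_IRQS_DEMO];	/* stand-in for xen_test_irq_pending() */

	static void retrigger(int irq)
	{
		printf("retriggering irq %d on its new target\n", irq);
	}

	int main(void)
	{
		int irq;

		/* pass 1: note the irqs this cpu was handling */
		irqs_used[3] = irqs_used[7] = 1;
		pending[7] = 1;		/* one fired while being moved */

		/* pass 2: kick anything recorded that is still pending */
		for (irq = 0; irq < NR_IRQS_DEMO; irq++) {
			if (!irqs_used[irq])
				continue;
			irqs_used[irq] = 0;
			if (pending[irq])
				retrigger(irq);
		}
		return 0;
	}
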
--- head.orig/arch/x86/kernel/microcode_core-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/microcode_core-xen.c	2011-12-01 15:26:17.000000000 +0100
@@ -21,10 +21,12 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/platform_device.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -88,7 +90,6 @@ static int do_microcode_update(const voi
 
 static int microcode_open(struct inode *unused1, struct file *unused2)
 {
-	cycle_kernel_lock();
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
@@ -98,7 +99,7 @@ static ssize_t microcode_write(struct fi
 	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > totalram_pages) {
-		pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
+		pr_err("too much data (max %ld pages)\n", totalram_pages);
 		return ret;
  	}
 
@@ -134,7 +135,7 @@ static int __init microcode_dev_init(voi
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
+		pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
@@ -193,7 +194,7 @@ static int __init microcode_init(void)
 	else if (c->x86_vendor == X86_VENDOR_AMD)
 		fw_name = "amd-ucode/microcode_amd.bin";
 	else {
-		pr_err("microcode: no support for this CPU vendor\n");
+		pr_err("no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
@@ -211,8 +212,7 @@ static int __init microcode_init(void)
 	}
 
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
-	       " <tigran@aivazian.fsnet.co.uk>,"
-	       " Peter Oruba\n");
+		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
 
 	return 0;
 }
--- head.orig/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -375,13 +375,6 @@ static int __init smp_read_mpc(struct mp
 		x86_init.mpparse.mpc_record(1);
 	}
 
-#ifdef CONFIG_X86_BIGSMP
-	generic_bigsmp_probe();
-#endif
-
-	if (apic->setup_apic_routing)
-		apic->setup_apic_routing();
-
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
 	return num_processors;
@@ -694,37 +687,21 @@ void __init default_get_smp_config(unsig
 }
 
 #ifndef CONFIG_XEN
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
-#ifdef CONFIG_X86_32
-	/*
-	 * We cannot access to MPC table to compute table size yet,
-	 * as only few megabytes from the bottom is mapped now.
-	 * PC-9800's MPC table places on the very last of physical
-	 * memory; so that simply reserving PAGE_SIZE from mpf->physptr
-	 * yields BUG() in reserve_bootmem.
-	 * also need to make sure physptr is below than max_low_pfn
-	 * we don't need reserve the area above max_low_pfn
-	 */
-	unsigned long end = max_low_pfn * PAGE_SIZE;
-
-	if (mpf->physptr < end) {
-		if (mpf->physptr + size > end)
-			size = end - mpf->physptr;
-		reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-	}
-#else
-	reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-#endif
+
+	reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
 }
 #endif
 
-static int __init smp_scan_config(unsigned long base, unsigned long length,
-				  unsigned reserve)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
 {
 	unsigned int *bp = _bus_to_virt(base);
 	struct mpf_intel *mpf;
+#ifndef CONFIG_XEN
+	unsigned long mem;
+#endif
 
 	apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
 			bp, length);
@@ -746,12 +723,10 @@ static int __init smp_scan_config(unsign
 			printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
 			       mpf, (u64)virt_to_phys(mpf));
 
-			if (!reserve)
-				return 1;
-			reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
-						BOOTMEM_DEFAULT);
+			mem = virt_to_phys(mpf);
+			reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
 			if (mpf->physptr)
-				smp_reserve_bootmem(mpf);
+				smp_reserve_memory(mpf);
 #else
 			printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
 			       mpf, ((void *)bp - _bus_to_virt(base)) + base);
@@ -764,7 +739,7 @@ static int __init smp_scan_config(unsign
 	return 0;
 }
 
-void __init default_find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(void)
 {
 #ifndef CONFIG_XEN
 	unsigned int address;
@@ -778,9 +753,9 @@ void __init default_find_smp_config(unsi
 	 * 2) Scan the top 1K of base RAM
 	 * 3) Scan the 64K of bios
 	 */
-	if (smp_scan_config(0x0, 0x400, reserve) ||
-	    smp_scan_config(639 * 0x400, 0x400, reserve) ||
-	    smp_scan_config(0xF0000, 0x10000, reserve))
+	if (smp_scan_config(0x0, 0x400) ||
+	    smp_scan_config(639 * 0x400, 0x400) ||
+	    smp_scan_config(0xF0000, 0x10000))
 		return;
 	/*
 	 * If it is an SMP machine we should know now, unless the
@@ -802,7 +777,7 @@ void __init default_find_smp_config(unsi
 #ifndef CONFIG_XEN
 	address = get_bios_ebda();
 	if (address)
-		smp_scan_config(address, 0x400, reserve);
+		smp_scan_config(address, 0x400);
 #endif
 }
 
@@ -1001,9 +976,6 @@ void __init early_reserve_e820_mpc_new(v
 {
 	if (enable_update_mptable && alloc_mptable) {
 		u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
-		startt = TRAMPOLINE_BASE;
-#endif
 		mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
 	}
 }
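
The mpparse hunks above replace bootmem reservations with reserve_early(), which records a labelled physical range before the bootmem allocator exists. A minimal user-space sketch of that bookkeeping, with hypothetical names and addresses:

	#include <stdio.h>

	struct early_res_demo {
		unsigned long start, end;
		const char *name;
	};

	static struct early_res_demo res[8];
	static int nr_res;

	/* stand-in for reserve_early(): just remember a labelled range */
	static void reserve_early_demo(unsigned long start, unsigned long end,
				       const char *name)
	{
		res[nr_res].start = start;
		res[nr_res].end = end;
		res[nr_res].name = name;
		nr_res++;
	}

	int main(void)
	{
		int i;

		reserve_early_demo(0x9fc00, 0x9fc10, "MP-table mpf");
		reserve_early_demo(0x9fc10, 0x9fd00, "MP-table mpc");
		for (i = 0; i < nr_res; i++)
			printf("%#lx-%#lx: %s\n", res[i].start, res[i].end,
			       res[i].name);
		return 0;
	}
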
--- head.orig/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:30:59.000000000 +0200
+++ head/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:31:34.000000000 +0200
@@ -7,10 +7,11 @@
 #include <asm/proto.h>
 #include <asm/dma.h>
 #include <asm/iommu.h>
+#include <asm/x86_init.h>
 
 static int forbid_dac __read_mostly;
 
-struct dma_map_ops *dma_ops;
+struct dma_map_ops *dma_ops = &nommu_dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int iommu_sac_force __read_mostly;
@@ -40,9 +41,6 @@ int iommu_detected __read_mostly = 0;
 int iommu_pass_through __read_mostly;
 #endif
 
-dma_addr_t bad_dma_address __read_mostly = 0;
-EXPORT_SYMBOL(bad_dma_address);
-
 /* Dummy device used for NULL arguments (normally ISA). */
 struct device x86_dma_fallback_dev = {
 	.init_name = "fallback device",
@@ -141,20 +139,19 @@ void __init pci_iommu_alloc(void)
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
 #endif
+	if (pci_swiotlb_detect())
+		goto out;
 
-	/*
-	 * The order of these functions is important for
-	 * fall-back/fail-over reasons
-	 */
 	gart_iommu_hole_init();
 
 	detect_calgary();
 
 	detect_intel_iommu();
 
+	/* needs to be called after gart_iommu_hole_init */
 	amd_iommu_detect();
-
-	swiotlb_init();
+out:
+	swiotlb_init(1);
 	if (swiotlb) {
 		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
 		dma_ops = &swiotlb_dma_ops;
@@ -266,7 +263,7 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "allowdac", 8))
 			forbid_dac = 0;
 		if (!strncmp(p, "nodac", 5))
-			forbid_dac = -1;
+			forbid_dac = 1;
 		if (!strncmp(p, "usedac", 6)) {
 			forbid_dac = -1;
 			return 1;
@@ -370,25 +367,19 @@ static int __init pci_iommu_init(void)
 #ifdef CONFIG_PCI
 	dma_debug_add_bus(&pci_bus_type);
 #endif
+	x86_init.iommu.iommu_init();
 
-	calgary_iommu_init();
-
-	intel_iommu_init();
-
-	amd_iommu_init();
-
-	gart_iommu_init();
+#ifndef CONFIG_XEN
+	if (swiotlb) {
+		printk(KERN_INFO "PCI-DMA: "
+		       "Using software bounce buffering for IO (SWIOTLB)\n");
+		swiotlb_print_info();
+	} else
+		swiotlb_free();
+#endif
 
-	no_iommu_init();
 	return 0;
 }
-
-void pci_iommu_shutdown(void)
-{
-	gart_iommu_shutdown();
-
-	amd_iommu_shutdown();
-}
 /* Must execute after PCI subsystem */
 rootfs_initcall(pci_iommu_init);
 
--- head.orig/arch/x86/kernel/pci-nommu-xen.c	2012-04-04 14:09:25.000000000 +0200
+++ head/arch/x86/kernel/pci-nommu-xen.c	2012-04-04 14:10:28.000000000 +0200
@@ -110,12 +110,3 @@ struct dma_map_ops nommu_dma_ops = {
 	.sync_sg_for_device	= nommu_sync_sg_for_device,
 	.dma_supported		= nommu_dma_supported,
 };
-
-void __init no_iommu_init(void)
-{
-	if (dma_ops)
-		return;
-
-	force_iommu = 0; /* no HW IOMMU */
-	dma_ops = &nommu_dma_ops;
-}
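
The two hunks above illustrate one pattern: initializing dma_ops statically to &nommu_dma_ops makes no_iommu_init() — a late "if still NULL, fall back" step — unnecessary. A tiny sketch of the idea, with demo names:

	#include <stdio.h>

	struct demo_ops {
		const char *name;
	};

	static struct demo_ops nommu_ops = { "nommu" };

	/* defaulting the pointer at build time removes the late NULL check */
	static struct demo_ops *ops = &nommu_ops;

	int main(void)
	{
		/* an IOMMU detect/init hook may overwrite ops; if none does,
		 * the nommu fallback is already in place */
		printf("using %s dma ops\n", ops->name);
		return 0;
	}
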
--- head.orig/arch/x86/kernel/process-xen.c	2011-03-03 16:07:49.000000000 +0100
+++ head/arch/x86/kernel/process-xen.c	2011-03-03 16:09:35.000000000 +0100
@@ -9,7 +9,11 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
+#include <linux/user-return-notifier.h>
+#include <linux/dmi.h>
+#include <linux/utsname.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -17,6 +21,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 #include <xen/evtchn.h>
 
 unsigned long idle_halt;
@@ -89,30 +94,30 @@ void exit_thread(void)
 	}
 }
 
-void flush_thread(void)
+void show_regs_common(void)
 {
-	struct task_struct *tsk = current;
+	const char *board, *product;
 
-#ifdef CONFIG_X86_64
-	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
-		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
-		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
-			clear_tsk_thread_flag(tsk, TIF_IA32);
-		} else {
-			set_tsk_thread_flag(tsk, TIF_IA32);
-			current_thread_info()->status |= TS_COMPAT;
-		}
-	}
-#endif
+	board = dmi_get_system_info(DMI_BOARD_NAME);
+	if (!board)
+		board = "";
+	product = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!product)
+		product = "";
+
+	printk(KERN_CONT "\n");
+	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+		current->pid, current->comm, print_tainted(),
+		init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "),
+		init_utsname()->version, board, product);
+}
 
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
+void flush_thread(void)
+{
+	struct task_struct *tsk = current;
 
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
@@ -193,16 +198,6 @@ void __switch_to_xtra(struct task_struct
 	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
@@ -211,6 +206,7 @@ void __switch_to_xtra(struct task_struct
 		else
 			hard_enable_TSC();
 	}
+	propagate_user_return_notify(prev_p, next_p);
 }
 
 int sys_fork(struct pt_regs *regs)
@@ -234,6 +230,78 @@ int sys_vfork(struct pt_regs *regs)
 		       NULL, NULL);
 }
 
+long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+{
+	if (!newsp)
+		newsp = regs->sp;
+	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+}
+
+/*
+ * This gets run with %si containing the
+ * function to call, and %di containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.si = (unsigned long) fn;
+	regs.di = (unsigned long) arg;
+
+#ifdef CONFIG_X86_32
+	regs.ds = __USER_DS;
+	regs.es = __USER_DS;
+	regs.fs = __KERNEL_PERCPU;
+	regs.gs = __KERNEL_STACK_CANARY;
+#else
+	regs.ss = __KERNEL_DS;
+#endif
+
+	regs.orig_ax = -1;
+	regs.ip = (unsigned long) kernel_thread_helper;
+	regs.cs = __KERNEL_CS | get_kernel_rpl();
+	regs.flags = X86_EFLAGS_IF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * sys_execve() executes a new program.
+ */
+long sys_execve(char __user *name, char __user * __user *argv,
+		char __user * __user *envp, struct pt_regs *regs)
+{
+	long error;
+	char *filename;
+
+	filename = getname(name);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		return error;
+	error = do_execve(filename, argv, envp, regs);
+
+#ifdef CONFIG_X86_32
+	if (error == 0) {
+		/* Make sure we don't return using sysenter.. */
+		set_thread_flag(TIF_IRET);
+	}
+#endif
+
+	putname(filename);
+	return error;
+}
 
 /*
  * Idle related variables and functions
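
A user-space sketch of the handoff the unified kernel_thread() above sets up: the parent stores the function in the %si slot and its argument in the %di slot of a fabricated pt_regs, and kernel_thread_helper simply performs "call *%rsi" in the child. Names here are stand-ins; the integer-to-function-pointer cast only mirrors how the register slot is reused.

	#include <stdio.h>

	/* stand-ins for the pt_regs slots kernel_thread() fills in */
	struct fake_regs {
		unsigned long si;	/* function to call */
		unsigned long di;	/* its argument */
	};

	static int worker(void *arg)
	{
		printf("child runs with arg: %s\n", (const char *)arg);
		return 0;
	}

	/* stand-in for kernel_thread_helper: call *%rsi with %rdi as argument */
	static int helper(struct fake_regs *regs)
	{
		int (*fn)(void *) = (int (*)(void *))regs->si;

		return fn((void *)regs->di);
	}

	int main(void)
	{
		struct fake_regs regs = {
			.si = (unsigned long)worker,
			.di = (unsigned long)"hello",
		};

		return helper(&regs);
	}
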
--- head.orig/arch/x86/kernel/process_32-xen.c	2012-02-29 14:15:56.000000000 +0100
+++ head/arch/x86/kernel/process_32-xen.c	2012-02-29 14:16:52.000000000 +0100
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <linux/user.h>
 #include <linux/interrupt.h>
-#include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/init.h>
@@ -35,7 +34,6 @@
 #include <linux/tick.h>
 #include <linux/percpu.h>
 #include <linux/prctl.h>
-#include <linux/dmi.h>
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
@@ -60,6 +58,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
@@ -130,39 +129,29 @@ void __show_regs(struct pt_regs *regs, i
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned long sp;
 	unsigned short ss, gs;
-	const char *board;
 
 	if (user_mode_vm(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 		gs = get_user_gs(regs);
 	} else {
-		sp = (unsigned long) (&regs->sp);
+		sp = kernel_stack_pointer(regs);
 		savesegment(ss, ss);
 		savesegment(gs, gs);
 	}
 
-	printk("\n");
+	show_regs_common();
 
-	board = dmi_get_system_info(DMI_PRODUCT_NAME);
-	if (!board)
-		board = "";
-	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
-			task_pid_nr(current), current->comm,
-			print_tainted(), init_utsname()->release,
-			(int)strcspn(init_utsname()->version, " "),
-			init_utsname()->version, board);
-
-	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+	printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
 			(u16)regs->cs, regs->ip, regs->flags,
 			smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->ip);
 
-	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
-	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+	printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
 		regs->si, regs->di, regs->bp, sp);
-	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+	printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
 	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
 
 	if (!all)
@@ -172,61 +161,28 @@ void __show_regs(struct pt_regs *regs, i
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4_safe();
-	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 			cr0, cr2, cr3, cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
 	get_debugreg(d3, 3);
-	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+	printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
 			d0, d1, d2, d3);
 
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR6: %08lx DR7: %08lx\n",
+	printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
 			d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-	__show_regs(regs, 1);
+	show_registers(regs);
 	show_trace(NULL, regs, &regs->sp, regs->bp);
 }
 
-/*
- * This gets run with %bx containing the
- * function to call, and %dx containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.bx = (unsigned long) fn;
-	regs.dx = (unsigned long) arg;
-
-	regs.ds = __USER_DS;
-	regs.es = __USER_DS;
-	regs.fs = __KERNEL_PERCPU;
-	regs.gs = __KERNEL_STACK_CANARY;
-	regs.orig_ax = -1;
-	regs.ip = (unsigned long) kernel_thread_helper;
-	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 void release_thread(struct task_struct *dead_task)
 {
 	BUG_ON(dead_task->mm);
@@ -262,7 +218,12 @@ int copy_thread(unsigned long clone_flag
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
+	err = -ENOMEM;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 #ifdef TIF_CSTAR
 	if (test_tsk_thread_flag(tsk, TIF_CSTAR))
 		p->thread.ip = (unsigned long) cstar_ret_from_fork;
@@ -499,46 +460,6 @@ __switch_to(struct task_struct *prev_p, 
 	return prev_p;
 }
 
-int sys_clone(struct pt_regs *regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	clone_flags = regs->bx;
-	newsp = regs->cx;
-	parent_tidptr = (int __user *)regs->dx;
-	child_tidptr = (int __user *)regs->di;
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-int sys_execve(struct pt_regs *regs)
-{
-	int error;
-	char *filename;
-
-	filename = getname((char __user *) regs->bx);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename,
-			(char __user * __user *) regs->cx,
-			(char __user * __user *) regs->dx,
-			regs);
-	if (error == 0) {
-		/* Make sure we don't return using sysenter.. */
-		set_thread_flag(TIF_IRET);
-	}
-	putname(filename);
-out:
-	return error;
-}
-
 #define top_esp                (THREAD_SIZE - sizeof(unsigned long))
 #define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
 
--- head.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:37:59.000000000 +0100
+++ head/arch/x86/kernel/process_64-xen.c	2011-02-02 08:47:12.000000000 +0100
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/user.h>
 #include <linux/interrupt.h>
-#include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
@@ -41,7 +40,6 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
-#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -57,13 +55,12 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
 
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
 void idle_notifier_register(struct notifier_block *n)
@@ -166,31 +163,21 @@ void __show_regs(struct pt_regs *regs, i
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
-	const char *board;
 
-	printk("\n");
-	print_modules();
-	board = dmi_get_system_info(DMI_PRODUCT_NAME);
-	if (!board)
-		board = "";
-	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
-		current->pid, current->comm, print_tainted(),
-		init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version, board);
-	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+	show_regs_common();
+	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
-	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
 			regs->sp, regs->flags);
-	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
-	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 	       regs->dx, regs->si, regs->di);
-	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
+	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
 	       regs->bp, regs->r8, regs->r9);
-	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
 	       regs->r10, regs->r11, regs->r12);
-	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
 	       regs->r13, regs->r14, regs->r15);
 
 	asm("movl %%ds,%0" : "=r" (ds));
@@ -211,27 +198,26 @@ void __show_regs(struct pt_regs *regs, i
 	cr3 = read_cr3();
 	cr4 = read_cr4();
 
-	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
 	       fs, fsindex, gs, gsindex, shadowgs);
-	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
 			es, cr0);
-	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
 			cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
-	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-	printk(KERN_INFO "CPU %d:", smp_processor_id());
-	__show_regs(regs, 1);
+	show_registers(regs);
 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
 
@@ -239,6 +225,7 @@ void xen_load_gs_index(unsigned gs)
 {
 	WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
 }
+EXPORT_SYMBOL(xen_load_gs_index);
 
 void release_thread(struct task_struct *dead_task)
 {
@@ -294,8 +281,9 @@ int copy_thread(unsigned long clone_flag
 	*childregs = *regs;
 
 	childregs->ax = 0;
-	childregs->sp = sp;
-	if (sp == ~0UL)
+	if (user_mode(regs))
+		childregs->sp = sp;
+	else
 		childregs->sp = (unsigned long)childregs;
 
 	p->thread.sp = (unsigned long) childregs;
@@ -305,12 +293,16 @@ int copy_thread(unsigned long clone_flag
 
 	p->thread.fs = me->thread.fs;
 	p->thread.gs = me->thread.gs;
+	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(fs, p->thread.fsindex);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 
+	err = -ENOMEM;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -350,28 +342,45 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+
 	return err;
 }
 
-void
-start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+static void
+start_thread_common(struct pt_regs *regs, unsigned long new_ip,
+		    unsigned long new_sp,
+		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
 {
 	loadsegment(fs, 0);
-	loadsegment(es, 0);
-	loadsegment(ds, 0);
+	loadsegment(es, _ds);
+	loadsegment(ds, _ds);
 	load_gs_index(0);
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	regs->cs		= __USER_CS;
-	regs->ss		= __USER_DS;
-	regs->flags		= 0x200;
+	regs->cs		= _cs;
+	regs->ss		= _ss;
+	regs->flags		= X86_EFLAGS_IF;
 	set_fs(USER_DS);
 	/*
 	 * Free the old FP and other extended state
 	 */
 	free_thread_xstate(current);
 }
-EXPORT_SYMBOL_GPL(start_thread);
+
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+	start_thread_common(regs, new_ip, new_sp,
+			    __USER_CS, __USER_DS, 0);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+{
+	start_thread_common(regs, new_ip, new_sp,
+			    __USER32_CS, __USER32_DS, __USER32_DS);
+}
+#endif
 
 /*
  *	switch_to(x,y) should switch tasks from x to y.
@@ -561,26 +570,8 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	if (preload_fpu)
 		__math_state_restore();
-	return prev_p;
-}
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage
-long sys_execve(char __user *name, char __user * __user *argv,
-		char __user * __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
+	return prev_p;
 }
 
 void set_personality_64bit(void)
@@ -597,13 +588,16 @@ void set_personality_64bit(void)
 	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
-asmlinkage long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
-	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+void set_personality_ia32(void)
+{
+	/* inherit personality from parent */
+
+	/* Make sure to be in 32bit mode */
+	set_thread_flag(TIF_IA32);
+	current->personality |= force_personality32;
+
+	/* Prepare the first "return" to user space */
+	current_thread_info()->status |= TS_COMPAT;
 }
 
 unsigned long get_wchan(struct task_struct *p)
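
The process_64 hunks above fold start_thread() and the new start_thread_ia32() into one start_thread_common() parameterized by the segment selectors that differ. A user-space sketch of the refactor shape, using made-up selector values:

	#include <stdio.h>

	struct regs_demo {
		unsigned int cs, ss, ds;
	};

	/* the shared body, parameterized by the selectors that differ */
	static void start_common(struct regs_demo *r, unsigned int cs,
				 unsigned int ss, unsigned int ds)
	{
		r->cs = cs;
		r->ss = ss;
		r->ds = ds;
	}

	static void start_64bit(struct regs_demo *r) { start_common(r, 0x33, 0x2b, 0); }
	static void start_ia32(struct regs_demo *r)  { start_common(r, 0x23, 0x2b, 0x2b); }

	int main(void)
	{
		struct regs_demo r;

		start_ia32(&r);
		printf("cs=%#x ss=%#x ds=%#x\n", r.cs, r.ss, r.ds);
		return 0;
	}
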
--- head.orig/arch/x86/kernel/setup-xen.c	2012-06-06 14:03:17.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c	2012-06-06 14:03:27.000000000 +0200
@@ -73,6 +73,7 @@
 
 #include <asm/mtrr.h>
 #include <asm/apic.h>
+#include <asm/trampoline.h>
 #include <asm/e820.h>
 #include <asm/mpspec.h>
 #include <asm/setup.h>
@@ -106,9 +107,11 @@
 #include <asm/percpu.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
+#include <asm/k8.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
+#include <asm/mce.h>
 
 #ifdef CONFIG_XEN
 #include <asm/hypervisor.h>
@@ -281,7 +284,7 @@ EXPORT_SYMBOL(edd);
  *              from boot_params into a safe place.
  *
  */
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
 {
      memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
 	    sizeof(edd.mbr_signature));
@@ -291,7 +294,7 @@ static inline void copy_edd(void)
 }
 #endif
 #else
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
 {
 }
 #endif
@@ -541,49 +544,18 @@ static void __init reserve_early_setup_d
 #endif
 }
 
+#ifndef CONFIG_XEN
 /*
  * --------- Crashkernel reservation ------------------------------
  */
 
 #ifdef CONFIG_KEXEC
 
-#ifndef CONFIG_XEN
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
-	const unsigned long long alignment = 16<<20; 	/* 16M */
-	unsigned long long start = 0LL;
-
-	while (1) {
-		int ret;
-
-		start = find_e820_area(start, ULONG_MAX, size, alignment);
-		if (start == -1ULL)
-			return start;
-
-		/* try to reserve it */
-		ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
-		if (ret >= 0)
-			return start;
-
-		start += alignment;
-	}
-}
-
 static inline unsigned long long get_total_mem(void)
 {
 	unsigned long long total;
 
-	total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	total += highend_pfn - highstart_pfn;
-#endif
+	total = max_pfn - min_low_pfn;
 
 	return total << PAGE_SHIFT;
 }
@@ -603,21 +575,25 @@ static void __init reserve_crashkernel(v
 
 	/* 0 means: find the address automatically */
 	if (crash_base <= 0) {
-		crash_base = find_and_reserve_crashkernel(crash_size);
+		const unsigned long long alignment = 16<<20;	/* 16M */
+
+		crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+				 alignment);
 		if (crash_base == -1ULL) {
-			pr_info("crashkernel reservation failed. "
-				"No suitable area found.\n");
+			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
-		ret = reserve_bootmem_generic(crash_base, crash_size,
-					BOOTMEM_EXCLUSIVE);
-		if (ret < 0) {
-			pr_info("crashkernel reservation failed - "
-				"memory is in use\n");
+		unsigned long long start;
+
+		start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+				 1<<20);
+		if (start != crash_base) {
+			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
+	reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -630,13 +606,11 @@ static void __init reserve_crashkernel(v
 	insert_resource(&iomem_resource, &crashk_res);
 }
 #else
-#define reserve_crashkernel xen_machine_kexec_setup_resources
-#endif
-#else
 static void __init reserve_crashkernel(void)
 {
 }
 #endif
+#endif /* CONFIG_XEN */
 
 static struct resource standard_io_resources[] = {
 	{ .name = "dma1", .start = 0x00, .end = 0x1f,
@@ -735,19 +709,27 @@ static struct dmi_system_id __initdata b
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
 		},
 	},
-	{
 	/*
-	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
-	 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
+	 * AMI BIOS with low memory corruption was found on Intel DG45ID and
+	 * DG45FC boards.
+	 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
 	 * match only DMI_BOARD_NAME and see if there is more bad products
 	 * with this vendor.
 	 */
+	{
 		.callback = dmi_low_memory_corruption,
 		.ident = "AMI BIOS",
 		.matches = {
 			DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
 		},
 	},
+	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "AMI BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
+		},
+	},
 #endif
 	{}
 };
@@ -767,6 +749,8 @@ static struct dmi_system_id __initdata b
 
 void __init setup_arch(char **cmdline_p)
 {
+	int acpi = 0;
+	int k8 = 0;
 #ifdef CONFIG_XEN
 	unsigned int i;
 	unsigned long p2m_pages;
@@ -903,21 +887,18 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
-#ifdef CONFIG_X86_64
 	/*
-	 * Must call this twice: Once just to detect whether hardware doesn't
-	 * support NX (so that the early EHCI debug console setup can safely
-	 * call set_fixmap(), and then again after parsing early parameters to
-	 * honor the respective command line option.
+	 * x86_configure_nx() is called before parse_early_param() to detect
+	 * whether hardware doesn't support NX (so that the early EHCI debug
+	 * console setup can safely call set_fixmap()). It may then be called
+	 * again from within noexec_setup() during parsing early parameters
+	 * to honor the respective command line option.
 	 */
-	check_efer();
-#endif
+	x86_configure_nx();
 
 	parse_early_param();
 
-#ifdef CONFIG_X86_64
-	check_efer();
-#endif
+	x86_report_nx();
 
 	/* Must be before kernel pagetables are setup */
 	vmi_activate();
@@ -1024,6 +1005,20 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_brk();
 
+	/*
+	 * Find and reserve possible boot-time SMP configuration:
+	 */
+	find_smp_config();
+
+	reserve_trampoline_memory();
+
+#ifdef CONFIG_ACPI_SLEEP
+	/*
+	 * Reserve low memory region for sleep support.
+	 * even before init_memory_mapping
+	 */
+	acpi_reserve_wakeup_memory();
+#endif
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
@@ -1051,6 +1046,8 @@ void __init setup_arch(char **cmdline_p)
 	reserve_initrd();
 
 #ifndef CONFIG_XEN
+	reserve_crashkernel();
+
 	vsmp_init();
 #endif
 
@@ -1074,23 +1071,15 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Parse SRAT to discover nodes.
 	 */
-	acpi_numa_init();
+	acpi = acpi_numa_init();
 #endif
 
-	initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
-	/*
-	 * Reserve low memory region for sleep support.
-	 */
-	acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+	if (!acpi)
+		k8 = !k8_numa_init(0, max_pfn);
 #endif
-	/*
-	 * Find and reserve possible boot-time SMP configuration:
-	 */
-	find_smp_config();
 
-	reserve_crashkernel();
+	initmem_init(0, max_pfn, acpi, k8);
 
 #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
 	/*
@@ -1118,6 +1107,9 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_XEN
+#ifdef CONFIG_KEXEC
+	xen_machine_kexec_setup_resources();
+#endif
 	p2m_pages = max_pfn;
 	if (xen_start_info->nr_pages > max_pfn) {
 		/*
@@ -1250,6 +1242,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 	x86_init.oem.banner();
+
+	mcheck_init();
 }
 
 #ifdef CONFIG_X86_32
--- head.orig/arch/x86/kernel/time-xen.c	2012-02-10 13:28:49.000000000 +0100
+++ head/arch/x86/kernel/time-xen.c	2012-02-10 13:28:55.000000000 +0100
@@ -948,28 +948,23 @@ core_initcall(cpufreq_time_setup);
  */
 static ctl_table xen_subtable[] = {
 	{
-		.ctl_name	= CTL_XEN_INDEPENDENT_WALLCLOCK,
 		.procname	= "independent_wallclock",
 		.data		= &independent_wallclock,
 		.maxlen		= sizeof(independent_wallclock),
 		.mode		= 0644,
-		.strategy	= sysctl_data,
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= CTL_XEN_PERMITTED_CLOCK_JITTER,
 		.procname	= "permitted_clock_jitter",
 		.data		= &permitted_clock_jitter,
 		.maxlen		= sizeof(permitted_clock_jitter),
 		.mode		= 0644,
-		.strategy	= sysctl_data,
 		.proc_handler	= proc_doulongvec_minmax
 	},
 	{ }
 };
 static ctl_table xen_table[] = {
 	{
-		.ctl_name	= CTL_XEN,
 		.procname	= "xen",
 		.mode		= 0555,
 		.child		= xen_subtable
--- head.orig/arch/x86/kernel/traps-xen.c	2011-02-16 13:56:25.000000000 +0100
+++ head/arch/x86/kernel/traps-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -522,77 +522,56 @@ asmlinkage __kprobes struct pt_regs *syn
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long condition;
+	unsigned long dr6;
 	int si_code;
 
-	get_debugreg(condition, 6);
+	get_debugreg(dr6, 6);
 
 	/* Catch kmemcheck conditions first of all! */
-	if (condition & DR_STEP && kmemcheck_trap(regs))
+	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
 
+	/* DR6 may or may not be cleared by the CPU */
+	set_debugreg(0, 6);
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
 	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 	tsk->thread.debugctlmsr = 0;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-						SIGTRAP) == NOTIFY_STOP)
+	/* Store the virtualized DR6 value */
+	tsk->thread.debugreg6 = dr6;
+
+	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+							SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
-	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7)
-			goto clear_dr7;
-	}
-
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto debug_vm86;
-#endif
-
-	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg6 = condition;
-
-	/*
-	 * Single-stepping through TF: make sure we ignore any events in
-	 * kernel space (but re-enable TF when returning to user mode).
-	 */
-	if (condition & DR_STEP) {
-		if (!user_mode(regs))
-			goto clear_TF_reenable;
+	if (regs->flags & X86_VM_MASK) {
+		handle_vm86_trap((struct kernel_vm86_regs *) regs,
+				error_code, 1);
+		return;
 	}
 
-	si_code = get_si_code(condition);
-	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code, si_code);
-
 	/*
-	 * Disable additional traps. They'll be re-enabled when
-	 * the signal is delivered.
+	 * Single-stepping through system calls: ignore any exceptions in
+	 * kernel space, but re-enable TF when returning to user mode.
+	 *
+	 * We already checked v86 mode above, so we can check for kernel mode
+	 * by just checking the CPL of CS.
 	 */
-clear_dr7:
-	set_debugreg(0, 7);
+	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+		tsk->thread.debugreg6 &= ~DR_STEP;
+		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
+	}
+	si_code = get_si_code(tsk->thread.debugreg6);
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-	/* reenable preemption: handle_vm86_trap() might sleep */
-	dec_preempt_count();
-	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	conditional_cli(regs);
-	return;
-#endif
-
-clear_TF_reenable:
-	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~X86_EFLAGS_TF;
-	preempt_conditional_cli(regs);
 	return;
 }
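
One subtlety in the do_debug() rework above: notify_die() takes a long, so the address of the on-stack dr6 is smuggled through that argument as PTR_ERR(&dr6), letting a handler on the chain (the hw-breakpoint code) clear bits in the caller's copy. A user-space sketch of the round trip, with stand-in helpers:

	#include <stdio.h>

	/* stand-ins for the kernel's PTR_ERR()/ERR_PTR() casts */
	static long ptr_err_demo(void *p)  { return (long)p; }
	static void *err_ptr_demo(long v)  { return (void *)v; }

	int main(void)
	{
		unsigned long dr6 = 0x4001;	/* hypothetical DR6 snapshot */

		/* the notifier argument is a long; the address travels through it */
		long arg = ptr_err_demo(&dr6);

		/* a notifier can then modify the caller's copy */
		*(unsigned long *)err_ptr_demo(arg) &= ~0x4000UL;

		printf("dr6 after notifier: %#lx\n", dr6);
		return 0;
	}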
 
--- head.orig/arch/x86/kernel/vmlinux.lds.S	2011-02-01 14:44:12.000000000 +0100
+++ head/arch/x86/kernel/vmlinux.lds.S	2011-02-01 14:55:46.000000000 +0100
@@ -43,7 +43,7 @@ ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
 /*
  * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
  * we retain large page mappings for boundaries spanning kernel text, rodata
--- head.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+		     u32 mult)
 {
 	unsigned long flags;
 
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wa
 	vsyscall_gtod_data.clock.vread = clock->vread;
 	vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
 	vsyscall_gtod_data.clock.mask = clock->mask;
-	vsyscall_gtod_data.clock.mult = clock->mult;
+	vsyscall_gtod_data.clock.mult = mult;
 	vsyscall_gtod_data.clock.shift = clock->shift;
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = {
 };
 
 static ctl_table kernel_root_table2[] = {
-	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+	{ .procname = "kernel", .mode = 0555,
 	  .child = kernel_table2 },
 	{}
 };
--- head.orig/arch/x86/kernel/x8664_ksyms_64.c	2012-06-06 13:23:56.000000000 +0200
+++ head/arch/x86/kernel/x8664_ksyms_64.c	2011-04-13 13:57:11.000000000 +0200
@@ -55,6 +55,6 @@ EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(memmove);
 
 EXPORT_SYMBOL(empty_zero_page);
-#ifndef CONFIG_PARAVIRT
+#if !defined(CONFIG_PARAVIRT) && !defined(CONFIG_XEN)
 EXPORT_SYMBOL(native_load_gs_index);
 #endif
--- head.orig/arch/x86/kernel/x86_init-xen.c	2011-07-11 12:20:38.000000000 +0200
+++ head/arch/x86/kernel/x86_init-xen.c	2011-07-11 12:53:48.000000000 +0200
@@ -13,10 +13,13 @@
 #include <asm/e820.h>
 #include <asm/time.h>
 #include <asm/irq.h>
+#include <asm/pat.h>
+#include <asm/iommu.h>
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
+int __init iommu_init_noop(void) { return 0; }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -61,10 +64,15 @@ struct x86_init_ops x86_init __initdata 
 		.tsc_pre_init		= x86_init_noop,
 		.timer_init		= x86_init_noop,
 	},
+
+	.iommu = {
+		.iommu_init		= iommu_init_noop,
+	},
 };
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= NULL,
 	.get_wallclock			= xen_read_wallclock,
 	.set_wallclock			= xen_write_wallclock,
+	.is_untracked_pat_range		= is_ISA_range,
 };
--- head.orig/arch/x86/mm/fault-xen.c	2011-08-15 11:03:59.000000000 +0200
+++ head/arch/x86/mm/fault-xen.c	2011-08-15 11:04:06.000000000 +0200
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 	if (unlikely(is_kmmio_active()))
 		if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_
 	return 0;
 }
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -258,7 +259,7 @@ void vmalloc_sync_all(void)
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
@@ -377,7 +378,7 @@ void vmalloc_sync_all(void)
  *
  * This assumes no large pages in there.
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
@@ -679,7 +680,7 @@ no_context(struct pt_regs *regs, unsigne
 	show_fault_oops(regs, error_code, address);
 
 	stackend = end_of_stack(tsk);
-	if (*stackend != STACK_END_MAGIC)
+	if (tsk != &init_task && *stackend != STACK_END_MAGIC)
 		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
 
 	tsk->thread.cr2		= address;
@@ -881,7 +882,7 @@ static int spurious_fault_check(unsigned
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline int
+static noinline __kprobes int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
--- head.orig/arch/x86/mm/init-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/mm/init-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -163,10 +163,6 @@ unsigned long __init_refok init_memory_m
 	use_gbpages = direct_gbpages;
 #endif
 
-	set_nx();
-	if (nx_enabled)
-		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-
 	/* Enable PSE if available */
 	if (cpu_has_pse)
 		set_in_cr4(X86_CR4_PSE);
--- head.orig/arch/x86/mm/init_32-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -448,7 +448,7 @@ static void __init permanent_kmaps_init(
 	pkmap_page_table = pte;
 }
 
-static void __init add_one_highpage_init(struct page *page, int pfn)
+static void __init add_one_highpage_init(struct page *page)
 {
 	ClearPageReserved(page);
 	init_page_count(page);
@@ -481,7 +481,7 @@ static int __init add_highpages_work_fn(
 		if (!pfn_valid(node_pfn))
 			continue;
 		page = pfn_to_page(node_pfn);
-		add_one_highpage_init(page, node_pfn);
+		add_one_highpage_init(page);
 	}
 
 	return 0;
@@ -705,8 +705,8 @@ void __init find_low_pfn_range(void)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn,
-				  unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+				int acpi, int k8)
 {
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
@@ -955,8 +955,7 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
-	       );
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
@@ -1062,7 +1061,7 @@ static noinline int do_test_wp_bit(void)
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly __read_mostly;
 
 void set_kernel_text_rw(void)
 {
--- head.orig/arch/x86/mm/init_64-xen.c	2011-06-30 17:06:27.000000000 +0200
+++ head/arch/x86/mm/init_64-xen.c	2011-06-30 17:10:32.000000000 +0200
@@ -53,6 +53,7 @@
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 #include <asm/setup.h>
+#include <linux/bootmem.h>
 
 #include <xen/features.h>
 
@@ -815,7 +816,8 @@ kernel_physical_mapping_init(unsigned lo
 }
 
 #ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+				int acpi, int k8)
 {
 	unsigned long bootmap_size, bootmap;
 
@@ -868,6 +870,21 @@ void __init paging_init(void)
  */
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void  update_end_of_memory_vars(u64 start, u64 size)
+{
+	unsigned long end_pfn = PFN_UP(start + size);
+
+	if (end_pfn > max_pfn) {
+		max_pfn = end_pfn;
+		max_low_pfn = end_pfn;
+		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	}
+}
+
+/*
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
  */
@@ -886,6 +903,9 @@ int arch_add_memory(int nid, u64 start, 
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
+	/* update max_pfn, max_low_pfn and high_memory */
+	update_end_of_memory_vars(start, size);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
@@ -954,12 +974,12 @@ void __init mem_init(void)
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly;
 
 void set_kernel_text_rw(void)
 {
-	unsigned long start = PFN_ALIGN(_stext);
-	unsigned long end = PFN_ALIGN(__start_rodata);
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = PFN_ALIGN(__stop___ex_table);
 
 	if (!kernel_set_to_readonly)
 		return;
@@ -967,13 +987,18 @@ void set_kernel_text_rw(void)
 	pr_debug("Set kernel text: %lx - %lx for read write\n",
 		 start, end);
 
+	/*
+	 * Make the kernel identity mapping for text RW. Kernel text
+	 * mapping will always be RO. Refer to the comment in
+	 * static_protections() in pageattr.c
+	 */
 	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
 }
 
 void set_kernel_text_ro(void)
 {
-	unsigned long start = PFN_ALIGN(_stext);
-	unsigned long end = PFN_ALIGN(__start_rodata);
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = PFN_ALIGN(__stop___ex_table);
 
 	if (!kernel_set_to_readonly)
 		return;
@@ -981,14 +1006,21 @@ void set_kernel_text_ro(void)
 	pr_debug("Set kernel text: %lx - %lx for read only\n",
 		 start, end);
 
+	/*
+	 * Set the kernel identity mapping for text RO.
+	 */
 	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 }
 
 void mark_rodata_ro(void)
 {
-	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+	unsigned long start = PFN_ALIGN(_text);
 	unsigned long rodata_start =
 		((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+	unsigned long end = (unsigned long) &__end_rodata;
+	unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
+	unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
+	unsigned long data_start = (unsigned long) &_sdata;
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -1011,6 +1043,14 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: again\n");
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 #endif
+
+	free_init_pages("unused kernel memory",
+			(unsigned long) page_address(virt_to_page(text_end)),
+			(unsigned long)
+				 page_address(virt_to_page(rodata_start)));
+	free_init_pages("unused kernel memory",
+			(unsigned long) page_address(virt_to_page(rodata_end)),
+			(unsigned long) page_address(virt_to_page(data_start)));
 }
 
 #endif
--- head.orig/arch/x86/mm/ioremap-xen.c	2011-02-07 15:41:38.000000000 +0100
+++ head/arch/x86/mm/ioremap-xen.c	2011-02-07 15:41:45.000000000 +0100
@@ -438,32 +438,6 @@ void __iomem *ioremap_cache(resource_siz
 }
 EXPORT_SYMBOL(ioremap_cache);
 
-#ifndef CONFIG_XEN
-static void __iomem *ioremap_default(resource_size_t phys_addr,
-					unsigned long size)
-{
-	unsigned long flags;
-	void __iomem *ret;
-	int err;
-
-	/*
-	 * - WB for WB-able memory and no other conflicting mappings
-	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
-	 * - Inherit from confliting mappings otherwise
-	 */
-	err = reserve_memtype(phys_addr, phys_addr + size,
-				_PAGE_CACHE_WB, &flags);
-	if (err < 0)
-		return NULL;
-
-	ret = __ioremap_caller(phys_addr, size, flags,
-			       __builtin_return_address(0));
-
-	free_memtype(phys_addr, phys_addr + size);
-	return ret;
-}
-#endif
-
 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
 				unsigned long prot_val)
 {
@@ -539,7 +513,7 @@ void *xlate_dev_mem_ptr(unsigned long ph
 	if (page_is_ram(start >> PAGE_SHIFT))
 		return __va(phys);
 
-	addr = (void __force *)ioremap_default(start, PAGE_SIZE);
+	addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
 	if (addr)
 		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
 
--- head.orig/arch/x86/mm/pageattr-xen.c	2011-03-23 10:00:03.000000000 +0100
+++ head/arch/x86/mm/pageattr-xen.c	2011-03-23 10:00:33.000000000 +0100
@@ -281,6 +281,22 @@ static inline pgprot_t static_protection
 		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
+	/*
+	 * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+	 * kernel text mappings for the large-page-aligned text and rodata
+	 * sections will always be read-only. The kernel identity mappings
+	 * covering the holes caused by this alignment can be whatever the user asks.
+	 *
+	 * This will preserve the large page mappings for kernel text/data
+	 * at no extra cost.
+	 */
+	if (kernel_set_to_readonly &&
+	    within(address, (unsigned long)_text,
+		   (unsigned long)__end_rodata_hpage_align))
+		pgprot_val(forbidden) |= _PAGE_RW;
+#endif
+
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
 
 	return prot;
@@ -1157,12 +1173,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
 
 int set_memory_x(unsigned long addr, int numpages)
 {
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return 0;
+
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return 0;
+
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
--- head.orig/arch/x86/mm/pat-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/mm/pat-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -20,6 +20,7 @@
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/x86_init.h>
 #include <asm/pgtable.h>
 #include <asm/fcntl.h>
 #include <asm/e820.h>
@@ -381,9 +382,6 @@ static int free_ram_pages_type(u64 start
  * - _PAGE_CACHE_UC_MINUS
  * - _PAGE_CACHE_UC
  *
- * req_type will have a special case value '-1', when requester want to inherit
- * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
- *
  * If new_type is NULL, function will return an error if it cannot reserve the
  * region with req_type. If new_type is non-NULL, function will return
  * available type in new_type in case of no error. In case of any error
@@ -403,9 +401,7 @@ int reserve_memtype(u64 start, u64 end, 
 	if (!pat_enabled) {
 		/* This is identical to page table setting without PAT */
 		if (new_type) {
-			if (req_type == -1)
-				*new_type = _PAGE_CACHE_WB;
-			else if (req_type == _PAGE_CACHE_WC)
+			if (req_type == _PAGE_CACHE_WC)
 				*new_type = _PAGE_CACHE_UC_MINUS;
 			else
 				*new_type = req_type & _PAGE_CACHE_MASK;
@@ -414,7 +410,7 @@ int reserve_memtype(u64 start, u64 end, 
 	}
 
 	/* Low ISA region is always mapped WB in page table. No need to track */
-	if (is_ISA_range(start, end - 1)) {
+	if (x86_platform.is_untracked_pat_range(start, end)) {
 		if (new_type)
 			*new_type = _PAGE_CACHE_WB;
 		return 0;
@@ -525,7 +521,7 @@ int free_memtype(u64 start, u64 end)
 		return 0;
 
 	/* Low ISA region is always mapped WB. No need to track */
-	if (is_ISA_range(start, end - 1))
+	if (x86_platform.is_untracked_pat_range(start, end))
 		return 0;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
@@ -609,7 +605,7 @@ static unsigned long lookup_memtype(u64 
 	int rettype = _PAGE_CACHE_WB;
 	struct memtype *entry;
 
-	if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
 		return rettype;
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -736,9 +732,8 @@ int phys_mem_access_prot_allowed(struct 
 	if (!range_is_allowed(mfn, size))
 		return 0;
 
-	if (file->f_flags & O_SYNC) {
+	if (file->f_flags & O_DSYNC)
 		flags = _PAGE_CACHE_UC_MINUS;
-	}
 
 #ifndef CONFIG_X86_32
 #ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
@@ -1032,8 +1027,10 @@ static const struct file_operations memt
 
 static int __init pat_memtype_list_init(void)
 {
-	debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
-				NULL, &memtype_fops);
+	if (pat_enabled) {
+		debugfs_create_file("pat_memtype_list", S_IRUSR,
+				    arch_debugfs_dir, NULL, &memtype_fops);
+	}
 	return 0;
 }
 
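
The reserve/free/lookup hunks above replace open-coded is_ISA_range()
checks with the 2.6.33 x86_platform.is_untracked_pat_range() hook, which
takes a semi-closed [start, end) range (hence the dropped "- 1"). A sketch
of what the default hook amounts to, assuming the usual ISA hole constants
from <asm/e820.h>:

	/* The legacy ISA hole (0xa0000 - 0x100000) is always mapped WB in
	 * the page tables, so PAT need not track reservations inside it. */
	static bool default_untracked_pat_range(u64 start, u64 end)
	{
		return start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS;
	}
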
--- head.orig/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:15:36.000000000 +0100
+++ head/arch/x86/vdso/vdso32-setup-xen.c	2012-02-29 14:17:02.000000000 +0100
@@ -445,7 +445,6 @@ static ctl_table abi_table2[] = {
 
 static ctl_table abi_root_table2[] = {
 	{
-		.ctl_name = CTL_ABI,
 		.procname = "abi",
 		.mode = 0555,
 		.child = abi_table2
--- head.orig/drivers/gpu/drm/vmwgfx/Kconfig	2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/gpu/drm/vmwgfx/Kconfig	2011-02-01 14:55:46.000000000 +0100
@@ -1,6 +1,6 @@
 config DRM_VMWGFX
 	tristate "DRM driver for VMware Virtual GPU"
-	depends on DRM && PCI && FB
+	depends on DRM && PCI && FB && !XEN
 	select FB_DEFERRED_IO
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
--- head.orig/drivers/hwmon/Kconfig	2012-04-10 16:15:23.000000000 +0200
+++ head/drivers/hwmon/Kconfig	2012-04-10 17:02:11.000000000 +0200
@@ -1154,7 +1154,7 @@ config SENSORS_TWL4030_MADC
 
 config SENSORS_VIA_CPUTEMP
 	tristate "VIA CPU temperature sensor"
-	depends on X86
+	depends on X86 && !XEN
 	select HWMON_VID
 	help
 	  If you say yes here you get support for the temperature
--- head.orig/drivers/hwmon/coretemp-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/drivers/hwmon/coretemp-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/platform_device.h>
+#include <linux/pci.h>
 #include <asm/msr.h>
 #include <xen/pcpu.h>
 #include "../xen/core/domctl.h"
@@ -166,6 +167,7 @@ static int adjust_tjmax(struct coretemp_
 	int usemsr_ee = 1;
 	int err;
 	u32 eax, edx;
+	struct pci_dev *host_bridge;
 
 	/* Early chips have no MSR for TjMax */
 
@@ -173,11 +175,21 @@ static int adjust_tjmax(struct coretemp_
 		usemsr_ee = 0;
 	}
 
-	/* Atoms seems to have TjMax at 90C */
+	/* Atom CPUs */
 
 	if (c->x86_model == 0x1c) {
 		usemsr_ee = 0;
-		tjmax = 90000;
+
+		host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+
+		if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL
+		    && (host_bridge->device == 0xa000	/* NM10 based nettop */
+		    || host_bridge->device == 0xa010))	/* NM10 based netbook */
+			tjmax = 100000;
+		else
+			tjmax = 90000;
+
+		pci_dev_put(host_bridge);
 	}
 
 	if ((c->x86_model > 0xe) && (usemsr_ee)) {
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head/drivers/hwmon/via-cputemp-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -0,0 +1,354 @@
+/*
+ * via-cputemp.c - Driver for VIA CPU core temperature monitoring
+ * Copyright (C) 2009 VIA Technologies, Inc.
+ *
+ * based on existing coretemp.c, which is
+ *
+ * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/hwmon.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <asm/msr.h>
+#include <xen/pcpu.h>
+#include "../xen/core/domctl.h"
+
+#define DRVNAME	"via_cputemp"
+
+enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME };
+
+/*
+ * Per-CPU driver data
+ */
+
+struct pdev_entry {
+	struct list_head list;
+	struct platform_device *pdev;
+	struct device *hwmon_dev;
+	const char *name;
+	u8 x86_model;
+	u32 msr;
+};
+#define via_cputemp_data pdev_entry
+
+/*
+ * Sysfs stuff
+ */
+
+static ssize_t show_name(struct device *dev, struct device_attribute
+			  *devattr, char *buf)
+{
+	int ret;
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct via_cputemp_data *data = dev_get_drvdata(dev);
+
+	if (attr->index == SHOW_NAME)
+		ret = sprintf(buf, "%s\n", data->name);
+	else	/* show label */
+		ret = sprintf(buf, "Core %d\n", data->pdev->id);
+	return ret;
+}
+
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	struct via_cputemp_data *data = dev_get_drvdata(dev);
+	u32 eax, edx;
+	int err;
+
+	err = rdmsr_safe_on_pcpu(data->pdev->id, data->msr, &eax, &edx);
+	if (err < 0)
+		return -EAGAIN;
+
+	return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000);
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
+			  SHOW_TEMP);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
+
+static struct attribute *via_cputemp_attributes[] = {
+	&sensor_dev_attr_name.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group via_cputemp_group = {
+	.attrs = via_cputemp_attributes,
+};
+
+static int via_cputemp_probe(struct platform_device *pdev)
+{
+	struct via_cputemp_data *data = platform_get_drvdata(pdev);
+	int err;
+	u32 eax, edx;
+
+	data->name = "via_cputemp";
+
+	switch (data->x86_model) {
+	case 0xA:
+		/* C7 A */
+	case 0xD:
+		/* C7 D */
+		data->msr = 0x1169;
+		break;
+	case 0xF:
+		/* Nano */
+		data->msr = 0x1423;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/* test if we can access the TEMPERATURE MSR */
+	err = rdmsr_safe_on_pcpu(pdev->id, data->msr, &eax, &edx);
+	if (err < 0) {
+		dev_err(&pdev->dev,
+			"Unable to access TEMPERATURE MSR, giving up\n");
+		return err;
+	}
+
+	err = sysfs_create_group(&pdev->dev.kobj, &via_cputemp_group);
+	if (err)
+		return err;
+
+	data->hwmon_dev = hwmon_device_register(&pdev->dev);
+	if (IS_ERR(data->hwmon_dev)) {
+		err = PTR_ERR(data->hwmon_dev);
+		dev_err(&pdev->dev, "Class registration failed (%d)\n",
+			err);
+		goto exit_remove;
+	}
+
+	return 0;
+
+exit_remove:
+	sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
+	return err;
+}
+
+static int via_cputemp_remove(struct platform_device *pdev)
+{
+	struct via_cputemp_data *data = platform_get_drvdata(pdev);
+
+	hwmon_device_unregister(data->hwmon_dev);
+	sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
+	return 0;
+}
+
+static struct platform_driver via_cputemp_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = DRVNAME,
+	},
+	.probe = via_cputemp_probe,
+	.remove = via_cputemp_remove,
+};
+
+static LIST_HEAD(pdev_list);
+static DEFINE_MUTEX(pdev_list_mutex);
+
+struct cpu_info {
+	struct pdev_entry *pdev_entry;
+	u8 x86;
+};
+
+static void get_cpuid_info(void *arg)
+{
+	struct cpu_info *info = arg;
+	struct pdev_entry *pdev_entry = info->pdev_entry;
+	u32 val = cpuid_eax(1);
+
+	info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff);
+	pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0);
+}
+
+static int via_cputemp_device_add(unsigned int cpu)
+{
+	int err;
+	struct cpu_info info;
+	struct platform_device *pdev;
+	struct pdev_entry *pdev_entry;
+
+	pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL);
+	if (!pdev_entry)
+		return -ENOMEM;
+
+	info.pdev_entry = pdev_entry;
+	err = xen_set_physical_cpu_affinity(cpu);
+	if (!err) {
+		get_cpuid_info(&info);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (err > 0) {
+		static bool warned;
+
+		if (!warned) {
+			warned = true;
+			printk(KERN_WARNING DRVNAME
+			       ": Cannot set physical CPU affinity"
+			       " (assuming use of dom0_vcpus_pin)\n");
+		}
+		err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
+	}
+	if (err)
+		goto exit_entry_free;
+
+	if (info.x86 != 6)
+		goto exit_entry_free;
+
+	if (pdev_entry->x86_model < 0x0a)
+		goto exit_entry_free;
+
+	if (pdev_entry->x86_model > 0x0f) {
+		printk(KERN_WARNING DRVNAME ": Unknown CPU "
+		       "model 0x%x\n", pdev_entry->x86_model);
+		goto exit_entry_free;
+	}
+
+	pdev = platform_device_alloc(DRVNAME, cpu);
+	if (!pdev) {
+		err = -ENOMEM;
+		printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+		goto exit_entry_free;
+	}
+
+	platform_set_drvdata(pdev, pdev_entry);
+	pdev_entry->pdev = pdev;
+
+	err = platform_device_add(pdev);
+	if (err) {
+		printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
+		       err);
+		goto exit_device_put;
+	}
+
+	mutex_lock(&pdev_list_mutex);
+	list_add_tail(&pdev_entry->list, &pdev_list);
+	mutex_unlock(&pdev_list_mutex);
+
+	return 0;
+
+exit_device_put:
+	platform_device_put(pdev);
+exit_entry_free:
+	kfree(pdev_entry);
+	return err;
+}
+
+static void via_cputemp_device_remove(unsigned int cpu)
+{
+	struct pdev_entry *p;
+
+	mutex_lock(&pdev_list_mutex);
+	list_for_each_entry(p, &pdev_list, list) {
+		if (p->pdev->id == cpu) {
+			platform_device_unregister(p->pdev);
+			list_del(&p->list);
+			kfree(p);
+			break;
+		}
+	}
+	mutex_unlock(&pdev_list_mutex);
+}
+
+static int via_cputemp_cpu_callback(struct notifier_block *nfb,
+				 unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long) hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		via_cputemp_device_add(cpu);
+		break;
+	case CPU_DEAD:
+		via_cputemp_device_remove(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block via_cputemp_cpu_notifier = {
+	.notifier_call = via_cputemp_cpu_callback,
+};
+
+static int __init via_cputemp_init(void)
+{
+	int err;
+
+	if (!is_initial_xendomain())
+		return -ENODEV;
+
+	if (cpu_data(0).x86_vendor != X86_VENDOR_CENTAUR) {
+		printk(KERN_DEBUG DRVNAME ": Not a VIA CPU\n");
+		err = -ENODEV;
+		goto exit;
+	}
+
+	err = platform_driver_register(&via_cputemp_driver);
+	if (err)
+		goto exit;
+
+	err = register_pcpu_notifier(&via_cputemp_cpu_notifier);
+	if (err)
+		goto exit_driver_unreg;
+
+	if (list_empty(&pdev_list)) {
+		err = -ENODEV;
+		goto exit_notifier_unreg;
+	}
+
+	return 0;
+
+exit_notifier_unreg:
+	unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
+exit_driver_unreg:
+	platform_driver_unregister(&via_cputemp_driver);
+exit:
+	return err;
+}
+
+static void __exit via_cputemp_exit(void)
+{
+	struct pdev_entry *p, *n;
+
+	unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
+	mutex_lock(&pdev_list_mutex);
+	list_for_each_entry_safe(p, n, &pdev_list, list) {
+		platform_device_unregister(p->pdev);
+		list_del(&p->list);
+		kfree(p);
+	}
+	mutex_unlock(&pdev_list_mutex);
+	platform_driver_unregister(&via_cputemp_driver);
+}
+
+MODULE_AUTHOR("Harald Welte <HaraldWelte@viatech.com>");
+MODULE_DESCRIPTION("VIA CPU temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(via_cputemp_init)
+module_exit(via_cputemp_exit)
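
show_temp() above reports the MSR's low 24 bits scaled by 1000, since
hwmon's temp*_input files are read in millidegrees Celsius. The conversion
in isolation (the helper name is illustrative):

	/* An MSR reading whose low 24 bits are 45 is reported as "45000",
	 * i.e. 45.000 degrees C. */
	static unsigned long msr_to_millicelsius(u32 eax)
	{
		return ((unsigned long)eax & 0xffffff) * 1000;
	}
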
--- head.orig/drivers/oprofile/cpu_buffer.c	2011-04-13 13:52:09.000000000 +0200
+++ head/drivers/oprofile/cpu_buffer.c	2011-04-13 13:57:28.000000000 +0200
@@ -432,7 +432,7 @@ void oprofile_add_pc(unsigned long pc, i
  */
 void oprofile_add_mode(int cpu_mode)
 {
-	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
 
 	if (op_add_code(cpu_buf, 1, cpu_mode, current))
 		cpu_buf->sample_lost_overflow++;
--- head.orig/drivers/pci/Kconfig	2012-04-10 16:14:10.000000000 +0200
+++ head/drivers/pci/Kconfig	2012-04-10 17:02:19.000000000 +0200
@@ -133,7 +133,7 @@ config PCI_PASID
 
 config PCI_IOAPIC
 	tristate "PCI IO-APIC hotplug support" if X86
-	depends on PCI
+	depends on PCI && !XEN
 	depends on ACPI
 	depends on HOTPLUG
 	default !X86
--- head.orig/drivers/scsi/Kconfig	2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/scsi/Kconfig	2012-04-10 17:02:26.000000000 +0200
@@ -657,7 +657,7 @@ config SCSI_FLASHPOINT
 
 config VMWARE_PVSCSI
 	tristate "VMware PVSCSI driver support"
-	depends on PCI && SCSI && X86
+	depends on PCI && SCSI && !XEN && X86
 	help
 	  This driver supports VMware's para virtualized SCSI HBA.
 	  To compile this driver as a module, choose M here: the
--- head.orig/drivers/xen/blkfront/vbd.c	2012-03-12 13:50:03.000000000 +0100
+++ head/drivers/xen/blkfront/vbd.c	2012-03-12 16:16:46.000000000 +0100
@@ -33,6 +33,7 @@
  */
 
 #include "block.h"
+#include <linux/bitmap.h>
 #include <linux/blkdev.h>
 #include <linux/list.h>
 
@@ -321,8 +322,7 @@ xlbd_reserve_minors(struct xlbd_major_in
 
 	spin_lock(&ms->lock);
 	if (find_next_bit(ms->bitmap, end, minor) >= end) {
-		for (; minor < end; ++minor)
-			__set_bit(minor, ms->bitmap);
+		bitmap_set(ms->bitmap, minor, nr_minors);
 		rc = 0;
 	} else
 		rc = -EBUSY;
@@ -336,12 +336,10 @@ xlbd_release_minors(struct xlbd_major_in
 		    unsigned int nr_minors)
 {
 	struct xlbd_minor_state *ms = mi->minors;
-	unsigned int end = minor + nr_minors;
 
-	BUG_ON(end > ms->nr);
+	BUG_ON(minor + nr_minors > ms->nr);
 	spin_lock(&ms->lock);
-	for (; minor < end; ++minor)
-		__clear_bit(minor, ms->bitmap);
+	bitmap_clear(ms->bitmap, minor, nr_minors);
 	spin_unlock(&ms->lock);
 }
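
bitmap_set() and bitmap_clear() from <linux/bitmap.h> operate on nr
contiguous bits starting at a given offset, so each call above replaces one
of the old per-bit loops. The equivalence, as a sketch (reserve_minor_range()
is illustrative):

	#include <linux/bitmap.h>

	/* Marks minors [minor, minor + nr) busy; one call instead of
	 * __set_bit() on every minor in turn. */
	static void reserve_minor_range(unsigned long *map,
					unsigned int minor, unsigned int nr)
	{
		bitmap_set(map, minor, nr);
	}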
 
--- head.orig/drivers/xen/char/mem.c	2012-01-20 14:54:43.000000000 +0100
+++ head/drivers/xen/char/mem.c	2012-01-20 15:00:49.000000000 +0100
@@ -5,7 +5,7 @@
  *
  *  Added devfs support. 
  *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
- *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
  */
 
 #include <linux/mm.h>
@@ -17,9 +17,19 @@
 #include <asm/io.h>
 #include <asm/hypervisor.h>
 
+static inline unsigned long size_inside_page(unsigned long start,
+					     unsigned long size)
+{
+	unsigned long sz;
+
+	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
+
+	return min(sz, size);
+}
+
 static inline int uncached_access(struct file *file)
 {
-	if (file->f_flags & O_SYNC)
+	if (file->f_flags & O_DSYNC)
 		return 1;
 	/* Xen sets correct MTRR type on non-RAM for us. */
 	return 0;
@@ -53,20 +63,14 @@ static inline int range_is_allowed(unsig
 static ssize_t read_mem(struct file * file, char __user * buf,
 			size_t count, loff_t *ppos)
 {
-	unsigned long p = *ppos, ignored;
+	unsigned long p = *ppos;
 	ssize_t read = 0, sz;
 	void __iomem *v;
 
 	while (count > 0) {
-		/*
-		 * Handle first page in case it's not aligned
-		 */
-		if (-p & (PAGE_SIZE - 1))
-			sz = -p & (PAGE_SIZE - 1);
-		else
-			sz = PAGE_SIZE;
+		unsigned long remaining;
 
-		sz = min_t(unsigned long, sz, count);
+		sz = size_inside_page(p, count);
 
 		if (!range_is_allowed(p >> PAGE_SHIFT, count))
 			return -EPERM;
@@ -87,10 +91,11 @@ static ssize_t read_mem(struct file * fi
 			break;
 		}
 
-		ignored = copy_to_user(buf, v, sz);
+		remaining = copy_to_user(buf, v, sz);
 		iounmap(v);
-		if (ignored)
+		if (remaining)
 			return -EFAULT;
+
 		buf += sz;
 		p += sz;
 		count -= sz;
@@ -109,15 +114,7 @@ static ssize_t write_mem(struct file * f
 	void __iomem *v;
 
 	while (count > 0) {
-		/*
-		 * Handle first page in case it's not aligned
-		 */
-		if (-p & (PAGE_SIZE - 1))
-			sz = -p & (PAGE_SIZE - 1);
-		else
-			sz = PAGE_SIZE;
-
-		sz = min_t(unsigned long, sz, count);
+		sz = size_inside_page(p, count);
 
 		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
 			return -EPERM;
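
size_inside_page() clamps each chunk so no single copy crosses a page
boundary: with 4 KiB pages, a read of 16 bytes starting at offset 0x1ff8
first gets sz = 4096 - 0xff8 = 8, i.e. only the 8 bytes left in that page;
the next iteration then starts page-aligned at 0x2000 and picks up the
remaining min(4096, 8) = 8 bytes.
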
--- head.orig/drivers/xen/core/evtchn.c	2011-02-01 14:54:13.000000000 +0100
+++ head/drivers/xen/core/evtchn.c	2012-04-03 16:47:40.000000000 +0200
@@ -1049,6 +1049,14 @@ void disable_all_local_evtchn(void)
 			sync_set_bit(i, &s->evtchn_mask[0]);
 }
 
+/* Test an irq's pending state. */
+int xen_test_irq_pending(int irq)
+{
+	unsigned int evtchn = evtchn_from_irq(irq);
+
+	return VALID_EVTCHN(evtchn) && test_evtchn(evtchn);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static void restore_cpu_virqs(unsigned int cpu)
 {
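
xen_test_irq_pending() lets a caller sample the pending state of the event
channel behind an irq without unmasking or consuming it. An illustrative
(hypothetical) polling fragment:

	/* Busy-wait until the irq's event channel has latched. */
	while (!xen_test_irq_pending(irq))
		cpu_relax();
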
--- head.orig/drivers/xen/core/spinlock.c	2012-02-07 11:58:13.000000000 +0100
+++ head/drivers/xen/core/spinlock.c	2012-01-31 18:06:02.000000000 +0100
@@ -16,7 +16,7 @@
 #include <xen/evtchn.h>
 
 struct spinning {
-	raw_spinlock_t *lock;
+	arch_spinlock_t *lock;
 	unsigned int ticket;
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
 	unsigned int irq_count;
@@ -91,7 +91,7 @@ static inline void sequence(unsigned int
 static DEFINE_PER_CPU(unsigned int, _irq_count);
 
 static unsigned int spin_adjust(struct spinning *spinning,
-				const raw_spinlock_t *lock,
+				const arch_spinlock_t *lock,
 				unsigned int ticket)
 {
 	for (; spinning; spinning = spinning->prev) {
@@ -117,7 +117,7 @@ static unsigned int spin_adjust(struct s
 	return ticket;
 }
 
-unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
+unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token)
 {
 	token = spin_adjust(percpu_read(_spinning), lock,
 			    token >> TICKET_SHIFT);
@@ -127,7 +127,7 @@ unsigned int xen_spin_adjust(const raw_s
 static unsigned int ticket_drop(struct spinning *spinning,
 				unsigned int ticket, unsigned int cpu)
 {
-	raw_spinlock_t *lock = spinning->lock;
+	arch_spinlock_t *lock = spinning->lock;
 	unsigned int token;
 	bool kick;
 
@@ -137,7 +137,7 @@ static unsigned int ticket_drop(struct s
 	return kick ? (ticket + 1) & ((1 << TICKET_SHIFT) - 1) : -1;
 }
 
-static unsigned int ticket_get(raw_spinlock_t *lock, struct spinning *prev)
+static unsigned int ticket_get(arch_spinlock_t *lock, struct spinning *prev)
 {
 	unsigned int token;
 	bool free;
@@ -155,7 +155,7 @@ void xen_spin_irq_enter(void)
 	percpu_add(_irq_count, 1);
 	smp_mb();
 	for (; spinning; spinning = spinning->prev) {
-		raw_spinlock_t *lock = spinning->lock;
+		arch_spinlock_t *lock = spinning->lock;
 
 		/*
 		 * Return the ticket if we now own the lock. While just being
@@ -206,7 +206,7 @@ void xen_spin_irq_exit(void)
 	 * acquiring them.
 	 */
 	for (; spinning; spinning = spinning->prev) {
-		raw_spinlock_t *lock = spinning->lock;
+		arch_spinlock_t *lock = spinning->lock;
 
 		if (spinning->irq_count < irq_count)
 			break;
@@ -217,7 +217,7 @@ void xen_spin_irq_exit(void)
 }
 #endif
 
-bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
+bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok,
 		   unsigned int flags)
 {
 	typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask
@@ -312,7 +312,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
 	return false;
 }
 
-void xen_spin_kick(const raw_spinlock_t *lock, unsigned int token)
+void xen_spin_kick(const arch_spinlock_t *lock, unsigned int token)
 {
 	unsigned int cpu = raw_smp_processor_id(), anchor = cpu;
 
--- head.orig/drivers/xen/evtchn.c	2011-02-01 14:54:13.000000000 +0100
+++ head/drivers/xen/evtchn.c	2011-02-01 14:55:46.000000000 +0100
@@ -48,15 +48,14 @@
 #include <linux/mutex.h>
 #include <linux/cpu.h>
 
-#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/xen.h>
+#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/events.h>
 #include <xen/evtchn.h>
 #include <asm/xen/hypervisor.h>
 #else
 #include <xen/evtchn.h>
 #include <xen/public/evtchn.h>
-#define xen_domain() is_running_on_xen()
 #define bind_evtchn_to_irqhandler bind_caller_port_to_irqhandler
 #endif
 
--- head.orig/drivers/xen/privcmd/compat_privcmd.c	2011-01-31 17:29:16.000000000 +0100
+++ head/drivers/xen/privcmd/compat_privcmd.c	2011-02-01 14:55:46.000000000 +0100
@@ -26,17 +26,16 @@
 #include <xen/public/privcmd.h>
 #include <xen/compat_ioctl.h>
 
-int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
+int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg)
 {
 	int ret;
 
 	switch (cmd) {
 	case IOCTL_PRIVCMD_MMAP_32: {
-		struct privcmd_mmap *p;
-		struct privcmd_mmap_32 *p32;
+		struct privcmd_mmap __user *p;
+		struct privcmd_mmap_32 __user *p32 = arg;
 		struct privcmd_mmap_32 n32;
 
-		p32 = compat_ptr(arg);
 		p = compat_alloc_user_space(sizeof(*p));
 		if (copy_from_user(&n32, p32, sizeof(n32)) ||
 		    put_user(n32.num, &p->num) ||
@@ -48,8 +47,8 @@ int privcmd_ioctl_32(int fd, unsigned in
 	}
 		break;
 	case IOCTL_PRIVCMD_MMAPBATCH_32: {
-		struct privcmd_mmapbatch *p;
-		struct privcmd_mmapbatch_32 *p32;
+		struct privcmd_mmapbatch __user *p;
+		struct privcmd_mmapbatch_32 __user *p32 = arg;
 		struct privcmd_mmapbatch_32 n32;
 #ifdef xen_pfn32_t
 		xen_pfn_t *__user arr;
@@ -57,7 +56,6 @@ int privcmd_ioctl_32(int fd, unsigned in
 		unsigned int i;
 #endif
 
-		p32 = compat_ptr(arg);
 		p = compat_alloc_user_space(sizeof(*p));
 		if (copy_from_user(&n32, p32, sizeof(n32)) ||
 		    put_user(n32.num, &p->num) ||
@@ -97,8 +95,8 @@ int privcmd_ioctl_32(int fd, unsigned in
 	}
 		break;
 	case IOCTL_PRIVCMD_MMAPBATCH_V2_32: {
-		struct privcmd_mmapbatch_v2 *p;
-		struct privcmd_mmapbatch_v2_32 *p32;
+		struct privcmd_mmapbatch_v2 __user *p;
+		struct privcmd_mmapbatch_v2_32 __user *p32 = arg;
 		struct privcmd_mmapbatch_v2_32 n32;
 #ifdef xen_pfn32_t
 		xen_pfn_t *__user arr;
@@ -106,7 +104,6 @@ int privcmd_ioctl_32(int fd, unsigned in
 		unsigned int i;
 #endif
 
-		p32 = compat_ptr(arg);
 		p = compat_alloc_user_space(sizeof(*p));
 		if (copy_from_user(&n32, p32, sizeof(n32)) ||
 		    put_user(n32.num, &p->num) ||
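
With the void __user * argument passed straight through from the ioctl
translation table, the compat_ptr() conversions go away and every user
pointer carries the __user annotation for sparse. The underlying 32->64-bit
thunk pattern is unchanged; a reduced sketch with a single illustrative
field:

	#include <linux/compat.h>
	#include <linux/uaccess.h>

	struct native_args { long num; };
	struct compat_args { int num; };

	static long thunk_ioctl(void __user *arg)
	{
		struct compat_args __user *p32 = arg;
		struct native_args __user *p =
			compat_alloc_user_space(sizeof(*p));
		struct compat_args n32;

		/* Copy the packed 32-bit layout in, then rebuild the
		 * native layout on the compat user stack. */
		if (copy_from_user(&n32, p32, sizeof(n32)) ||
		    put_user(n32.num, &p->num))
			return -EFAULT;

		return 0;	/* hand @p to the native handler here */
	}
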
--- head.orig/drivers/xen/xenbus/xenbus_probe.c	2011-06-10 12:05:59.000000000 +0200
+++ head/drivers/xen/xenbus/xenbus_probe.c	2011-06-10 12:06:43.000000000 +0200
@@ -62,6 +62,8 @@
 #endif
 #else
 #include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/events.h>
 #include <xen/page.h>
--- head.orig/fs/compat_ioctl.c	2012-04-10 15:41:34.000000000 +0200
+++ head/fs/compat_ioctl.c	2012-04-10 17:02:34.000000000 +0200
@@ -1423,9 +1423,6 @@ IGNORE_IOCTL(FBIOGCURSOR32)
 #endif
 
 #ifdef CONFIG_XEN
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_V2_32, privcmd_ioctl_32)
 COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
 COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
 COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
@@ -1490,6 +1487,12 @@ static long do_ioctl_trans(int fd, unsig
 		return do_video_stillpicture(fd, cmd, argp);
 	case VIDEO_SET_SPU_PALETTE:
 		return do_video_set_spu_palette(fd, cmd, argp);
+#ifdef CONFIG_XEN
+	case IOCTL_PRIVCMD_MMAP_32:
+	case IOCTL_PRIVCMD_MMAPBATCH_32:
+	case IOCTL_PRIVCMD_MMAPBATCH_V2_32:
+		return privcmd_ioctl_32(fd, cmd, argp);
+#endif
 	}
 
 	/*
--- head.orig/include/acpi/processor.h	2011-12-21 11:11:31.000000000 +0100
+++ head/include/acpi/processor.h	2012-05-23 13:35:16.000000000 +0200
@@ -318,7 +318,7 @@ static inline void acpi_processor_ppc_ex
 	return;
 }
 #ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
-int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
+int acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag);
 #else
 static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
 								int event_flag)
@@ -333,11 +333,11 @@ static inline int acpi_processor_ppc_has
 	}
 	return 0;
 }
-#endif				/* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
 static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
 {
 	return -ENODEV;
 }
+#endif				/* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
 
 #endif				/* CONFIG_CPU_FREQ */
 
--- head.orig/include/xen/compat_ioctl.h	2010-01-18 15:23:12.000000000 +0100
+++ head/include/xen/compat_ioctl.h	2011-02-01 14:55:46.000000000 +0100
@@ -29,7 +29,7 @@
 #define xen_pfn32_t __u32
 #endif
 
-extern int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg);
+extern int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg);
 struct privcmd_mmap_32 {
 	int num;
 	domid_t dom;
--- head.orig/include/xen/evtchn.h	2011-12-09 14:50:16.000000000 +0100
+++ head/include/xen/evtchn.h	2012-01-26 13:45:40.000000000 +0100
@@ -48,6 +48,7 @@
  * LOW-LEVEL DEFINITIONS
  */
 
+#ifdef CONFIG_XEN
 struct irq_cfg {
 	u32 info;
 	union {
@@ -57,8 +58,7 @@ struct irq_cfg {
 #endif
 	};
 };
-
-int assign_irq_vector(int irq, struct irq_cfg *, const struct cpumask *);
+#endif
 
 /*
  * Dynamically bind an event source to an IRQ-like callback handler.
@@ -161,6 +161,9 @@ static inline int close_evtchn(int port)
 	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
 }
 
+/* Test an irq's pending state. */
+int xen_test_irq_pending(int irq);
+
 /*
  * Use these to access the event channel underlying the IRQ handle returned
  * by bind_*_to_irqhandler().
--- head.orig/include/xen/xen.h	2012-06-06 13:23:56.000000000 +0200
+++ head/include/xen/xen.h	2011-02-01 14:55:46.000000000 +0100
@@ -7,8 +7,10 @@ enum xen_domain_type {
 	XEN_HVM_DOMAIN,		/* running in a Xen hvm domain */
 };
 
-#ifdef CONFIG_XEN
+#if defined(CONFIG_PARAVIRT_XEN)
 extern enum xen_domain_type xen_domain_type;
+#elif defined(CONFIG_XEN)
+#define xen_domain_type		XEN_PV_DOMAIN
 #else
 #define xen_domain_type		XEN_NATIVE
 #endif
@@ -25,6 +27,8 @@ extern enum xen_domain_type xen_domain_t
 
 #define xen_initial_domain()	(xen_pv_domain() && \
 				 xen_start_info->flags & SIF_INITDOMAIN)
+#elif defined(CONFIG_XEN)
+#define xen_initial_domain()	is_initial_xendomain()
 #else  /* !CONFIG_XEN_DOM0 */
 #define xen_initial_domain()	(0)
 #endif	/* CONFIG_XEN_DOM0 */
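
On classic CONFIG_XEN (non-pvops) kernels these hunks make xen_domain_type
a compile-time constant and route xen_initial_domain() through
is_initial_xendomain(), so the predicates fold away at build time. An
illustrative gate, matching what the via-cputemp driver above does:

	#include <linux/init.h>
	#include <xen/xen.h>

	static int __init example_init(void)
	{
		if (!xen_initial_domain())	/* dom0-only functionality */
			return -ENODEV;
		return 0;
	}
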
--- head.orig/kernel/kexec.c	2012-04-10 16:44:54.000000000 +0200
+++ head/kernel/kexec.c	2012-04-10 17:02:39.000000000 +0200
@@ -1157,6 +1157,7 @@ size_t crash_get_memory_size(void)
 	return size;
 }
 
+#ifndef CONFIG_XEN
 void __weak crash_free_reserved_phys_range(unsigned long begin,
 					   unsigned long end)
 {
@@ -1220,6 +1221,7 @@ unlock:
 	mutex_unlock(&kexec_mutex);
 	return ret;
 }
+#endif /* !CONFIG_XEN */
 
 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
 			    size_t data_len)
--- head.orig/kernel/ksysfs.c	2012-06-06 13:23:56.000000000 +0200
+++ head/kernel/ksysfs.c	2011-12-02 14:07:10.000000000 +0100
@@ -107,6 +107,7 @@ static ssize_t kexec_crash_size_show(str
 {
 	return sprintf(buf, "%zu\n", crash_get_memory_size());
 }
+#ifndef CONFIG_XEN
 static ssize_t kexec_crash_size_store(struct kobject *kobj,
 				   struct kobj_attribute *attr,
 				   const char *buf, size_t count)
@@ -121,6 +122,9 @@ static ssize_t kexec_crash_size_store(st
 	return ret < 0 ? ret : count;
 }
 KERNEL_ATTR_RW(kexec_crash_size);
+#else
+KERNEL_ATTR_RO(kexec_crash_size);
+#endif
 
 static ssize_t vmcoreinfo_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf)
--- head.orig/kernel/sysctl_binary.c	2011-01-31 18:01:51.000000000 +0100
+++ head/kernel/sysctl_binary.c	2011-02-01 14:55:46.000000000 +0100
@@ -874,9 +874,10 @@ static const struct bin_table bin_bus_ta
 
 
 #ifdef CONFIG_XEN
-static const struct trans_ctl_table trans_xen_table[] = {
-	{ CTL_XEN_INDEPENDENT_WALLCLOCK,	"independent_wallclock" },
-	{ CTL_XEN_PERMITTED_CLOCK_JITTER,	"permitted_clock_jitter" },
+#include <xen/sysctl.h>
+static const struct bin_table bin_xen_table[] = {
+	{ CTL_INT,	CTL_XEN_INDEPENDENT_WALLCLOCK,	"independent_wallclock" },
+	{ CTL_ULONG,	CTL_XEN_PERMITTED_CLOCK_JITTER,	"permitted_clock_jitter" },
 	{}
 };
 #endif
@@ -921,7 +922,7 @@ static const struct bin_table bin_root_t
 	{ CTL_DIR,	CTL_ABI,	"abi" },
 	/* CTL_CPU not used */
 #ifdef CONFIG_XEN
-	{ CTL_XEN,	"xen",		trans_xen_table },
+	{ CTL_DIR,	CTL_XEN,	"xen",		bin_xen_table },
 #endif
 	/* CTL_ARLAN "arlan" no longer used */
 	{ CTL_DIR,	CTL_S390DBF,	"s390dbf",	bin_s390dbf_table },
--- head.orig/lib/swiotlb-xen.c	2011-02-01 14:54:13.000000000 +0100
+++ head/lib/swiotlb-xen.c	2011-02-01 14:55:46.000000000 +0100
@@ -114,6 +114,7 @@ setup_io_tlb_npages(char *str)
 		swiotlb_force = 1;
 	else if (!strcmp(str, "off"))
 		swiotlb_force = -1;
+
 	return 1;
 }
 __setup("swiotlb=", setup_io_tlb_npages);
@@ -126,8 +127,10 @@ static dma_addr_t swiotlb_virt_to_bus(st
 	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
-static void swiotlb_print_info(unsigned long bytes)
+void swiotlb_print_info(void)
 {
+	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
 	printk(KERN_INFO "Software IO TLB enabled: \n"
 	       " Aperture:     %lu megabytes\n"
 	       " Address size: %u bits\n"
@@ -141,7 +144,7 @@ static void swiotlb_print_info(unsigned 
  * structures for the software IO TLB used to implement the PCI DMA API.
  */
 void __init
-swiotlb_init_with_default_size(size_t default_size)
+swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
 	unsigned long i, bytes;
 	int rc;
@@ -212,12 +215,12 @@ swiotlb_init_with_default_size(size_t de
 	} while (rc && dma_bits++ < max_dma_bits);
 	if (rc)
 		panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");
-
-	swiotlb_print_info(bytes);
+	if (verbose)
+		swiotlb_print_info();
 }
 
 void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
 {
 	long ram_end;
 	size_t defsz = 64 * (1 << 20); /* 64MB default size */
@@ -235,7 +238,7 @@ swiotlb_init(void)
 	}
 
 	if (swiotlb)
-		swiotlb_init_with_default_size(defsz);
+		swiotlb_init_with_default_size(defsz, verbose);
 	else
 		printk(KERN_INFO "Software IO TLB disabled\n");
 }
@@ -424,7 +427,7 @@ do_unmap_single(struct device *hwdev, ch
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
+	 * entries to indicate the number of contiguous entries available.
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */