Automatically created from "patch-3.2" by xen-port-patches.py

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 3.2
Patch-mainline: 3.2

 This patch contains the differences between 3.1 and 3.2.

Acked-by: jbeulich@suse.com

--- 12.2.orig/arch/x86/ia32/ia32entry-xen.S	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/ia32/ia32entry-xen.S	2011-11-17 15:56:06.000000000 +0100
@@ -738,4 +738,6 @@ ia32_sys_call_table:
 	.quad sys_syncfs
 	.quad compat_sys_sendmmsg	/* 345 */
 	.quad sys_setns
+	.quad compat_sys_process_vm_readv
+	.quad compat_sys_process_vm_writev
 ia32_syscall_end:
--- 12.2.orig/arch/x86/include/asm/mach_traps.h	2012-06-20 12:12:04.000000000 +0200
+++ 12.2/arch/x86/include/asm/mach_traps.h	2011-11-28 11:36:37.000000000 +0100
@@ -5,6 +5,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H
 #define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H
 
+#include <linux/nmi.h>
+#include <asm/delay.h>
 #include <asm/mc146818rtc.h>
 
 #define NMI_REASON_PORT		0x61
@@ -22,6 +24,29 @@ static inline unsigned char default_get_
 	return inb(NMI_REASON_PORT);
 }
 
+static inline void clear_serr_error(unsigned char reason)
+{
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
+}
+
+static inline void clear_io_check_error(unsigned char reason)
+{
+	unsigned long i;
+
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
+
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
+
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
+}
+
 static inline void reassert_nmi(void)
 {
 	int old_reg = -1;
--- 12.2.orig/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-02-15 17:50:13.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/irq_vectors.h	2011-11-17 16:00:04.000000000 +0100
@@ -65,22 +65,13 @@ static inline int invalid_vm86_irq(int i
 		   ? (n) : (1 << (PAGE_SHIFT + 3)) - NR_VECTORS)
 
 #define IO_APIC_VECTOR_LIMIT		PIRQ_MAX(32 * MAX_IO_APICS)
-
-#ifdef CONFIG_SPARSE_IRQ
-# define CPU_VECTOR_LIMIT		PIRQ_MAX(64 * NR_CPUS)
-#else
-# define CPU_VECTOR_LIMIT		PIRQ_MAX(32 * NR_CPUS)
-#endif
+#define CPU_VECTOR_LIMIT		PIRQ_MAX(64 * NR_CPUS)
 
 #if defined(CONFIG_X86_IO_APIC)
-# ifdef CONFIG_SPARSE_IRQ
-#  define NR_PIRQS			(NR_VECTORS + IO_APIC_VECTOR_LIMIT)
-# else
-#  define NR_PIRQS					\
-	(CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ?	\
+# define NR_PIRQS					\
+	(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ?	\
 		(NR_VECTORS + CPU_VECTOR_LIMIT)  :	\
 		(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
-# endif
 #elif defined(CONFIG_XEN_PCIDEV_FRONTEND)
 # define NR_PIRQS			(NR_VECTORS + CPU_VECTOR_LIMIT)
 #else /* !CONFIG_X86_IO_APIC: */
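With the CONFIG_SPARSE_IRQ conditionals gone, NR_PIRQS above reduces to
NR_VECTORS plus the larger of the two vector limits; note the comparison
also flips from < to >, so the larger limit now wins rather than the
smaller. A worked example with illustrative values only (assuming
NR_CPUS == 64 and MAX_IO_APICS == 64):

	CPU_VECTOR_LIMIT     = PIRQ_MAX(64 * 64)  /* PIRQ_MAX(4096) */
	IO_APIC_VECTOR_LIMIT = PIRQ_MAX(32 * 64)  /* PIRQ_MAX(2048) */
	NR_PIRQS             = NR_VECTORS + CPU_VECTOR_LIMIT  /* CPU limit wins */
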
--- 12.2.orig/arch/x86/include/mach-xen/asm/mach_traps.h	2011-02-07 12:21:00.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/mach_traps.h	2011-11-28 10:08:44.000000000 +0100
@@ -16,7 +16,7 @@
 static inline void clear_serr_error(unsigned char reason) {}
 static inline void clear_io_check_error(unsigned char reason) {}
 
-static inline unsigned char get_nmi_reason(void)
+static inline unsigned char xen_get_nmi_reason(void)
 {
 	shared_info_t *s = HYPERVISOR_shared_info;
 	unsigned char reason = 0;
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor.h	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/processor.h	2011-11-17 16:53:30.000000000 +0100
@@ -123,6 +123,9 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 #endif
+#ifndef CONFIG_XEN
+	u32			microcode;
+#endif
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL	0
@@ -190,7 +193,8 @@ static inline void xen_cpuid(unsigned in
 	      "=b" (*ebx),
 	      "=c" (*ecx),
 	      "=d" (*edx)
-	    : "0" (*eax), "2" (*ecx));
+	    : "0" (*eax), "2" (*ecx)
+	    : "memory");
 }
 
 static inline void load_cr3(pgd_t *pgdir)
--- 12.2.orig/arch/x86/include/mach-xen/asm/spinlock.h	2012-04-03 08:29:07.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/spinlock.h	2012-02-01 09:13:39.000000000 +0100
@@ -48,14 +48,15 @@
 int xen_spinlock_init(unsigned int cpu);
 void xen_spinlock_cleanup(unsigned int cpu);
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-unsigned int xen_spin_adjust(const arch_spinlock_t *, unsigned int token);
+struct __raw_tickets xen_spin_adjust(const arch_spinlock_t *,
+				     struct __raw_tickets);
 #else
-#define xen_spin_adjust(lock, token) (token)
+#define xen_spin_adjust(lock, raw_tickets) (raw_tickets)
 #define xen_spin_wait(l, t, f) xen_spin_wait(l, t)
 #endif
-bool xen_spin_wait(arch_spinlock_t *, unsigned int *token,
+bool xen_spin_wait(arch_spinlock_t *, struct __raw_tickets *,
 		   unsigned int flags);
-void xen_spin_kick(const arch_spinlock_t *, unsigned int token);
+void xen_spin_kick(const arch_spinlock_t *, unsigned int ticket);
 
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
@@ -69,146 +70,29 @@ void xen_spin_kick(const arch_spinlock_t
  * issues and should be optimal for the uncontended case. Note the tail must be
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
- *
- * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
- * save some instructions and make the code more elegant. There really isn't
- * much between them in performance though, especially as locks are out of line.
  */
-#if TICKET_SHIFT == 8
-#define __ticket_spin_lock_preamble \
-	asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \
-	    "cmpb %h0, %b0\n\t" \
-	    "sete %1" \
-	    : "=&Q" (token), "=qm" (free), "+m" (lock->slock) \
-	    : "0" (0x0100) \
-	    : "memory", "cc")
-#define __ticket_spin_lock_body \
-	asm("1:\t" \
-	    "cmpb %h0, %b0\n\t" \
-	    "je 2f\n\t" \
-	    "decl %1\n\t" \
-	    "jz 2f\n\t" \
-	    "rep ; nop\n\t" \
-	    "movb %2, %b0\n\t" \
-	    /* don't need lfence here, because loads are in-order */ \
-	    "jmp 1b\n" \
-	    "2:" \
-	    : "+Q" (token), "+g" (count) \
-	    : "m" (lock->slock) \
-	    : "memory", "cc")
-#define __ticket_spin_unlock_body \
-	asm(UNLOCK_LOCK_PREFIX "incb %2\n\t" \
-	    "movzwl %2, %0\n\t" \
-	    "cmpb %h0, %b0\n\t" \
-	    "setne %1" \
-	    : "=&Q" (token), "=qm" (kick), "+m" (lock->slock) \
-	    : \
-	    : "memory", "cc")
-
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
-	int tmp, new;
-
-	asm("movzwl %2, %0\n\t"
-	    "cmpb %h0, %b0\n\t"
-	    "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
-	    "jne 1f\n\t"
-	    LOCK_PREFIX "cmpxchgw %w1, %2\n\t"
-	    "1:\t"
-	    "sete %b1\n\t"
-	    "movzbl %b1, %0\n\t"
-	    : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-	    :
-	    : "memory", "cc");
-
-	return tmp;
-}
-#elif TICKET_SHIFT == 16
-#define __ticket_spin_lock_preamble \
-	do { \
-		unsigned int tmp; \
-		asm(LOCK_PREFIX "xaddl %0, %2\n\t" \
-		    "shldl $16, %0, %3\n\t" \
-		    "cmpw %w3, %w0\n\t" \
-		    "sete %1" \
-		    : "=&r" (token), "=qm" (free), "+m" (lock->slock), \
-		      "=&g" (tmp) \
-		    : "0" (0x00010000) \
-		    : "memory", "cc"); \
-	} while (0)
-#define __ticket_spin_lock_body \
-	do { \
-		unsigned int tmp; \
-		asm("shldl $16, %0, %2\n" \
-		    "1:\t" \
-		    "cmpw %w2, %w0\n\t" \
-		    "je 2f\n\t" \
-		    "decl %1\n\t" \
-		    "jz 2f\n\t" \
-		    "rep ; nop\n\t" \
-		    "movw %3, %w0\n\t" \
-		    /* don't need lfence here, because loads are in-order */ \
-		    "jmp 1b\n" \
-		    "2:" \
-		    : "+r" (token), "+g" (count), "=&g" (tmp) \
-		    : "m" (lock->slock) \
-		    : "memory", "cc"); \
-	} while (0)
-#define __ticket_spin_unlock_body \
-	do { \
-		unsigned int tmp; \
-		asm(UNLOCK_LOCK_PREFIX "incw %2\n\t" \
-		    "movl %2, %0\n\t" \
-		    "shldl $16, %0, %3\n\t" \
-		    "cmpw %w3, %w0\n\t" \
-		    "setne %1" \
-		    : "=&r" (token), "=qm" (kick), "+m" (lock->slock), \
-		      "=&r" (tmp) \
-		    : \
-		    : "memory", "cc"); \
-	} while (0)
-
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
-	int tmp;
-	int new;
-
-	asm("movl %2, %0\n\t"
-	    "movl %0, %1\n\t"
-	    "roll $16, %0\n\t"
-	    "cmpl %0, %1\n\t"
-	    "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
-	    "jne 1f\n\t"
-	    LOCK_PREFIX "cmpxchgl %1, %2\n"
-	    "1:\t"
-	    "sete %b1\n\t"
-	    "movzbl %b1, %0\n\t"
-	    : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-	    :
-	    : "memory", "cc");
-
-	return tmp;
-}
-#endif
-
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
-	unsigned int token, count;
-	unsigned int flags = arch_local_irq_save();
-	bool free;
+	struct __raw_tickets inc = { .tail = 1 };
+	unsigned int count, flags = arch_local_irq_save();
 
-	__ticket_spin_lock_preamble;
-	if (likely(free)) {
+	inc = xadd(&lock->tickets, inc);
+	if (likely(inc.head == inc.tail)) {
 		arch_local_irq_restore(flags);
 		return;
 	}
-	token = xen_spin_adjust(lock, token);
+	inc = xen_spin_adjust(lock, inc);
 	arch_local_irq_restore(flags);
+
 	do {
 		count = 1 << 12;
-		__ticket_spin_lock_body;
-	} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
+		while (inc.head != inc.tail && --count) {
+			cpu_relax();
+			inc.head = ACCESS_ONCE(lock->tickets.head);
+		}
+	} while (unlikely(!count) && !xen_spin_wait(lock, &inc, flags));
+	barrier();		/* make sure nothing creeps before the lock is taken */
 }
 #else
 #define __ticket_spin_lock(lock) __ticket_spin_lock_flags(lock, -1)
@@ -217,47 +101,74 @@ static __always_inline void __ticket_spi
 static __always_inline void __ticket_spin_lock_flags(arch_spinlock_t *lock,
 						     unsigned long flags)
 {
-	unsigned int token, count;
-	bool free;
+	struct __raw_tickets inc = { .tail = 1 };
+	unsigned int count;
 
-	__ticket_spin_lock_preamble;
-	if (likely(free))
+	inc = xadd(&lock->tickets, inc);
+	if (likely(inc.head == inc.tail))
 		return;
-	token = xen_spin_adjust(lock, token);
+	inc = xen_spin_adjust(lock, inc);
+
 	do {
 		count = 1 << 12;
-		__ticket_spin_lock_body;
-	} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
+		while (inc.head != inc.tail && --count) {
+			cpu_relax();
+			inc.head = ACCESS_ONCE(lock->tickets.head);
+		}
+	} while (unlikely(!count) && !xen_spin_wait(lock, &inc, flags));
+	barrier();		/* make sure nothing creeps before the lock is taken */
 }
 
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
-	unsigned int token;
-	bool kick;
+	arch_spinlock_t old, new;
+
+	old.tickets = ACCESS_ONCE(lock->tickets);
+	if (old.tickets.head != old.tickets.tail)
+		return 0;
 
-	__ticket_spin_unlock_body;
-	if (kick)
-		xen_spin_kick(lock, token);
+	new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+
+	/* cmpxchg is a full barrier, so nothing can move before it */
+	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+{
+	register struct __raw_tickets new;
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+# define UNLOCK_SUFFIX(n) "%z" #n
+#elif TICKET_SHIFT == 8
+# define UNLOCK_SUFFIX(n) "b"
+#elif TICKET_SHIFT == 16
+# define UNLOCK_SUFFIX(n) "w"
+#endif
+	asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
+		     : "+m" (lock->tickets.head)
+		     :
+		     : "memory", "cc");
 #if !defined(XEN_SPINLOCK_SOURCE) || !CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-#undef __ticket_spin_lock_preamble
-#undef __ticket_spin_lock_body
-#undef __ticket_spin_unlock_body
+# undef UNLOCK_SUFFIX
+# undef UNLOCK_LOCK_PREFIX
 #endif
+	new = ACCESS_ONCE(lock->tickets);
+	if (new.head != new.tail)
+		xen_spin_kick(lock, new.head);
+}
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
-	int tmp = ACCESS_ONCE(lock->slock);
+	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+	return tmp.tail != tmp.head;
 }
 
 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
-	int tmp = ACCESS_ONCE(lock->slock);
+	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+	return (__ticket_t)(tmp.tail - tmp.head) > 1;
 }
 
 #define __arch_spin(n) __ticket_spin_##n
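The removed asm blobs and the new xadd()-based fast path implement the
same ticket protocol described in the comment above: grab a ticket by
atomically bumping tail, spin until head catches up, release by bumping
head. A minimal user-space model of the TICKET_SHIFT == 16 case (a
sketch only: plain C11 atomics stand in for the kernel's
xadd()/ACCESS_ONCE(), and the Xen slow-path hooks
xen_spin_wait()/xen_spin_kick() are omitted):

	#include <stdatomic.h>
	#include <stdint.h>

	/* low 16 bits: head (current owner), high 16 bits: tail (next
	 * free ticket) -- the little-endian image of struct __raw_tickets */
	typedef struct { _Atomic uint32_t head_tail; } ticketlock_t;

	static void ticket_lock(ticketlock_t *lk)
	{
		/* xadd equivalent: take a ticket by bumping tail, reading
		 * the old head/tail pair in the same atomic operation */
		uint32_t old = atomic_fetch_add(&lk->head_tail, 1u << 16);
		uint16_t my_ticket = old >> 16;

		/* uncontended if head already equals our ticket; otherwise
		 * spin (the kernel adds cpu_relax() and, after 2^12 rounds,
		 * blocks on a Xen event channel instead of spinning) */
		while ((uint16_t)atomic_load(&lk->head_tail) != my_ticket)
			;
	}

	static void ticket_unlock(ticketlock_t *lk)
	{
		/* the "inc head" above: hand the lock to the next ticket.
		 * The kernel increments only the 16-bit head, so wraparound
		 * cannot carry into tail; this model ignores that corner
		 * case for brevity. Afterwards the kernel re-reads
		 * head/tail and kicks any remote waiter via xen_spin_kick() */
		atomic_fetch_add(&lk->head_tail, 1);
	}
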
--- 12.2.orig/arch/x86/include/mach-xen/asm/spinlock_types.h	2012-01-31 18:19:49.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/spinlock_types.h	2012-01-26 13:51:28.000000000 +0100
@@ -5,11 +5,8 @@
 # error "please don't include this file directly"
 #endif
 
-#include <asm/types.h>
+#include <linux/types.h>
 
-typedef union {
-	unsigned int slock;
-	struct {
 #ifdef CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
 /*
  * On Xen we support CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING levels of
@@ -19,13 +16,22 @@ typedef union {
  * a sufficiently smaller number of CPUs.
  */
 #if (CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING + 1) * CONFIG_NR_CPUS < 256
+typedef u8  __ticket_t;
 # define TICKET_SHIFT 8
-		u8 cur, seq;
+typedef u16 __ticketpair_t;
 #else
+typedef u16 __ticket_t;
 # define TICKET_SHIFT 16
-		u16 cur, seq;
+typedef u32 __ticketpair_t;
 #endif
+
+typedef union {
+	__ticketpair_t head_tail;
+	struct __raw_tickets {
+		__ticket_t head, tail;
+	} tickets;
 #else /* ndef CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING */
+typedef struct {
 /*
  * This differs from the pre-2.6.24 spinlock by always using xchgb
  * rather than decb to take the lock; this allows it to use a
@@ -33,14 +39,13 @@ typedef union {
  * contention counter, so that we can implement
  * __byte_spin_is_contended.
  */
-		u8 lock;
+	u8 lock;
 #if CONFIG_NR_CPUS < 256
-		u8 spinners;
+	u8 spinners;
 #else
 # error NR_CPUS >= 256 not implemented
 #endif
 #endif /* def CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING */
-	};
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
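The head_tail/tickets union defined here is also what lets
__ticket_spin_trylock() in the previous file claim a ticket with a single
cmpxchg: a free lock has head == tail, and adding 1 << TICKET_SHIFT to
the combined word advances only the tail. Worked example for the
TICKET_SHIFT == 8 layout (illustrative values):

	old.head_tail = 0x0303;                    /* head == tail == 3: unlocked */
	new.head_tail = old.head_tail + (1 << 8);  /* 0x0403: tail 4, head 3 */
	/* cmpxchg(&lock->head_tail, 0x0303, 0x0403) succeeds only if nobody
	 * raced us; afterwards our ticket (3) equals head, so we own the
	 * lock and the next ticket to be handed out is 4 */
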
--- 12.2.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/kernel/apic/io_apic-xen.c	2011-11-28 10:08:44.000000000 +0100
@@ -104,21 +104,21 @@ static struct ioapic {
 	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } ioapics[MAX_IO_APICS];
 
-#define mpc_ioapic_ver(id)		ioapics[id].mp_config.apicver
+#define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
 
-int mpc_ioapic_id(int id)
+int mpc_ioapic_id(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicid;
+	return ioapics[ioapic_idx].mp_config.apicid;
 }
 
-unsigned int mpc_ioapic_addr(int id)
+unsigned int mpc_ioapic_addr(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicaddr;
+	return ioapics[ioapic_idx].mp_config.apicaddr;
 }
 
-struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id)
+struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 {
-	return &ioapics[id].gsi_config;
+	return &ioapics[ioapic_idx].gsi_config;
 }
 
 int nr_ioapics;
@@ -201,21 +201,15 @@ static struct irq_pin_list *alloc_irq_pi
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-#else
-static struct irq_cfg irq_cfgx[NR_IRQS];
-#endif
 
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	int count, node, i;
 
-	if (!legacy_pic->nr_legacy_irqs) {
-		nr_irqs_gsi = 0;
+	if (!legacy_pic->nr_legacy_irqs)
 		io_apic_irqs = ~0UL;
-	}
 
 	for (i = 0; i < nr_ioapics; i++) {
 		ioapics[i].saved_registers =
@@ -249,7 +243,6 @@ int __init arch_early_irq_init(void)
 	return 0;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq_get_chip_data(irq);
@@ -284,22 +277,6 @@ static void free_irq_cfg(unsigned int at
 	kfree(cfg);
 }
 
-#else
-
-struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	return irq_cfgx + irq;
-}
-
-static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
-
-#endif
-
 static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 {
 	int res = irq_alloc_desc_at(at, node);
@@ -436,13 +413,21 @@ union entry_union {
 };
 
 #ifndef CONFIG_XEN
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	return eu.entry;
+}
+
 static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
 	union entry_union eu;
 	unsigned long flags;
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	eu.entry = __ioapic_read_entry(apic, pin);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return eu.entry;
 }
@@ -573,18 +558,6 @@ static void io_apic_modify_irq(struct ir
 		__io_apic_modify_irq(entry, mask_and, mask_or, final);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-			     IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
-			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -629,6 +602,66 @@ static void unmask_ioapic_irq(struct irq
 	unmask_ioapic(data->chip_data);
 }
 
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+{
+	if (mpc_ioapic_ver(apic) >= 0x20) {
+		/*
+		 * Intr-remapping uses pin number as the virtual vector
+		 * in the RTE. Actual vector is programmed in
+		 * intr-remapping table entry. Hence for the io-apic
+		 * EOI we use the pin number.
+		 */
+		if (cfg && irq_remapped(cfg))
+			io_apic_eoi(apic, pin);
+		else
+			io_apic_eoi(apic, vector);
+	} else {
+		struct IO_APIC_route_entry entry, entry1;
+
+		entry = entry1 = __ioapic_read_entry(apic, pin);
+
+		/*
+		 * Mask the entry and change the trigger mode to edge.
+		 */
+		entry1.mask = 1;
+		entry1.trigger = IOAPIC_EDGE;
+
+		__ioapic_write_entry(apic, pin, entry1);
+
+		/*
+		 * Restore the previous level triggered entry.
+		 */
+		__ioapic_write_entry(apic, pin, entry);
+	}
+}
+
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -637,10 +670,44 @@ static void clear_IO_APIC_pin(unsigned i
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 		return;
+
+	/*
+	 * Make sure the entry is masked and re-read the contents to check
+	 * if it is a level triggered pin and if the remote-IRR is set.
+	 */
+	if (!entry.mask) {
+		entry.mask = 1;
+		ioapic_write_entry(apic, pin, entry);
+		entry = ioapic_read_entry(apic, pin);
+	}
+
+	if (entry.irr) {
+		unsigned long flags;
+
+		/*
+		 * Make sure the trigger mode is set to level. Explicit EOI
+		 * doesn't clear the remote-IRR if the trigger mode is not
+		 * set to level.
+		 */
+		if (!entry.trigger) {
+			entry.trigger = IOAPIC_LEVEL;
+			ioapic_write_entry(apic, pin, entry);
+		}
+
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		__eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
 	/*
-	 * Disable it in the IO-APIC irq-routing table:
+	 * Clear the rest of the bits in the IO-APIC RTE except for the mask
+	 * bit.
 	 */
 	ioapic_mask_entry(apic, pin);
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.irr)
+		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+		       mpc_ioapic_id(apic), pin);
 }
 
 static void clear_IO_APIC (void)
@@ -762,13 +829,13 @@ int restore_ioapic_entries(void)
 /*
  * Find the IRQ entry number of a certain pin.
  */
-static int find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int ioapic_idx, int pin, int type)
 {
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].irqtype == type &&
-		    (mp_irqs[i].dstapic == mpc_ioapic_id(apic) ||
+		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].dstirq == pin)
 			return i;
@@ -808,12 +875,13 @@ static int __init find_isa_irq_apic(int 
 		    (mp_irqs[i].srcbusirq == irq))
 			break;
 	}
+
 	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic)
-				return apic;
-		}
+		int ioapic_idx;
+
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
+				return ioapic_idx;
 	}
 
 	return -1;
@@ -1029,7 +1097,7 @@ static int pin_2_irq(int idx, int apic, 
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 				struct io_apic_irq_attr *irq_attr)
 {
-	int apic, i, best_guess = -1;
+	int ioapic_idx, i, best_guess = -1;
 
 	apic_printk(APIC_DEBUG,
 		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -1042,8 +1110,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
 
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic ||
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
 			    mp_irqs[i].dstapic == MP_APIC_ALL)
 				break;
 
@@ -1051,13 +1119,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
 		    !mp_irqs[i].irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+			int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
 
-			if (!(apic || IO_APIC_IRQ(irq)))
+			if (!(ioapic_idx || IO_APIC_IRQ(irq)))
 				continue;
 
 			if (pin == (mp_irqs[i].srcbusirq & 3)) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1068,7 +1136,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
 			 * best-guess fuzzy result for broken mptables.
 			 */
 			if (best_guess < 0) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1259,7 +1327,6 @@ void __setup_vector_irq(int cpu)
 }
 
 static struct irq_chip ioapic_chip;
-static struct irq_chip ir_ioapic_chip;
 
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
@@ -1303,7 +1370,7 @@ static void ioapic_register_intr(unsigne
 
 	if (irq_remapped(cfg)) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		chip = &ir_ioapic_chip;
+		irq_remap_modify_chip_defaults(chip);
 		fasteoi = trigger != 0;
 	}
 
@@ -1311,85 +1378,107 @@ static void ioapic_register_intr(unsigne
 	irq_set_chip_and_handler_name(irq, chip, hdl,
 				      fasteoi ? "fasteoi" : "edge");
 }
-#else /* !CONFIG_XEN */
-#define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, cfg, trigger) evtchn_register_pirq(irq)
-#endif
 
-static int setup_ioapic_entry(int apic_id, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector, int pin)
+
+static int setup_ir_ioapic_entry(int irq,
+			      struct IR_IO_APIC_route_entry *entry,
+			      unsigned int destination, int vector,
+			      struct io_apic_irq_attr *attr)
 {
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
+	int index;
+	struct irte irte;
+	int ioapic_id = mpc_ioapic_id(attr->ioapic);
+	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
 
-#ifndef CONFIG_XEN
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
+	if (!iommu) {
+		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
+		return -ENODEV;
+	}
 
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic_id);
+	index = alloc_irte(iommu, irq, 1);
+	if (index < 0) {
+		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
+		return -ENOMEM;
+	}
 
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
+	prepare_irte(&irte, vector, destination);
 
-		prepare_irte(&irte, vector, destination);
+	/* Set source-id of interrupt request */
+	set_ioapic_sid(&irte, ioapic_id);
 
-		/* Set source-id of interrupt request */
-		set_ioapic_sid(&irte, apic_id);
+	modify_irte(irq, &irte);
 
-		modify_irte(irq, &irte);
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
+		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
+		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
+		"Avail:%X Vector:%02X Dest:%08X "
+		"SID:%04X SQ:%X SVT:%X)\n",
+		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
+		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
+		irte.avail, irte.vector, irte.dest_id,
+		irte.sid, irte.sq, irte.svt);
+
+	memset(entry, 0, sizeof(*entry));
+
+	entry->index2	= (index >> 15) & 0x1;
+	entry->zero	= 0;
+	entry->format	= 1;
+	entry->index	= (index & 0x7fff);
+	/*
+	 * IO-APIC RTE will be configured with virtual vector.
+	 * irq handler will do the explicit EOI to the io-apic.
+	 */
+	entry->vector	= attr->ioapic_pin;
+	entry->mask	= 0;			/* enable IRQ */
+	entry->trigger	= attr->trigger;
+	entry->polarity	= attr->polarity;
 
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-		/*
-		 * IO-APIC RTE will be configured with virtual vector.
-		 * irq handler will do the explicit EOI to the io-apic.
-		 */
-		ir_entry->vector = pin;
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (attr->trigger)
+		entry->mask = 1;
 
-		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
-			"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
-			"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
-			"Avail:%X Vector:%02X Dest:%08X "
-			"SID:%04X SQ:%X SVT:%X)\n",
-			apic_id, irte.present, irte.fpd, irte.dst_mode,
-			irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
-			irte.avail, irte.vector, irte.dest_id,
-			irte.sid, irte.sq, irte.svt);
-	} else
+	return 0;
+}
+#else /* !CONFIG_XEN */
+#define __clear_irq_vector(irq, cfg) ((void)0)
+#define ioapic_register_intr(irq, cfg, trigger) evtchn_register_pirq(irq)
 #endif
-	{
-		entry->delivery_mode = apic->irq_delivery_mode;
-		entry->dest_mode = apic->irq_dest_mode;
-		entry->dest = destination;
-		entry->vector = vector;
-	}
 
-	entry->mask = 0;				/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
+static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+			       unsigned int destination, int vector,
+			       struct io_apic_irq_attr *attr)
+{
+#ifndef CONFIG_XEN
+	if (intr_remapping_enabled)
+		return setup_ir_ioapic_entry(irq,
+			 (struct IR_IO_APIC_route_entry *)entry,
+			 destination, vector, attr);
+#endif
 
-	/* Mask level triggered irqs.
+	memset(entry, 0, sizeof(*entry));
+
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode     = apic->irq_dest_mode;
+	entry->dest	     = destination;
+	entry->vector	     = vector;
+	entry->mask	     = 0;			/* enable IRQ */
+	entry->trigger	     = attr->trigger;
+	entry->polarity	     = attr->polarity;
+
+	/*
+	 * Mask level triggered irqs.
 	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 	 */
-	if (trigger)
+	if (attr->trigger)
 		entry->mask = 1;
+
 	return 0;
 }
 
-static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
-			     struct irq_cfg *cfg, int trigger, int polarity)
+static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
+				struct io_apic_irq_attr *attr)
 {
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
@@ -1418,51 +1507,50 @@ static void setup_ioapic_irq(int apic_id
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
 		    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
-		    apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector,
-		    irq, trigger, polarity, dest);
+		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
+		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
-
-	if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry,
-			       dest, trigger, polarity, cfg->vector, pin)) {
-		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mpc_ioapic_id(apic_id), pin);
+	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
+		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
+
 		return;
 	}
 
-	ioapic_register_intr(irq, cfg, trigger);
+	ioapic_register_intr(irq, cfg, attr->trigger);
 #ifndef CONFIG_XEN
 	if (irq < legacy_pic->nr_legacy_irqs)
 		legacy_pic->mask(irq);
 #endif
 
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
 }
 
-static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
+static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
 {
 	if (idx != -1)
 		return false;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
-		    mpc_ioapic_id(apic_id), pin);
+		    mpc_ioapic_id(ioapic_idx), pin);
 	return true;
 }
 
-static void __init __io_apic_setup_irqs(unsigned int apic_id)
+static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
 {
 	int idx, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 	unsigned int pin, irq;
 
-	for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) {
-		idx = find_irq_entry(apic_id, pin, mp_INT);
-		if (io_apic_pin_not_connected(idx, apic_id, pin))
+	for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
+		idx = find_irq_entry(ioapic_idx, pin, mp_INT);
+		if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
 			continue;
 
-		irq = pin_2_irq(idx, apic_id, pin);
+		irq = pin_2_irq(idx, ioapic_idx, pin);
 
-		if ((apic_id > 0) && (irq > 16))
+		if ((ioapic_idx > 0) && (irq > 16))
 			continue;
 
 #ifdef CONFIG_XEN
@@ -1474,11 +1562,11 @@ static void __init __io_apic_setup_irqs(
 		 * installed and if it returns 1:
 		 */
 		if (apic->multi_timer_check &&
-		    apic->multi_timer_check(apic_id, irq))
+		    apic->multi_timer_check(ioapic_idx, irq))
 			continue;
 #endif
 
-		set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+		set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 				     irq_polarity(idx));
 
 		io_apic_setup_irq_pin(irq, node, &attr);
@@ -1487,12 +1575,12 @@ static void __init __io_apic_setup_irqs(
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	unsigned int apic_id;
+	unsigned int ioapic_idx;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
-		__io_apic_setup_irqs(apic_id);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		__io_apic_setup_irqs(ioapic_idx);
 }
 
 /*
@@ -1502,32 +1590,32 @@ static void __init setup_IO_APIC_irqs(vo
  */
 void setup_IO_APIC_irq_extra(u32 gsi)
 {
-	int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
+	int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
-	apic_id = mp_find_ioapic(gsi);
-	if (apic_id < 0)
+	ioapic_idx = mp_find_ioapic(gsi);
+	if (ioapic_idx < 0)
 		return;
 
-	pin = mp_find_ioapic_pin(apic_id, gsi);
-	idx = find_irq_entry(apic_id, pin, mp_INT);
+	pin = mp_find_ioapic_pin(ioapic_idx, gsi);
+	idx = find_irq_entry(ioapic_idx, pin, mp_INT);
 	if (idx == -1)
 		return;
 
-	irq = pin_2_irq(idx, apic_id, pin);
+	irq = pin_2_irq(idx, ioapic_idx, pin);
 #ifdef CONFIG_XEN
 	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
 		return;
 #endif
 
 	/* Only handle the non legacy irqs on secondary ioapics */
-	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
+	if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
 		return;
 
-	set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+	set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 			     irq_polarity(idx));
 
 	io_apic_setup_irq_pin_once(irq, node, &attr);
@@ -1537,8 +1625,8 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 /*
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
-static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
-					int vector)
+static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
+					 unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
 
@@ -1569,45 +1657,29 @@ static void __init setup_timer_IRQ0_pin(
 	/*
 	 * Add it to the IO-APIC irq-routing table:
 	 */
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(ioapic_idx, pin, entry);
 }
 
-
-__apicdebuginit(void) print_IO_APIC(void)
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 {
-	int apic, i;
+	int i;
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mpc_ioapic_id(i), ioapics[i].nr_registers);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	reg_01.raw = io_apic_read(ioapic_idx, 1);
 	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
+		reg_02.raw = io_apic_read(ioapic_idx, 2);
 	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
+		reg_03.raw = io_apic_read(ioapic_idx, 3);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic));
+	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1657,7 +1729,7 @@ __apicdebuginit(void) print_IO_APIC(void
 			struct IO_APIC_route_entry entry;
 			struct IR_IO_APIC_route_entry *ir_entry;
 
-			entry = ioapic_read_entry(apic, i);
+			entry = ioapic_read_entry(ioapic_idx, i);
 			ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
 			printk(KERN_DEBUG " %02x %04X ",
 				i,
@@ -1678,7 +1750,7 @@ __apicdebuginit(void) print_IO_APIC(void
 		} else {
 			struct IO_APIC_route_entry entry;
 
-			entry = ioapic_read_entry(apic, i);
+			entry = ioapic_read_entry(ioapic_idx, i);
 			printk(KERN_DEBUG " %02x %02X  ",
 				i,
 				entry.dest
@@ -1696,12 +1768,38 @@ __apicdebuginit(void) print_IO_APIC(void
 			);
 		}
 	}
-	}
+}
+
+__apicdebuginit(void) print_IO_APICs(void)
+{
+	int ioapic_idx;
+	struct irq_cfg *cfg;
+	unsigned int irq;
+	struct irq_chip *chip;
+
+	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mpc_ioapic_id(ioapic_idx),
+		       ioapics[ioapic_idx].nr_registers);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		print_IO_APIC(ioapic_idx);
 
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 
+		chip = irq_get_chip(irq);
+		if (chip != &ioapic_chip)
+			continue;
+
 		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
@@ -1715,8 +1813,6 @@ __apicdebuginit(void) print_IO_APIC(void
 	}
 
 	printk(KERN_INFO ".................................... done.\n");
-
-	return;
 }
 
 __apicdebuginit(void) print_APIC_field(int base)
@@ -1910,7 +2006,7 @@ __apicdebuginit(int) print_ICs(void)
 		return 0;
 
 	print_local_APICs(show_lapic);
-	print_IO_APIC();
+	print_IO_APICs();
 
 	return 0;
 }
@@ -2035,7 +2131,7 @@ void __init setup_ioapic_ids_from_mpc_no
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
-	int apic_id;
+	int ioapic_idx;
 	int i;
 	unsigned char old_id;
 	unsigned long flags;
@@ -2049,21 +2145,20 @@ void __init setup_ioapic_ids_from_mpc_no
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
 	 */
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
 		/* Read the register 0 value */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-		old_id = mpc_ioapic_id(apic_id);
+		old_id = mpc_ioapic_id(ioapic_idx);
 
-		if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) {
+		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				reg_00.bits.ID);
-			ioapics[apic_id].mp_config.apicid = reg_00.bits.ID;
+			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
 		}
 
 		/*
@@ -2072,9 +2167,9 @@ void __init setup_ioapic_ids_from_mpc_no
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
 		if (apic->check_apicid_used(&phys_id_present_map,
-					    mpc_ioapic_id(apic_id))) {
+					    mpc_ioapic_id(ioapic_idx))) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			for (i = 0; i < get_physical_broadcast(); i++)
 				if (!physid_isset(i, phys_id_present_map))
 					break;
@@ -2083,14 +2178,14 @@ void __init setup_ioapic_ids_from_mpc_no
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				i);
 			physid_set(i, phys_id_present_map);
-			ioapics[apic_id].mp_config.apicid = i;
+			ioapics[ioapic_idx].mp_config.apicid = i;
 		} else {
 			physid_mask_t tmp;
-			apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id),
+			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
 						    &tmp);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
-					mpc_ioapic_id(apic_id));
+					mpc_ioapic_id(ioapic_idx));
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
@@ -2098,35 +2193,35 @@ void __init setup_ioapic_ids_from_mpc_no
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
 		 */
-		if (old_id != mpc_ioapic_id(apic_id))
+		if (old_id != mpc_ioapic_id(ioapic_idx))
 			for (i = 0; i < mp_irq_entries; i++)
 				if (mp_irqs[i].dstapic == old_id)
 					mp_irqs[i].dstapic
-						= mpc_ioapic_id(apic_id);
+						= mpc_ioapic_id(ioapic_idx);
 
 		/*
 		 * Update the ID register according to the right value
 		 * from the MPC table if they are different.
 		 */
-		if (mpc_ioapic_id(apic_id) == reg_00.bits.ID)
+		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
 			continue;
 
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
-			mpc_ioapic_id(apic_id));
+			mpc_ioapic_id(ioapic_idx));
 
-		reg_00.bits.ID = mpc_ioapic_id(apic_id);
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic_id, 0, reg_00.raw);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
 		 */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mpc_ioapic_id(apic_id))
+		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
 			printk("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2337,7 +2432,7 @@ ioapic_set_affinity(struct irq_data *dat
 	return ret;
 }
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
@@ -2349,6 +2444,9 @@ ioapic_set_affinity(struct irq_data *dat
  * updated vector information), by using a virtual vector (io-apic pin number).
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
  */
 static int
 ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -2373,10 +2471,16 @@ ir_ioapic_set_affinity(struct irq_data *
 	irte.dest_id = IRTE_DEST(dest);
 
 	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
 	 */
 	modify_irte(irq, &irte);
 
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
@@ -2489,48 +2593,6 @@ static void ack_apic_edge(struct irq_dat
 
 atomic_t irq_mis_count;
 
-/*
- * IO-APIC versions below 0x20 don't support EOI register.
- * For the record, here is the information about various versions:
- *     0Xh     82489DX
- *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
- *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
- *     30h-FFh Reserved
- *
- * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
- * version as 0x2. This is an error with documentation and these ICH chips
- * use io-apic's of version 0x20.
- *
- * For IO-APIC's with EOI register, we use that to do an explicit EOI.
- * Otherwise, we simulate the EOI message manually by changing the trigger
- * mode to edge and then back to level, with RTE being masked during this.
-*/
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		if (mpc_ioapic_ver(entry->apic) >= 0x20) {
-			/*
-			 * Intr-remapping uses pin number as the virtual vector
-			 * in the RTE. Actual vector is programmed in
-			 * intr-remapping table entry. Hence for the io-apic
-			 * EOI we use the pin number.
-			 */
-			if (irq_remapped(cfg))
-				io_apic_eoi(entry->apic, entry->pin);
-			else
-				io_apic_eoi(entry->apic, cfg->vector);
-		} else {
-			__mask_and_edge_IO_APIC_irq(entry);
-			__unmask_and_level_IO_APIC_irq(entry);
-		}
-	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ack_apic_level(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
@@ -2634,7 +2696,7 @@ static void ack_apic_level(struct irq_da
 	}
 }
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 static void ir_ack_apic_edge(struct irq_data *data)
 {
 	ack_APIC_irq();
@@ -2645,7 +2707,23 @@ static void ir_ack_apic_level(struct irq
 	ack_APIC_irq();
 	eoi_ioapic_irq(data->irq, data->chip_data);
 }
-#endif /* CONFIG_INTR_REMAP */
+
+static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
+{
+	seq_printf(p, " IR-%s", data->chip->name);
+}
+
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+	chip->irq_print_chip = ir_print_prefix;
+	chip->irq_ack = ir_ack_apic_edge;
+	chip->irq_eoi = ir_ack_apic_level;
+
+#ifdef CONFIG_SMP
+	chip->irq_set_affinity = ir_ioapic_set_affinity;
+#endif
+}
+#endif /* CONFIG_IRQ_REMAP */
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name			= "IO-APIC",
@@ -2659,21 +2737,6 @@ static struct irq_chip ioapic_chip __rea
 #endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
-
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name			= "IR-IO-APIC",
-	.irq_startup		= startup_ioapic_irq,
-	.irq_mask		= mask_ioapic_irq,
-	.irq_unmask		= unmask_ioapic_irq,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-	.irq_eoi		= ir_ack_apic_level,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_ioapic_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
 #endif /* !CONFIG_XEN */
 
 static inline void init_IO_APIC_traps(void)
@@ -3046,27 +3109,26 @@ static int __init io_apic_bug_finalize(v
 late_initcall(io_apic_bug_finalize);
 
 #ifndef CONFIG_XEN
-static void resume_ioapic_id(int ioapic_id)
+static void resume_ioapic_id(int ioapic_idx)
 {
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 
-
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic_id, 0);
-	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) {
-		reg_00.bits.ID = mpc_ioapic_id(ioapic_id);
-		io_apic_write(ioapic_id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void ioapic_resume(void)
 {
-	int ioapic_id;
+	int ioapic_idx;
 
-	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
-		resume_ioapic_id(ioapic_id);
+	for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+		resume_ioapic_id(ioapic_idx);
 
 	restore_ioapic_entries();
 }
@@ -3247,45 +3309,6 @@ msi_set_affinity(struct irq_data *data, 
 
 	return 0;
 }
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static int
-ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		    bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct irte irte;
-
-	if (get_irte(irq, &irte))
-		return -1;
-
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * atomically update the IRTE with the new destination and vector.
-	 */
-	modify_irte(irq, &irte);
-
-	/*
-	 * After this point, all the interrupts will start arriving
-	 * at the new destination. So, time to cleanup the previous
-	 * vector allocation.
-	 */
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return 0;
-}
-
-#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -3303,19 +3326,6 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static struct irq_chip msi_ir_chip = {
-	.name			= "IR-PCI-MSI",
-	.irq_unmask		= unmask_msi_irq,
-	.irq_mask		= mask_msi_irq,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_msi_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
-
 /*
  * Map the PCI dev to the corresponding remapping hardware unit
  * and allocate 'nvec' consecutive interrupt-remapping table entries
@@ -3358,7 +3368,7 @@ static int setup_msi_irq(struct pci_dev 
 
 	if (irq_remapped(irq_get_chip_data(irq))) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		chip = &msi_ir_chip;
+		irq_remap_modify_chip_defaults(chip);
 	}
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3431,7 +3441,7 @@ void native_teardown_msi_irq(unsigned in
 	destroy_irq(irq);
 }
 
-#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
+#ifdef CONFIG_DMAR_TABLE
 #ifdef CONFIG_SMP
 static int
 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -3512,19 +3522,6 @@ static int hpet_msi_set_affinity(struct 
 
 #endif /* CONFIG_SMP */
 
-static struct irq_chip ir_hpet_msi_type = {
-	.name			= "IR-HPET_MSI",
-	.irq_unmask		= hpet_msi_unmask,
-	.irq_mask		= hpet_msi_mask,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_msi_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
-
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
 	.irq_unmask = hpet_msi_unmask,
@@ -3561,7 +3558,7 @@ int arch_setup_hpet_msi(unsigned int irq
 	hpet_msi_write(irq_get_handler_data(irq), &msg);
 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 	if (irq_remapped(irq_get_chip_data(irq)))
-		chip = &ir_hpet_msi_type;
+		irq_remap_modify_chip_defaults(chip);
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 	return 0;
@@ -3669,26 +3666,25 @@ io_apic_setup_irq_pin(unsigned int irq, 
 		return -EINVAL;
 	ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
 	if (!ret)
-		setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
-				 attr->trigger, attr->polarity);
+		setup_ioapic_irq(irq, cfg, attr);
 	return ret;
 }
 
 int io_apic_setup_irq_pin_once(unsigned int irq, int node,
 			       struct io_apic_irq_attr *attr)
 {
-	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+	unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
 
 	/* Avoid redundant programming */
-	if (test_bit(pin, ioapics[id].pin_programmed)) {
+	if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
 		pr_debug("Pin %d-%d already programmed\n",
-			 mpc_ioapic_id(id), pin);
+			 mpc_ioapic_id(ioapic_idx), pin);
 		return 0;
 	}
 	ret = io_apic_setup_irq_pin(irq, node, attr);
 	if (!ret)
-		set_bit(pin, ioapics[id].pin_programmed);
+		set_bit(pin, ioapics[ioapic_idx].pin_programmed);
 	return ret;
 }
 
@@ -3725,7 +3721,6 @@ int get_nr_irqs_gsi(void)
 	return nr_irqs_gsi;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 int __init arch_probe_nr_irqs(void)
 {
 	int nr;
@@ -3745,7 +3740,6 @@ int __init arch_probe_nr_irqs(void)
 
 	return NR_IRQS_LEGACY;
 }
-#endif
 #endif /* CONFIG_XEN */
 
 int io_apic_set_pci_routing(struct device *dev, int irq,
--- 12.2.orig/arch/x86/kernel/cpu/Makefile	2012-02-16 13:35:11.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/Makefile	2012-02-16 13:45:12.000000000 +0100
@@ -41,7 +41,7 @@ obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
 disabled-obj-$(CONFIG_XEN) := hypervisor.o mshyperv.o perfctr-watchdog.o \
-			      perf_event.o sched.o vmware.o
+			      perf_event.o perf_event_%.o sched.o vmware.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
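The % in perf_event_%.o is a GNU make wildcard: the disabled-obj list is,
as far as we can tell from the xen build fragments (an assumption here),
applied with $(filter-out ...), whose patterns may each contain one %.
So e.g. $(filter-out perf_event_%.o, perf_event_amd.o common.o) leaves
only common.o -- every perf_event_*.o variant is dropped for Xen without
listing each one.
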
--- 12.2.orig/arch/x86/kernel/cpu/amd.c	2012-05-08 10:52:53.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/amd.c	2012-05-08 10:54:48.000000000 +0200
@@ -484,7 +484,9 @@ static void __cpuinit early_init_amd(str
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
+#ifndef CONFIG_XEN
 	u32 dummy;
+#endif
 
 #ifdef CONFIG_SMP
 	unsigned long long value;
--- 12.2.orig/arch/x86/kernel/cpu/common-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/common-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -15,6 +15,7 @@
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -716,6 +717,9 @@ static void __init early_identify_cpu(st
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
+
+	if (this_cpu->c_bsp_init)
+		this_cpu->c_bsp_init(c);
 }
 
 void __init early_cpu_init(void)
@@ -898,6 +902,7 @@ static void __cpuinit identify_cpu(struc
 #endif
 
 	init_hypervisor(c);
+	x86_init_rdrand(c);
 
 	/*
 	 * Clear/Set all flags overriden by options, need do it
--- 12.2.orig/arch/x86/kernel/cpu/intel.c	2012-02-08 12:28:15.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/intel.c	2011-11-17 16:57:31.000000000 +0100
@@ -52,6 +52,7 @@ static void __cpuinit early_init_intel(s
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
+#ifndef CONFIG_XEN
 	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
 		unsigned lower_word;
 
@@ -69,14 +70,12 @@ static void __cpuinit early_init_intel(s
 	 * need the microcode to have already been loaded... so if it is
 	 * not, recommend a BIOS update and disable large pages.
 	 */
-#ifdef CONFIG_XEN
-	if (cpu_has(c, X86_FEATURE_PSE))
-#endif
 	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
 	    c->microcode < 0x20e) {
 		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
 		clear_cpu_cap(c, X86_FEATURE_PSE);
 	}
+#endif
 
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
--- 12.2.orig/arch/x86/kernel/cpu/mcheck/mce.c	2012-06-06 14:02:38.000000000 +0200
+++ 12.2/arch/x86/kernel/cpu/mcheck/mce.c	2012-02-08 13:08:21.000000000 +0100
@@ -268,9 +268,14 @@ static void print_mce(struct mce *m)
 	 * Note this output is parsed by external tools and old fields
 	 * should not be changed.
 	 */
+#ifndef CONFIG_XEN
 	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
 		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
 		cpu_data(m->extcpu).microcode);
+#else
+	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+#endif
 
 	/*
 	 * Print out human-readable details about the MCE error,
--- 12.2.orig/arch/x86/kernel/cpu/proc.c	2011-03-03 16:38:42.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/proc.c	2012-02-08 13:08:10.000000000 +0100
@@ -87,8 +87,10 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
 	else
 		seq_printf(m, "stepping\t: unknown\n");
+#ifndef CONFIG_XEN
 	if (c->microcode)
 		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
+#endif
 
 	if (cpu_has(c, X86_FEATURE_TSC)) {
 		unsigned int freq = cpufreq_quick_get(cpu);
--- 12.2.orig/arch/x86/kernel/e820-xen.c	2011-09-23 16:02:14.000000000 +0200
+++ 12.2/arch/x86/kernel/e820-xen.c	2011-12-21 12:00:26.000000000 +0100
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/crash_dump.h>
+#include <linux/export.h>
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
--- 12.2.orig/arch/x86/kernel/entry_64-xen.S	2011-10-07 11:45:32.000000000 +0200
+++ 12.2/arch/x86/kernel/entry_64-xen.S	2011-11-17 15:56:06.000000000 +0100
@@ -365,10 +365,15 @@ NMI_MASK = 0x80000000
 1:	incl PER_CPU_VAR(irq_count)
 	jne 2f
 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
-	EMPTY_FRAME 0
+	CFI_DEF_CFA_REGISTER	rsi
 
 2:	/* Store previous stack value */
 	pushq %rsi
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x22 /* DW_OP_plus */
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 	.endm
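The CFI_ESCAPE added above emits a raw DW_CFA_def_cfa_expression so that
DWARF unwinders can still find the interrupted frame after the switch to
the per-CPU irq stack. Decoded (our reading of the opcodes, not spelled
out in the patch itself):

	DW_OP_breg7 0          push %rsp + 0
	DW_OP_deref            load *(%rsp) -- the previous stack value
	                       saved by the pushq %rsi just before
	DW_OP_const1u SS+8-RBP
	DW_OP_plus             add the fixed frame offset

i.e. CFA = *(%rsp) + (SS + 8 - RBP), the canonical frame address of the
interrupted context.
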
--- 12.2.orig/arch/x86/kernel/head-xen.c	2011-09-07 16:15:33.000000000 +0200
+++ 12.2/arch/x86/kernel/head-xen.c	2012-02-08 16:16:55.000000000 +0100
@@ -57,7 +57,7 @@ void __init reserve_ebda_region(void)
 	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
 }
 #else /* CONFIG_XEN */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/fixmap.h>
 #include <asm/mc146818rtc.h>
 #include <asm/pgtable.h>
--- 12.2.orig/arch/x86/kernel/mpparse-xen.c	2011-07-01 15:19:35.000000000 +0200
+++ 12.2/arch/x86/kernel/mpparse-xen.c	2011-12-21 11:56:23.000000000 +0100
@@ -107,8 +107,8 @@ static void __init MP_bus_info(struct mp
 	}
 #endif
 
+	set_bit(m->busid, mp_bus_not_pci);
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		set_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
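Hoisting set_bit() above the bus-type comparison makes "not PCI" the default for every bus the MP table reports, so unknown bus types are no longer silently treated as PCI; the PCI branch (unchanged, hence not in this hunk) presumably clears the bit again, roughly:

	set_bit(m->busid, mp_bus_not_pci);		/* default: not a PCI bus */
	...
	else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0)
		clear_bit(m->busid, mp_bus_not_pci);	/* assumed PCI branch */
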
--- 12.2.orig/arch/x86/kernel/irq-xen.c	2011-07-01 15:19:34.000000000 +0200
+++ 12.2/arch/x86/kernel/irq-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -9,6 +9,7 @@
 #include <linux/smp.h>
 #include <linux/ftrace.h>
 #include <linux/delay.h>
+#include <linux/export.h>
 
 #include <asm/apic.h>
 #include <asm/io_apic.h>
--- 12.2.orig/arch/x86/kernel/nmi.c	2012-06-20 12:12:04.000000000 +0200
+++ 12.2/arch/x86/kernel/nmi.c	2011-11-28 09:53:46.000000000 +0100
@@ -232,15 +232,12 @@ pci_serr_error(unsigned char reason, str
 	pr_emerg("Dazed and confused, but trying to continue\n");
 
 	/* Clear and disable the PCI SERR error line. */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
-	outb(reason, NMI_REASON_PORT);
+	clear_serr_error(reason);
 }
 
 static notrace __kprobes void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
-	unsigned long i;
-
 	pr_emerg(
 	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
 		 reason, smp_processor_id());
@@ -250,17 +247,7 @@ io_check_error(unsigned char reason, str
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
-
-	i = 20000;
-	while (--i) {
-		touch_nmi_watchdog();
-		udelay(100);
-	}
-
-	reason &= ~NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
+	clear_io_check_error(reason);
 }
 
 static notrace __kprobes void
--- 12.2.orig/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:32:23.000000000 +0200
+++ 12.2/arch/x86/kernel/pci-dma-xen.c	2012-04-04 14:32:31.000000000 +0200
@@ -1,5 +1,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
+#include <linux/export.h>
 #include <linux/bootmem.h>
 #include <linux/gfp.h>
 #include <linux/pci.h>
@@ -180,8 +181,8 @@ void dma_generic_free_coherent(struct de
 #endif
 
 /*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
+ * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * parameter documentation.
  */
 static __init int iommu_setup(char *p)
 {
--- 12.2.orig/arch/x86/kernel/process-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/kernel/process-xen.c	2011-12-21 11:59:08.000000000 +0100
@@ -50,7 +50,7 @@ void free_thread_xstate(struct task_stru
 void free_thread_info(struct thread_info *ti)
 {
 	free_thread_xstate(ti->task);
-	free_pages((unsigned long)ti, get_order(THREAD_SIZE));
+	free_pages((unsigned long)ti, THREAD_ORDER);
 }
 
 void arch_task_cache_init(void)
@@ -355,6 +355,14 @@ void xen_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
+bool __init set_pm_idle_to_default(void)
+{
+	bool ret = !!pm_idle;
+
+	pm_idle = xen_idle;
+
+	return ret;
+}
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
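set_pm_idle_to_default() pins the idle routine to xen_idle and reports whether some other policy had already claimed pm_idle, letting a caller notice a pre-existing override. A hypothetical call site (the real consumer is not part of this hunk):

	if (set_pm_idle_to_default())
		pr_notice("idle: replacing a previously selected pm_idle\n");
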
--- 12.2.orig/arch/x86/kernel/process_32-xen.c	2012-02-29 14:20:17.000000000 +0100
+++ 12.2/arch/x86/kernel/process_32-xen.c	2012-02-29 14:20:36.000000000 +0100
@@ -59,6 +59,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
@@ -110,6 +111,7 @@ void cpu_idle(void)
 			if (cpu_is_offline(cpu))
 				play_dead();
 
+			local_touch_nmi();
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
@@ -270,7 +272,7 @@ start_thread(struct pt_regs *regs, unsig
 EXPORT_SYMBOL_GPL(start_thread);
 
 /*
- *	switch_to(x,yn) should switch tasks from x to y.
+ *	switch_to(x,y) should switch tasks from x to y.
  *
  * We fsave/fwait so that an exception goes off at the right time
  * (as a call from the fsave or fwait in effect) rather than to
--- 12.2.orig/arch/x86/kernel/process_64-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/kernel/process_64-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -56,6 +56,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -137,6 +138,7 @@ void cpu_idle(void)
 			 * from here on, until they go to idle.
 			 * Otherwise, idle callbacks can misfire.
 			 */
+			local_touch_nmi();
 			local_irq_disable();
 			enter_idle();
 			/* Don't trace irqs off for idle */
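Both the 32- and 64-bit idle loops now call local_touch_nmi() immediately before interrupts are disabled for the halt, so the NMI watchdog does not mistake a long quiescent idle period on this CPU for a lockup.
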
--- 12.2.orig/arch/x86/kernel/setup-xen.c	2012-06-08 10:36:40.000000000 +0200
+++ 12.2/arch/x86/kernel/setup-xen.c	2012-06-08 10:38:03.000000000 +0200
@@ -1274,6 +1274,8 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.timers.wallclock_init();
 
+	x86_platform.wallclock_init();
+
 	mcheck_init();
 
 	arch_init_ideal_nops();
--- 12.2.orig/arch/x86/kernel/time-xen.c	2012-02-10 13:29:44.000000000 +0100
+++ 12.2/arch/x86/kernel/time-xen.c	2011-12-23 11:26:26.000000000 +0100
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/time.h>
+#include <linux/export.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
 #include <linux/kernel_stat.h>
--- 12.2.orig/arch/x86/kernel/traps-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/kernel/traps-xen.c	2011-11-17 16:50:15.000000000 +0100
@@ -85,15 +85,6 @@ DECLARE_BITMAP(used_vectors, NR_VECTORS)
 EXPORT_SYMBOL_GPL(used_vectors);
 #endif
 
-static int ignore_nmis;
-
-int unknown_nmi_panic;
-/*
- * Prevent NMI reason port (0x61) being accessed simultaneously, can
- * only be used in NMI handler.
- */
-static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
-
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -311,139 +302,6 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static notrace __kprobes void
-pci_serr_error(unsigned char reason, struct pt_regs *regs)
-{
-	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-
-	/*
-	 * On some machines, PCI SERR line is used to report memory
-	 * errors. EDAC makes use of it.
-	 */
-#if defined(CONFIG_EDAC)
-	if (edac_handler_set()) {
-		edac_atomic_assert_error();
-		return;
-	}
-#endif
-
-	if (panic_on_unrecovered_nmi)
-		panic("NMI: Not continuing");
-
-	pr_emerg("Dazed and confused, but trying to continue\n");
-
-	/* Clear and disable the PCI SERR error line. */
-	clear_serr_error(reason);
-}
-
-static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs *regs)
-{
-	pr_emerg(
-	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-	show_registers(regs);
-
-	if (panic_on_io_nmi)
-		panic("NMI IOCK error: Not continuing");
-
-	/* Re-enable the IOCK line, wait for a few seconds */
-	clear_io_check_error(reason);
-}
-
-static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
-{
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
-			NOTIFY_STOP)
-		return;
-#ifdef CONFIG_MCA
-	/*
-	 * Might actually be able to figure out what the guilty party
-	 * is:
-	 */
-	if (MCA_bus) {
-		mca_handle_nmi();
-		return;
-	}
-#endif
-	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-
-	pr_emerg("Do you have a strange power saving mode enabled?\n");
-	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-		panic("NMI: Not continuing");
-
-	pr_emerg("Dazed and confused, but trying to continue\n");
-}
-
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
-{
-	unsigned char reason = 0;
-
-	/*
-	 * CPU-specific NMI must be processed before non-CPU-specific
-	 * NMI, otherwise we may lose it, because the CPU-specific
-	 * NMI can not be detected/processed on other CPUs.
-	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
-	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
-	raw_spin_lock(&nmi_reason_lock);
-	reason = get_nmi_reason();
-
-	if (reason & NMI_REASON_MASK) {
-		if (reason & NMI_REASON_SERR)
-			pci_serr_error(reason, regs);
-		else if (reason & NMI_REASON_IOCHK)
-			io_check_error(reason, regs);
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-		/*
-		 * Reassert NMI in case it became active
-		 * meanwhile as it's edge-triggered:
-		 */
-		reassert_nmi();
-#endif
-		raw_spin_unlock(&nmi_reason_lock);
-		return;
-	}
-	raw_spin_unlock(&nmi_reason_lock);
-
-	unknown_nmi_error(reason, regs);
-}
-
-dotraplinkage notrace __kprobes void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	nmi_enter();
-
-	inc_irq_stat(__nmi_count);
-
-	if (!ignore_nmis)
-		default_do_nmi(regs);
-
-	nmi_exit();
-}
-
-void stop_nmi(void)
-{
-	ignore_nmis++;
-}
-
-void restart_nmi(void)
-{
-	ignore_nmis--;
-}
-
 /* May run on IST stack. */
 dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
--- 12.2.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-11-03 12:58:44.000000000 +0100
+++ 12.2/arch/x86/kernel/vsyscall_64-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -25,6 +25,7 @@
 #include <linux/seqlock.h>
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
+#include <linux/topology.h>
 #include <linux/clocksource.h>
 #include <linux/getcpu.h>
 #include <linux/cpu.h>
--- 12.2.orig/arch/x86/kernel/x86_init-xen.c	2011-07-11 13:01:32.000000000 +0200
+++ 12.2/arch/x86/kernel/x86_init-xen.c	2011-11-28 10:08:44.000000000 +0100
@@ -20,11 +20,13 @@
 #include <asm/irq.h>
 #include <asm/pat.h>
 #include <asm/iommu.h>
+#include <asm/mach_traps.h>
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
+void wallclock_init_noop(void) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -94,9 +96,11 @@ static int default_i8042_detect(void) { 
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= NULL,
+	.wallclock_init			= wallclock_init_noop,
 	.get_wallclock			= xen_read_wallclock,
 	.set_wallclock			= xen_write_wallclock,
 	.is_untracked_pat_range		= is_ISA_range,
+	.get_nmi_reason			= xen_get_nmi_reason,
 	.i8042_detect			= default_i8042_detect
 };
 
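wallclock_init and get_nmi_reason join the x86_platform ops table, the usual indirection point in this port: generic code calls through the table and gets either a no-op or the Xen-specific implementation. The setup-xen.c hunk earlier in this patch is the whole consumer side of the new hook:

	/* sketch, mirroring the line added to setup_arch() above */
	x86_platform.wallclock_init();	/* wallclock_init_noop unless overridden */
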
--- 12.2.orig/arch/x86/mm/fault-xen.c	2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/mm/fault-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -17,7 +17,7 @@
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
-#include <asm/vsyscall.h>
+#include <asm/fixmap.h>			/* VSYSCALL_START		*/
 
 /*
  * Page fault error code bits:
@@ -428,12 +428,14 @@ static noinline __kprobes int vmalloc_fa
 	return 0;
 }
 
+#ifdef CONFIG_CPU_SUP_AMD
 static const char errata93_warning[] =
 KERN_ERR
 "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 "******* Working around it, but it may cause SEGVs or burn power.\n"
 "******* Please consider a BIOS update.\n"
 "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -513,7 +515,11 @@ bad:
  */
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD
+	    || boot_cpu_data.x86 != 0xf)
+		return 0;
+
 	if (address != regs->ip)
 		return 0;
 
--- 12.2.orig/arch/x86/mm/highmem_32-xen.c	2011-02-01 15:09:47.000000000 +0100
+++ 12.2/arch/x86/mm/highmem_32-xen.c	2011-12-21 11:56:23.000000000 +0100
@@ -45,6 +45,7 @@ void *kmap_atomic_prot(struct page *page
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
+	/*arch_flush_lazy_mmu_mode();*/
 
 	return (void *)vaddr;
 }
@@ -88,6 +89,7 @@ void __kunmap_atomic(void *kvaddr)
 		 */
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 		kmap_atomic_idx_pop();
+		/*arch_flush_lazy_mmu_mode();*/
 	}
 #ifdef CONFIG_DEBUG_HIGHMEM
 	else {
--- 12.2.orig/arch/x86/mm/hypervisor.c	2011-08-09 14:31:55.000000000 +0200
+++ 12.2/arch/x86/mm/hypervisor.c	2011-11-18 15:53:44.000000000 +0100
@@ -42,7 +42,7 @@
 #include <xen/balloon.h>
 #include <xen/features.h>
 #include <xen/interface/memory.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <linux/highmem.h>
--- 12.2.orig/arch/x86/oprofile/xenoprof.c	2011-01-31 17:49:31.000000000 +0100
+++ 12.2/arch/x86/oprofile/xenoprof.c	2011-11-28 10:45:58.000000000 +0100
@@ -87,7 +87,7 @@ int xenoprof_arch_map_shared_buffer(stru
 
 	npages = (get_buffer->bufsize * get_buffer->nbuf - 1) / PAGE_SIZE + 1;
 
-	area = alloc_vm_area(npages * PAGE_SIZE);
+	area = alloc_vm_area(npages * PAGE_SIZE, NULL);
 	if (area == NULL)
 		return -ENOMEM;
 
@@ -119,7 +119,7 @@ int xenoprof_arch_set_passive(struct xen
 
 	npages = (pdomain->bufsize * pdomain->nbuf - 1) / PAGE_SIZE + 1;
 
-	area = alloc_vm_area(npages * PAGE_SIZE);
+	area = alloc_vm_area(npages * PAGE_SIZE, NULL);
 	if (area == NULL) {
 		ret = -ENOMEM;
 		goto out;
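Every alloc_vm_area() caller in this patch gains a second argument: the 3.2 interface can hand back the PTE pointers for the allocated range, and passing NULL keeps the old behaviour. The xenbus_client.c hunk near the end of this patch shows the non-NULL form:

	pte_t *pte;
	struct vm_struct *area = alloc_vm_area(PAGE_SIZE, &pte);
	if (!area)
		return ERR_PTR(-ENOMEM);
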
--- 12.2.orig/arch/x86/pci/pcifront.c	2011-02-01 15:09:47.000000000 +0100
+++ 12.2/arch/x86/pci/pcifront.c	2011-11-18 15:58:33.000000000 +0100
@@ -4,7 +4,6 @@
  *
  *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
  */
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/pci.h>
--- 12.2.orig/drivers/acpi/processor_idle.c	2012-04-10 17:03:16.000000000 +0200
+++ 12.2/drivers/acpi/processor_idle.c	2012-04-10 17:23:28.000000000 +0200
@@ -1149,6 +1149,10 @@ static int acpi_processor_setup_cpuidle_
 
 	return 0;
 }
+#else
+static void acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) {}
+static void acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) {}
+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
 
 int acpi_processor_hotplug(struct acpi_processor *pr)
 {
@@ -1186,7 +1190,6 @@ int acpi_processor_hotplug(struct acpi_p
 
 	return ret;
 }
-#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
 
 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 {
@@ -1252,7 +1255,6 @@ int __cpuinit acpi_processor_power_init(
 			      struct acpi_device *device)
 {
 	acpi_status status = 0;
-	int retval;
 	static int first_run;
 
 	if (disabled_by_idle_boot_param())
@@ -1290,6 +1292,8 @@ int __cpuinit acpi_processor_power_init(
 	 * platforms that only support C1.
 	 */
 	if (pr->flags.power) {
+		int retval;
+
 		/* Register acpi_idle_driver if not already registered */
 		if (!acpi_processor_registered) {
 			acpi_processor_setup_cpuidle_states(pr);
--- 12.2.orig/drivers/edac/Kconfig	2011-11-17 11:40:04.000000000 +0100
+++ 12.2/drivers/edac/Kconfig	2012-04-10 17:23:15.000000000 +0200
@@ -41,7 +41,7 @@ config EDAC_DEBUG
 
 config EDAC_DECODE_MCE
 	tristate "Decode MCEs in human-readable form (only on AMD for now)"
-	depends on CPU_SUP_AMD && X86_MCE_AMD
+	depends on CPU_SUP_AMD && (X86_MCE_AMD || X86_XEN_MCE)
 	default y
 	---help---
 	  Enable this option if you want to decode Machine Check Exceptions
@@ -171,7 +171,7 @@ config EDAC_I5400
 
 config EDAC_I7CORE
 	tristate "Intel i7 Core (Nehalem) processors"
-	depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
+	depends on EDAC_MM_EDAC && PCI && X86 && (X86_MCE_INTEL || X86_XEN_MCE)
 	help
 	  Support for error detection and correction the Intel
 	  i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -215,7 +215,7 @@ config EDAC_I7300
 
 config EDAC_SBRIDGE
 	tristate "Intel Sandy-Bridge Integrated MC"
-	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+	depends on EDAC_MM_EDAC && PCI && X86_64 && (X86_MCE_INTEL || X86_XEN_MCE)
 	depends on PCI_MMCONFIG && EXPERIMENTAL
 	help
 	  Support for error detection and correction the Intel
--- 12.2.orig/drivers/edac/sb_edac.c	2012-06-20 12:12:04.000000000 +0200
+++ 12.2/drivers/edac/sb_edac.c	2012-02-08 13:07:42.000000000 +0100
@@ -1620,7 +1620,11 @@ static int sbridge_mce_check_error(struc
 		mce->socketid, mce->apicid);
 
 	/* Only handle if it is the right mc controller */
+#ifdef CONFIG_XEN /* Could easily be used for non-Xen too. */
+	if (mce->socketid != pvt->sbridge_dev->mc)
+#else
 	if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
+#endif
 		return NOTIFY_DONE;
 
 	smp_rmb();
--- 12.2.orig/drivers/hwmon/coretemp-xen.c	2011-11-03 13:59:03.000000000 +0100
+++ 12.2/drivers/hwmon/coretemp-xen.c	2011-11-17 16:53:49.000000000 +0100
@@ -110,7 +110,7 @@ struct pdev_entry {
 
 struct cpu_info {
 	struct platform_device *pdev;
-	u32 cpuid_6_eax, ucode_rev;
+	u32 cpuid_6_eax, microcode;
 	u32 phys_proc_id, cpu_core_id;
 	u8 x86_model, x86_mask;
 };
@@ -381,17 +381,10 @@ static int chk_ucode_version(unsigned in
 	 * Readings might stop update when processor visited too deep sleep,
 	 * fixed for stepping D0 (6EC).
 	 */
-	if (c->x86_model == 0xe && c->x86_mask < 0xc) {
-		/* check for microcode update */
-		if (!(c->ucode_rev + 1)) {
-			pr_err("Cannot determine microcode revision of "
-			       "CPU#%u!\n", cpu);
-			return -ENODEV;
-		} else if (c->ucode_rev < 0x39) {
-			pr_err("Errata AE18 not fixed, update BIOS or "
-			       "microcode of the CPU!\n");
-			return -ENODEV;
-		}
+	if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) {
+		pr_err("Errata AE18 not fixed, update BIOS or "
+		       "microcode of the CPU!\n");
+		return -ENODEV;
 	}
 	return 0;
 }
@@ -677,8 +670,8 @@ static void get_cpuid_info(void *arg)
 	    || !info->x86_model
 	    || wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) < 0
 	    || (sync_core(), rdmsr_safe(MSR_IA32_UCODE_REV,
-					&val, &info->ucode_rev)) < 0)
-		info->ucode_rev = ~0;
+					&val, &info->microcode)) < 0)
+		info->microcode = 0;
 
 	info->cpuid_6_eax = cpuid_eax(0) >= 6 ? cpuid_eax(6) : 0;
 }
--- 12.2.orig/drivers/pci/Kconfig	2012-04-10 17:06:40.000000000 +0200
+++ 12.2/drivers/pci/Kconfig	2012-04-10 17:23:20.000000000 +0200
@@ -94,6 +94,17 @@ config XEN_PCIDEV_FRONTEND
 	  The PCI device frontend driver allows the kernel to import arbitrary
 	  PCI devices from a PCI backend to support PCI driver domains.
 
+config XEN_PCIDEV_FE_DEBUG
+        bool "Xen PCI Frontend debugging"
+        depends on XEN_PCIDEV_FRONTEND
+	help
+	  Say Y here if you want the Xen PCI frontend to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with Xen PCI frontend support and want to see more of what is
+	  going on.
+
+	  When in doubt, say N.
+
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
--- 12.2.orig/drivers/pci/msi-xen.c	2011-02-01 15:03:03.000000000 +0100
+++ 12.2/drivers/pci/msi-xen.c	2011-11-17 15:56:06.000000000 +0100
@@ -11,6 +11,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/export.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
--- 12.2.orig/drivers/xen/Makefile	2011-11-03 12:54:39.000000000 +0100
+++ 12.2/drivers/xen/Makefile	2011-11-17 16:59:30.000000000 +0100
@@ -32,7 +32,7 @@ obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
@@ -42,8 +42,6 @@ xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
 
-xen-platform-pci-y			:= platform-pci.o
-
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
 obj-$(filter m,$(CONFIG_XEN_BLKDEV_TAP2)) += blktap2/ blktap2-new/
--- 12.2.orig/drivers/xen/blkback/blkback.c	2012-04-04 10:26:37.000000000 +0200
+++ 12.2/drivers/xen/blkback/blkback.c	2012-04-04 10:26:46.000000000 +0200
@@ -39,6 +39,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/delay.h>
 #include <xen/balloon.h>
 #include <xen/evtchn.h>
@@ -195,16 +196,17 @@ static void fast_flush_area(pending_req_
 static void print_stats(blkif_t *blkif)
 {
 	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d"
-	       "  |  fl %4d\n",
+	       "  |  fl %4d  |  ds %4d\n",
 	       current->comm, blkif->st_oo_req,
 	       blkif->st_rd_req, blkif->st_wr_req,
-	       blkif->st_br_req, blkif->st_fl_req);
+	       blkif->st_br_req, blkif->st_fl_req, blkif->st_ds_req);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
 	blkif->st_oo_req = 0;
 	blkif->st_br_req = 0;
 	blkif->st_fl_req = 0;
+	blkif->st_ds_req = 0;
 }
 
 int blkif_schedule(void *arg)
@@ -337,6 +339,49 @@ irqreturn_t blkif_be_int(int irq, void *
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
 
+static void dispatch_discard(blkif_t *blkif, struct blkif_request_discard *req)
+{
+	struct phys_req preq;
+	int err = -EOPNOTSUPP, status;
+
+	blkif->st_ds_req++;
+
+	preq.dev           = req->handle;
+	preq.sector_number = req->sector_number;
+	preq.nr_sects      = req->nr_sectors;
+
+	if (vbd_translate(&preq, blkif, REQ_DISCARD) != 0) {
+		DPRINTK("access denied: discard of [%Lu,%Lu) on dev=%04x\n",
+			preq.sector_number,
+			preq.sector_number + preq.nr_sects, preq.dev);
+		make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+		msleep(1); /* back off a bit */
+		return;
+	}
+
+	plug_queue(blkif, preq.bdev);
+
+	if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
+	    blkif->blk_backend_type == BLKIF_BACKEND_FILE)
+		err = blkdev_issue_discard(preq.bdev, preq.sector_number,
+					   preq.nr_sects, GFP_KERNEL, 0);
+
+	switch (err) {
+	case 0:
+		status = BLKIF_RSP_OKAY;
+		break;
+	case -EOPNOTSUPP:
+		DPRINTK("discard op failed, not supported\n");
+		status = BLKIF_RSP_EOPNOTSUPP;
+		break;
+	default:
+		status = BLKIF_RSP_ERROR;
+		break;
+	}
+
+	make_response(blkif, req->id, req->operation, status);
+}
+
 static int _do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -391,6 +436,11 @@ static int _do_block_io_op(blkif_t *blki
 
 			dispatch_rw_block_io(blkif, &req, pending_req);
 			break;
+		case BLKIF_OP_DISCARD:
+			blk_rings->common.req_cons = rc;
+			barrier();
+			dispatch_discard(blkif, (void *)&req);
+			break;
 		default:
 			/* A good sign something is wrong: sleep for a while to
 			 * avoid excessive CPU consumption by a bad guest. */
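dispatch_discard() completes the request inline: it validates the sector range through vbd_translate() like the read/write path, issues blkdev_issue_discard() only for backend types known to support it, and folds the result into the three blkif status codes (OKAY, EOPNOTSUPP, ERROR). In _do_block_io_op() the copied ring slot is simply reinterpreted, a sketch of the overlay idiom:

	blkif_request_t req;				/* copied off the shared ring */
	struct blkif_request_discard *d = (void *)&req;	/* same bytes, discard layout */
	dispatch_discard(blkif, d);
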
--- 12.2.orig/drivers/xen/blkback/common.h	2012-06-08 10:35:57.000000000 +0200
+++ 12.2/drivers/xen/blkback/common.h	2012-06-08 10:37:58.000000000 +0200
@@ -27,7 +27,6 @@
 #ifndef __BLKIF__BACKEND__COMMON_H__
 #define __BLKIF__BACKEND__COMMON_H__
 
-#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
@@ -44,6 +43,11 @@
 	pr_debug("(file=%s, line=%d) " _f,	\
 		 __FILE__ , __LINE__ , ## _a )
 
+enum blkif_backend_type {
+	BLKIF_BACKEND_PHY  = 1,
+	BLKIF_BACKEND_FILE = 2,
+};
+
 struct vbd {
 	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
 	fmode_t        mode;        /* FMODE_xxx */
@@ -64,6 +68,7 @@ typedef struct blkif_st {
 	unsigned int      irq;
 	/* Comms information. */
 	enum blkif_protocol blk_protocol;
+	enum blkif_backend_type blk_backend_type;
 	blkif_back_rings_t blk_rings;
 	struct vm_struct *blk_ring_area;
 	/* The VBD attached to this interface. */
@@ -89,6 +94,7 @@ typedef struct blkif_st {
 	int                 st_oo_req;
 	int                 st_br_req;
 	int                 st_fl_req;
+	int                 st_ds_req;
 	int                 st_rd_sect;
 	int                 st_wr_sect;
 
@@ -128,7 +134,7 @@ unsigned long vbd_secsize(struct vbd *vb
 
 struct phys_req {
 	unsigned short       dev;
-	unsigned short       nr_sects;
+	blkif_sector_t       nr_sects;
 	struct block_device *bdev;
 	blkif_sector_t       sector_number;
 };
--- 12.2.orig/drivers/xen/blkback/xenbus.c	2012-03-22 14:19:12.000000000 +0100
+++ 12.2/drivers/xen/blkback/xenbus.c	2012-03-22 14:25:55.000000000 +0100
@@ -18,7 +18,6 @@
 */
 
 #include <stdarg.h>
-#include <linux/module.h>
 #include <linux/kthread.h>
 #include "common.h"
 
@@ -120,6 +119,7 @@ VBD_SHOW(rd_req,  "%d\n", be->blkif->st_
 VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
 VBD_SHOW(fl_req,  "%d\n", be->blkif->st_fl_req);
+VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -129,6 +129,7 @@ static struct attribute *vbdstat_attrs[]
 	&dev_attr_wr_req.attr,
 	&dev_attr_br_req.attr,
 	&dev_attr_fl_req.attr,
+	&dev_attr_ds_req.attr,
 	&dev_attr_rd_sect.attr,
 	&dev_attr_wr_sect.attr,
 	NULL
@@ -222,6 +223,54 @@ void blkback_flush_diskcache(struct xenb
 		xenbus_dev_error(dev, err, "writing feature-flush-cache");
 }
 
+static void blkback_discard(struct xenbus_transaction xbt,
+			    struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	blkif_t *blkif = be->blkif;
+	char *type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+	int err, state = 0;
+
+	if (!IS_ERR(type)) {
+		if (strncmp(type, "file", 4) == 0) {
+			state = 1;
+			blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+		}
+		if (strncmp(type, "phy", 3) == 0) {
+			struct request_queue *q;
+
+			q = bdev_get_queue(blkif->vbd.bdev);
+			if (blk_queue_discard(q)) {
+				blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-granularity", "%u",
+					q->limits.discard_granularity);
+				if (!err)
+					state = 1;
+				else
+					xenbus_dev_error(dev, err,
+						"writing discard-granularity");
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-alignment", "%u",
+					q->limits.discard_alignment);
+				if (err) {
+					xenbus_dev_error(dev, err,
+						"writing discard-alignment");
+					state = 0;
+				}
+			}
+		}
+		kfree(type);
+	} else
+		xenbus_dev_error(dev, PTR_ERR(type),
+				 "reading type for discard");
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+			    "%d", state);
+	if (err)
+		xenbus_dev_error(dev, err, "writing feature-discard");
+}
+
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -443,6 +492,7 @@ again:
 
 	blkback_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
 	blkback_barrier(xbt, be, be->blkif->vbd.flush_support);
+	blkback_discard(xbt, be);
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
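With this, the backend advertises discard support through xenstore alongside the existing flush and barrier features: feature-discard is always written (0 or 1), and for "phy" devices whose queue supports discard the discard-granularity and discard-alignment limits are exported for the frontend to mirror into its own request queue.
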
--- 12.2.orig/drivers/xen/blkfront/blkfront.c	2012-06-12 15:35:27.000000000 +0200
+++ 12.2/drivers/xen/blkfront/blkfront.c	2012-06-12 15:36:36.000000000 +0200
@@ -325,6 +325,32 @@ static void backend_changed(struct xenbu
 
 /* ** Connection ** */
 
+static void blkfront_setup_discard(struct blkfront_info *info)
+{
+	int err;
+	char *type;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
+
+	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+	if (IS_ERR(type))
+		return;
+
+	if (strncmp(type, "phy", 3) == 0) {
+		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"discard-granularity", "%u", &discard_granularity,
+			"discard-alignment", "%u", &discard_alignment,
+			NULL);
+		if (!err) {
+			info->feature_discard = 1;
+			info->discard_granularity = discard_granularity;
+			info->discard_alignment = discard_alignment;
+		}
+	} else if (strncmp(type, "file", 4) == 0)
+		info->feature_discard = 1;
+
+	kfree(type);
+}
 
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
@@ -335,7 +361,7 @@ static void connect(struct blkfront_info
 	unsigned long long sectors;
 	unsigned long sector_size;
 	unsigned int binfo;
-	int err, barrier, flush;
+	int err, barrier, flush, discard;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -406,6 +432,12 @@ static void connect(struct blkfront_info
 		info->feature_flush = QUEUE_ORDERED_NONE;
 #endif
 
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "feature-discard", "%d", &discard);
+
+	if (err > 0 && discard)
+		blkfront_setup_discard(info);
+
 	err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -812,9 +844,17 @@ static int blkif_queue_request(struct re
 #endif
 		ring_req->operation = info->flush_op;
 
-	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+	if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+		struct blkif_request_discard *discard = (void *)ring_req;
+
+		/* id, sector_number and handle are set above. */
+		discard->operation = BLKIF_OP_DISCARD;
+		discard->flag = 0;
+		discard->nr_sectors = blk_rq_sectors(req);
+	} else {
+		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
 			buffer_mfn = page_to_phys(sg_page(sg)) >> PAGE_SHIFT;
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
@@ -834,6 +874,7 @@ static int blkif_queue_request(struct re
 					.gref       = ref,
 					.first_sect = fsect,
 					.last_sect  = lsect };
+		}
 	}
 
 	info->ring.req_prod_pvt++;
@@ -986,6 +1027,18 @@ static irqreturn_t blkif_int(int irq, vo
 
 			__blk_end_request_all(req, ret);
 			break;
+		case BLKIF_OP_DISCARD:
+			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+				struct request_queue *rq = info->rq;
+
+				pr_warn("blkfront: %s: discard op failed\n",
+					info->gd->disk_name);
+				ret = -EOPNOTSUPP;
+				info->feature_discard = 0;
+				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+			}
+			__blk_end_request_all(req, ret);
+			break;
 		default:
 			BUG();
 		}
--- 12.2.orig/drivers/xen/blkfront/block.h	2012-06-08 10:37:30.000000000 +0200
+++ 12.2/drivers/xen/blkfront/block.h	2012-06-08 10:38:01.000000000 +0200
@@ -109,6 +109,9 @@ struct blkfront_info
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
+	unsigned int feature_discard;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
 	int is_ready;
 };
 
--- 12.2.orig/drivers/xen/blkfront/vbd.c	2012-03-12 16:18:05.000000000 +0100
+++ 12.2/drivers/xen/blkfront/vbd.c	2012-03-12 16:18:35.000000000 +0100
@@ -365,6 +365,13 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 #endif
 
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+	}
+
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_hw_sectors(rq, 512);
--- 12.2.orig/drivers/xen/blktap/blktap.c	2012-05-23 13:39:15.000000000 +0200
+++ 12.2/drivers/xen/blktap/blktap.c	2011-11-18 15:35:59.000000000 +0100
@@ -42,6 +42,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <asm/hypervisor.h>
 #include "common.h"
 #include <xen/balloon.h>
--- 12.2.orig/drivers/xen/blktap/common.h	2012-06-06 13:55:35.000000000 +0200
+++ 12.2/drivers/xen/blktap/common.h	2011-11-18 15:35:50.000000000 +0100
@@ -27,7 +27,6 @@
 #ifndef __BLKIF__BACKEND__COMMON_H__
 #define __BLKIF__BACKEND__COMMON_H__
 
-#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
--- 12.2.orig/drivers/xen/blktap/xenbus.c	2012-02-16 13:29:26.000000000 +0100
+++ 12.2/drivers/xen/blktap/xenbus.c	2011-11-18 15:35:43.000000000 +0100
@@ -35,7 +35,6 @@
  */
 
 #include <stdarg.h>
-#include <linux/module.h>
 #include <linux/kthread.h>
 #include <xen/xenbus.h>
 #include "common.h"
--- 12.2.orig/drivers/xen/blktap2-new/device.c	2012-02-16 16:38:39.000000000 +0100
+++ 12.2/drivers/xen/blktap2-new/device.c	2011-11-21 15:50:27.000000000 +0100
@@ -2,6 +2,7 @@
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
 #include <linux/hdreg.h>
+#include <linux/module.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
 
--- 12.2.orig/drivers/xen/blktap2-new/ring.c	2011-02-24 15:10:15.000000000 +0100
+++ 12.2/drivers/xen/blktap2-new/ring.c	2011-11-21 15:50:37.000000000 +0100
@@ -1,4 +1,4 @@
-
+#include <linux/module.h>
 #include <linux/device.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
--- 12.2.orig/drivers/xen/core/evtchn.c	2012-01-26 13:48:33.000000000 +0100
+++ 12.2/drivers/xen/core/evtchn.c	2011-11-21 15:49:38.000000000 +0100
@@ -37,7 +37,6 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/ftrace.h>
-#include <linux/version.h>
 #include <linux/atomic.h>
 #include <asm/system.h>
 #include <asm/ptrace.h>
--- 12.2.orig/drivers/xen/core/gnttab.c	2012-03-12 16:17:09.000000000 +0100
+++ 12.2/drivers/xen/core/gnttab.c	2012-03-12 16:18:44.000000000 +0100
@@ -31,7 +31,7 @@
  * IN THE SOFTWARE.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -547,7 +547,7 @@ static int unmap_pte_fn(pte_t *pte, stru
 void *arch_gnttab_alloc_shared(unsigned long *frames)
 {
 	struct vm_struct *area;
-	area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
+	area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames(), NULL);
 	BUG_ON(area == NULL);
 	return area->addr;
 }
--- 12.2.orig/drivers/xen/core/machine_reboot.c	2011-02-01 15:03:10.000000000 +0100
+++ 12.2/drivers/xen/core/machine_reboot.c	2011-11-18 15:43:23.000000000 +0100
@@ -1,8 +1,7 @@
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/unistd.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/reboot.h>
 #include <linux/sysrq.h>
 #include <linux/stringify.h>
--- 12.2.orig/drivers/xen/core/reboot.c	2011-02-01 15:04:27.000000000 +0100
+++ 12.2/drivers/xen/core/reboot.c	2011-11-18 15:46:15.000000000 +0100
@@ -1,7 +1,6 @@
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/unistd.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
 #include <linux/sched.h>
@@ -18,8 +17,6 @@
 #undef handle_sysrq
 #endif
 
-MODULE_LICENSE("Dual BSD/GPL");
-
 #define SHUTDOWN_INVALID  -1
 #define SHUTDOWN_POWEROFF  0
 #define SHUTDOWN_SUSPEND   2
--- 12.2.orig/drivers/xen/core/smpboot.c	2012-03-22 16:22:34.000000000 +0100
+++ 12.2/drivers/xen/core/smpboot.c	2011-11-18 15:44:14.000000000 +0100
@@ -5,7 +5,6 @@
  *	portions of this file.
  */
 
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
--- 12.2.orig/drivers/xen/core/spinlock.c	2012-01-31 18:18:39.000000000 +0100
+++ 12.2/drivers/xen/core/spinlock.c	2012-02-07 11:59:21.000000000 +0100
@@ -9,9 +9,9 @@
 
 #ifdef TICKET_SHIFT
 
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <asm/hardirq.h>
 #include <xen/evtchn.h>
 
@@ -90,9 +90,9 @@ static inline void sequence(unsigned int
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
 static DEFINE_PER_CPU(unsigned int, _irq_count);
 
-static unsigned int spin_adjust(struct spinning *spinning,
-				const arch_spinlock_t *lock,
-				unsigned int ticket)
+static __ticket_t spin_adjust(struct spinning *spinning,
+			      const arch_spinlock_t *lock,
+			      __ticket_t ticket)
 {
 	for (; spinning; spinning = spinning->prev) {
 		unsigned int old = spinning->ticket;
@@ -117,34 +117,35 @@ static unsigned int spin_adjust(struct s
 	return ticket;
 }
 
-unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token)
+struct __raw_tickets xen_spin_adjust(const arch_spinlock_t *lock,
+				     struct __raw_tickets token)
 {
-	token = spin_adjust(percpu_read(_spinning), lock,
-			    token >> TICKET_SHIFT);
-	return (token << TICKET_SHIFT) | lock->cur;
+	token.tail = spin_adjust(percpu_read(_spinning), lock, token.tail);
+	token.head = ACCESS_ONCE(lock->tickets.head);
+	return token;
 }
 
 static unsigned int ticket_drop(struct spinning *spinning,
 				unsigned int ticket, unsigned int cpu)
 {
 	arch_spinlock_t *lock = spinning->lock;
-	unsigned int token;
-	bool kick;
 
 	if (cmpxchg(&spinning->ticket, ticket, -1) != ticket)
 		return -1;
-	__ticket_spin_unlock_body;
-	return kick ? (ticket + 1) & ((1 << TICKET_SHIFT) - 1) : -1;
+	asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
+		     : "+m" (lock->tickets.head)
+		     : : "memory", "cc");
+	ticket = (__ticket_t)(ticket + 1);
+	return ticket != lock->tickets.tail ? ticket : -1;
 }
 
 static unsigned int ticket_get(arch_spinlock_t *lock, struct spinning *prev)
 {
-	unsigned int token;
-	bool free;
+	struct __raw_tickets token = xadd(&lock->tickets,
+					  (struct __raw_tickets){ .tail = 1 });
 
-	__ticket_spin_lock_preamble;
-	token >>= TICKET_SHIFT;
-	return free ? token : spin_adjust(prev, lock, token);
+	return token.head == token.tail ? token.tail
+					: spin_adjust(prev, lock, token.tail);
 }
 
 void xen_spin_irq_enter(void)
@@ -165,7 +166,7 @@ void xen_spin_irq_enter(void)
 		 * Try to get a new ticket right away (to reduce latency after
 		 * the current lock was released), but don't acquire the lock.
 		 */
-		while (lock->cur == spinning->ticket) {
+		while (lock->tickets.head == spinning->ticket) {
 			unsigned int ticket = ticket_drop(spinning,
 							  spinning->ticket,
 							  cpu);
@@ -217,7 +218,7 @@ void xen_spin_irq_exit(void)
 }
 #endif
 
-bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok,
+bool xen_spin_wait(arch_spinlock_t *lock, struct __raw_tickets *ptok,
 		   unsigned int flags)
 {
 	typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask
@@ -230,7 +231,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 		return false;
 
 	/* announce we're spinning */
-	spinning.ticket = *ptok >> TICKET_SHIFT;
+	spinning.ticket = ptok->tail;
 	spinning.lock = lock;
 	spinning.prev = percpu_read(_spinning);
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
@@ -257,7 +258,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
 		 * Check again to make sure it didn't become free while
 		 * we weren't looking.
 		 */
-		if (lock->cur == spinning.ticket) {
+		if (lock->tickets.head == spinning.ticket) {
 			/*
 			 * If we interrupted another spinlock while it was
 			 * blocking, make sure it doesn't block (again)
@@ -304,22 +305,22 @@ bool xen_spin_wait(arch_spinlock_t *lock
 	sequence(SEQ_REMOVE_BIAS);
 	arch_local_irq_restore(upcall_mask);
 	smp_rmb();
-	if (lock->cur == spinning.ticket)
+	if (lock->tickets.head == spinning.ticket)
 		return true;
 	BUG_ON(CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING && !(spinning.ticket + 1));
-	*ptok = lock->cur | (spinning.ticket << TICKET_SHIFT);
+	ptok->head = lock->tickets.head;
+	ptok->tail = spinning.ticket;
 
 	return false;
 }
 
-void xen_spin_kick(const arch_spinlock_t *lock, unsigned int token)
+void xen_spin_kick(const arch_spinlock_t *lock, unsigned int ticket)
 {
 	unsigned int cpu = raw_smp_processor_id(), anchor = cpu;
 
 	if (unlikely(!cpu_online(cpu)))
 		cpu = -1, anchor = nr_cpu_ids;
 
-	token &= (1U << TICKET_SHIFT) - 1;
 	while ((cpu = cpumask_next(cpu, cpu_online_mask)) != anchor) {
 		unsigned int flags;
 		atomic_t *rm_ctr;
@@ -356,11 +357,11 @@ void xen_spin_kick(const arch_spinlock_t
 
 		for (; spinning; spinning = spinning->prev)
 			if (spinning->lock == lock &&
-			    spinning->ticket == token) {
+			    spinning->ticket == ticket) {
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-				token = spinning->irq_count
+				ticket = spinning->irq_count
 					 < per_cpu(_irq_count, cpu)
-					 ? ticket_drop(spinning, token, cpu) : -2;
+					 ? ticket_drop(spinning, ticket, cpu) : -2;
 #endif
 				break;
 			}
@@ -370,9 +371,9 @@ void xen_spin_kick(const arch_spinlock_t
 
 		if (unlikely(spinning)) {
 #if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-			if (!(token + 1))
+			if (!(ticket + 1))
 				return;
-			if (token + 2) {
+			if (ticket + 2) {
 				cpu = anchor < nr_cpu_ids ? anchor : -1;
 				continue;
 			}
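The rework above follows the mainline 3.2 change of arch_spinlock_t from one packed token to an explicit head/tail pair, so the Xen spinning code can compare and hand around tickets without shifting by TICKET_SHIFT. A minimal sketch of the representation and the xadd-based ticket grab assumed throughout:

	typedef unsigned short __ticket_t;	/* width actually depends on NR_CPUS */
	struct __raw_tickets { __ticket_t head, tail; };

	/* take a ticket: atomically bump tail, remember both halves */
	struct __raw_tickets t =
		xadd(&lock->tickets, (struct __raw_tickets){ .tail = 1 });
	bool uncontended = (t.head == t.tail);
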
--- 12.2.orig/drivers/xen/core/xen_proc.c	2011-04-04 08:44:27.000000000 +0200
+++ 12.2/drivers/xen/core/xen_proc.c	2011-11-18 15:45:37.000000000 +0100
@@ -1,5 +1,4 @@
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <xen/xen_proc.h>
 
@@ -18,6 +17,8 @@ create_xen_proc_entry(const char *name, 
 }
 
 #ifdef MODULE
+#include <linux/export.h>
+
 EXPORT_SYMBOL_GPL(create_xen_proc_entry); 
 #elif defined(CONFIG_XEN_PRIVILEGED_GUEST)
 
--- 12.2.orig/drivers/xen/netback/loopback.c	2012-02-17 09:06:00.000000000 +0100
+++ 12.2/drivers/xen/netback/loopback.c	2012-02-17 09:14:49.000000000 +0100
@@ -109,7 +109,7 @@ static int skb_remove_foreign_references
 		return 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+		pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[i]));
 		if (!is_foreign(pfn))
 			continue;
 		
@@ -121,11 +121,11 @@ static int skb_remove_foreign_references
 		off = skb_shinfo(skb)->frags[i].page_offset;
 		memcpy(page_address(page) + off,
 		       vaddr + off,
-		       skb_shinfo(skb)->frags[i].size);
+		       skb_frag_size(&skb_shinfo(skb)->frags[i]));
 		kunmap_skb_frag(vaddr);
 
-		put_page(skb_shinfo(skb)->frags[i].page);
-		skb_shinfo(skb)->frags[i].page = page;
+		skb_frag_unref(skb, i);
+		skb_frag_set_page(skb, i, page);
 	}
 
 	return 1;
@@ -178,19 +178,10 @@ static const struct ethtool_ops network_
 	.get_link = ethtool_op_get_link,
 };
 
-/*
- * Nothing to do here. Virtual interface is point-to-point and the
- * physical interface is probably promiscuous anyway.
- */
-static void loopback_set_multicast_list(struct net_device *dev)
-{
-}
-
 static const struct net_device_ops loopback_netdev_ops = {
 	.ndo_open               = loopback_open,
 	.ndo_stop               = loopback_close,
 	.ndo_start_xmit         = loopback_start_xmit,
-	.ndo_set_multicast_list = loopback_set_multicast_list,
 	.ndo_change_mtu	        = NULL, /* allow arbitrary mtu */
 };
 
--- 12.2.orig/drivers/xen/netback/netback.c	2012-06-08 10:37:17.000000000 +0200
+++ 12.2/drivers/xen/netback/netback.c	2012-06-08 10:37:44.000000000 +0200
@@ -78,15 +78,18 @@ static struct timer_list netbk_tx_pendin
 
 #define MAX_PENDING_REQS 256
 
+/* Discriminate from any valid pending_idx value. */
+#define INVALID_PENDING_IDX 0xffff
+
 static struct sk_buff_head rx_queue;
 
 static struct page **mmap_pages;
-static inline unsigned long idx_to_pfn(unsigned int idx)
+static inline unsigned long idx_to_pfn(u16 idx)
 {
 	return page_to_pfn(mmap_pages[idx]);
 }
 
-static inline unsigned long idx_to_kaddr(unsigned int idx)
+static inline unsigned long idx_to_kaddr(u16 idx)
 {
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
 }
@@ -110,6 +113,16 @@ static inline int netif_page_index(struc
 	return idx;
 }
 
+static u16 frag_get_pending_idx(const skb_frag_t *frag)
+{
+	return (u16)frag->page_offset;
+}
+
+static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
+{
+	frag->page_offset = pending_idx;
+}
+
 /*
  * This is the amount of packet we copy rather than map, so that the
  * guest can't fiddle with the contents of the headers while we do
@@ -247,9 +260,7 @@ static struct sk_buff *netbk_copy_skb(st
 		ret = skb_copy_bits(skb, offset, page_address(page), copy);
 		BUG_ON(ret);
 
-		ninfo->frags[ninfo->nr_frags].page = page;
-		ninfo->frags[ninfo->nr_frags].page_offset = 0;
-		ninfo->frags[ninfo->nr_frags].size = copy;
+		__skb_fill_page_desc(nskb, ninfo->nr_frags, page, 0, copy);
 		ninfo->nr_frags++;
 
 		offset += copy;
@@ -479,8 +490,8 @@ static void netbk_gop_skb(struct sk_buff
 		meta = npo->meta + npo->meta_prod++;
 		meta->frag = skb_shinfo(skb)->frags[i];
 		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
-					  meta->frag.page,
-					  meta->frag.size,
+					  skb_frag_page(&meta->frag),
+					  skb_frag_size(&meta->frag),
 					  meta->frag.page_offset);
 	}
 
@@ -502,7 +513,7 @@ static inline void netbk_free_pages(int 
 	int i;
 
 	for (i = 0; i < nr_frags; i++)
-		put_page(meta[i].frag.page);
+		put_page(skb_frag_page(&meta[i].frag));
 }
 
 /* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
@@ -1070,11 +1081,11 @@ static gnttab_map_grant_ref_t *netbk_get
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
-	unsigned long pending_idx = *((u16 *)skb->data);
+	u16 pending_idx = *(u16 *)skb->data;
 	int i, start;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
-	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+	start = (frag_get_pending_idx(frags) == pending_idx);
 
 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
 		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
@@ -1086,7 +1097,7 @@ static gnttab_map_grant_ref_t *netbk_get
 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
 		netif_get(netif);
 		pending_tx_info[pending_idx].netif = netif;
-		frags[i].page = (void *)pending_idx;
+		frag_set_pending_idx(&frags[i], pending_idx);
 	}
 
 	return mop;
@@ -1096,7 +1107,7 @@ static int netbk_tx_check_mop(struct sk_
 			       gnttab_map_grant_ref_t **mopp)
 {
 	gnttab_map_grant_ref_t *mop = *mopp;
-	int pending_idx = *((u16 *)skb->data);
+	u16 pending_idx = *(u16 *)skb->data;
 	netif_t *netif = pending_tx_info[pending_idx].netif;
 	netif_tx_request_t *txp;
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -1117,12 +1128,12 @@ static int netbk_tx_check_mop(struct sk_
 	}
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
-	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+	start = (frag_get_pending_idx(shinfo->frags) == pending_idx);
 
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
 
-		pending_idx = (unsigned long)shinfo->frags[i].page;
+		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
 
 		/* Check error status: if okay then remember grant handle. */
 		newerr = (++mop)->status;
@@ -1150,7 +1161,7 @@ static int netbk_tx_check_mop(struct sk_
 		pending_idx = *((u16 *)skb->data);
 		netif_idx_release(pending_idx);
 		for (j = start; j < i; j++) {
-			pending_idx = (unsigned long)shinfo->frags[j].page;
+			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
 			netif_idx_release(pending_idx);
 		}
 
@@ -1169,20 +1180,16 @@ static void netbk_fill_frags(struct sk_b
 	int i;
 
 	for (i = 0; i < nr_frags; i++) {
-		skb_frag_t *frag = shinfo->frags + i;
 		netif_tx_request_t *txp;
-		unsigned long pending_idx;
-
-		pending_idx = (unsigned long)frag->page;
+		u16 pending_idx = frag_get_pending_idx(shinfo->frags + i);
 
 		pending_inuse[pending_idx].alloc_time = jiffies;
 		list_add_tail(&pending_inuse[pending_idx].list,
 			      &pending_inuse_head);
 
 		txp = &pending_tx_info[pending_idx].req;
-		frag->page = mmap_pages[pending_idx];
-		frag->size = txp->size;
-		frag->page_offset = txp->offset;
+		__skb_fill_page_desc(skb, i, mmap_pages[pending_idx],
+				     txp->offset, txp->size);
 
 		skb->len += txp->size;
 		skb->data_len += txp->size;
@@ -1384,14 +1391,11 @@ static void net_tx_action(unsigned long 
 		__skb_put(skb, data_len);
 
 		skb_shinfo(skb)->nr_frags = ret;
-		if (data_len < txreq.size) {
+		if (data_len < txreq.size)
 			skb_shinfo(skb)->nr_frags++;
-			skb_shinfo(skb)->frags[0].page =
-				(void *)(unsigned long)pending_idx;
-		} else {
-			/* Discriminate from any valid pending_idx value. */
-			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
-		}
+		else
+			pending_idx = INVALID_PENDING_IDX;
+		frag_set_pending_idx(skb_shinfo(skb)->frags, pending_idx);
 
 		__skb_queue_tail(&tx_queue, skb);
 
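Because the 3.2 skb_frag_t is manipulated through accessors, netback can no longer smuggle a pending-ring index through the frag's page pointer; it now parks the index in the otherwise-unused page_offset field via the two helpers above and uses INVALID_PENDING_IDX in place of the old (void *)~0UL sentinel. The "does frag 0 share the header page" test then reads, as in the check-mop path above:

	start = (frag_get_pending_idx(shinfo->frags) == pending_idx);
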
--- 12.2.orig/drivers/xen/netback/xenbus.c	2012-01-06 11:00:01.000000000 +0100
+++ 12.2/drivers/xen/netback/xenbus.c	2011-11-18 15:49:36.000000000 +0100
@@ -18,7 +18,6 @@
 */
 
 #include <stdarg.h>
-#include <linux/module.h>
 #include <linux/rwsem.h>
 #include <xen/xenbus.h>
 #include "common.h"
--- 12.2.orig/drivers/xen/netfront/accel.c	2011-09-12 10:44:16.000000000 +0200
+++ 12.2/drivers/xen/netfront/accel.c	2011-11-21 15:05:50.000000000 +0100
@@ -32,6 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
 #include <asm/hypervisor.h>
 #include <xen/xenbus.h>
--- 12.2.orig/drivers/xen/netfront/netfront.c	2012-06-08 10:37:24.000000000 +0200
+++ 12.2/drivers/xen/netfront/netfront.c	2012-06-08 10:37:47.000000000 +0200
@@ -749,7 +749,7 @@ no_skb:
 		}
 
 		skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
-		skb_shinfo(skb)->frags[0].page = page;
+		__skb_fill_page_desc(skb, 0, page, 0, 0);
 		skb_shinfo(skb)->nr_frags = 1;
 		__skb_queue_tail(&np->rx_batch, skb);
 	}
@@ -782,8 +782,9 @@ no_skb:
 		BUG_ON((signed short)ref < 0);
 		np->grant_rx_ref[id] = ref;
 
-		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
-		vaddr = page_address(skb_shinfo(skb)->frags[0].page);
+		page = skb_frag_page(skb_shinfo(skb)->frags);
+		pfn = page_to_pfn(page);
+		vaddr = page_address(page);
 
 		req = RING_GET_REQUEST(&np->rx, req_prod + i);
 		if (!np->copying_receiver) {
@@ -910,13 +911,13 @@ static void xennet_make_frags(struct sk_
 		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 		BUG_ON((signed short)ref < 0);
 
-		mfn = pfn_to_mfn(page_to_pfn(frag->page));
+		mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag)));
 		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
 						mfn, GTF_readonly);
 
 		tx->gref = np->grant_tx_ref[id] = ref;
 		tx->offset = frag->page_offset;
-		tx->size = frag->size;
+		tx->size = skb_frag_size(frag);
 		tx->flags = 0;
 	}
 
@@ -1180,8 +1181,8 @@ static int xennet_get_responses(struct n
 
 			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 				/* Remap the page. */
-				struct page *page =
-					skb_shinfo(skb)->frags[0].page;
+				const struct page *page =
+					skb_frag_page(skb_shinfo(skb)->frags);
 				unsigned long pfn = page_to_pfn(page);
 				void *vaddr = page_address(page);
 
@@ -1247,23 +1248,21 @@ static RING_IDX xennet_fill_frags(struct
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	int nr_frags = shinfo->nr_frags;
 	RING_IDX cons = np->rx.rsp_cons;
-	skb_frag_t *frag = shinfo->frags + nr_frags;
 	struct sk_buff *nskb;
 
 	while ((nskb = __skb_dequeue(list))) {
 		struct netif_rx_response *rx =
 			RING_GET_RESPONSE(&np->rx, ++cons);
 
-		frag->page = skb_shinfo(nskb)->frags[0].page;
-		frag->page_offset = rx->offset;
-		frag->size = rx->status;
+		__skb_fill_page_desc(skb, nr_frags,
+				     skb_frag_page(skb_shinfo(nskb)->frags),
+				     rx->offset, rx->status);
 
 		skb->data_len += rx->status;
 
 		skb_shinfo(nskb)->nr_frags = 0;
 		kfree_skb(nskb);
 
-		frag++;
 		nr_frags++;
 	}
 
@@ -1369,7 +1368,8 @@ err:	
 			}
 		}
 
-		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
+		NETFRONT_SKB_CB(skb)->page =
+			skb_frag_page(skb_shinfo(skb)->frags);
 		NETFRONT_SKB_CB(skb)->offset = rx->offset;
 
 		len = rx->status;
@@ -1380,10 +1380,11 @@ err:	
 		if (rx->status > len) {
 			skb_shinfo(skb)->frags[0].page_offset =
 				rx->offset + len;
-			skb_shinfo(skb)->frags[0].size = rx->status - len;
+			skb_frag_size_set(skb_shinfo(skb)->frags,
+					  rx->status - len);
 			skb->data_len = rx->status - len;
 		} else {
-			skb_shinfo(skb)->frags[0].page = NULL;
+			__skb_fill_page_desc(skb, 0, NULL, 0, 0);
 			skb_shinfo(skb)->nr_frags = 0;
 		}
 
@@ -1455,7 +1456,7 @@ err:	
 
 		memcpy(skb->data, vaddr + offset, skb_headlen(skb));
 
-		if (page != skb_shinfo(skb)->frags[0].page)
+		if (page != skb_frag_page(skb_shinfo(skb)->frags))
 			__free_page(page);
 
 		/* Ethernet work: Delayed to here as it peeks the header. */
@@ -1553,6 +1554,8 @@ static void netif_release_rx_bufs_flip(s
 	spin_lock_bh(&np->rx_lock);
 
 	for (id = 0; id < NET_RX_RING_SIZE; id++) {
+		struct page *page;
+
 		if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
 			unused++;
 			continue;
@@ -1564,8 +1567,9 @@ static void netif_release_rx_bufs_flip(s
 		np->grant_rx_ref[id] = GRANT_INVALID_REF;
 		add_id_to_freelist(np->rx_skbs, id);
 
+		page = skb_frag_page(skb_shinfo(skb)->frags);
+
 		if (0 == mfn) {
-			struct page *page = skb_shinfo(skb)->frags[0].page;
 			balloon_release_driver_page(page);
 			skb_shinfo(skb)->nr_frags = 0;
 			dev_kfree_skb(skb);
@@ -1575,7 +1579,6 @@ static void netif_release_rx_bufs_flip(s
 
 		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 			/* Remap the page. */
-			struct page *page = skb_shinfo(skb)->frags[0].page;
 			unsigned long pfn = page_to_pfn(page);
 			void *vaddr = page_address(page);
 
@@ -1827,23 +1830,23 @@ static int network_connect(struct net_de
 
 	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
 	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+		unsigned long pfn;
+
 		if (!np->rx_skbs[i])
 			continue;
 
 		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
 		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
 		req = RING_GET_REQUEST(&np->rx, requeue_idx);
+		pfn = page_to_pfn(skb_frag_page(skb_shinfo(skb)->frags));
 
 		if (!np->copying_receiver) {
 			gnttab_grant_foreign_transfer_ref(
-				ref, np->xbdev->otherend_id,
-				page_to_pfn(skb_shinfo(skb)->frags->page));
+				ref, np->xbdev->otherend_id, pfn);
 		} else {
 			gnttab_grant_foreign_access_ref(
 				ref, np->xbdev->otherend_id,
-				pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
-						       frags->page)),
-				0);
+				pfn_to_mfn(pfn), 0);
 		}
 		req->gref = ref;
 		req->id   = requeue_idx;
@@ -2070,7 +2073,7 @@ static const struct net_device_ops xenne
 	.ndo_open               = network_open,
 	.ndo_stop               = network_close,
 	.ndo_start_xmit         = network_start_xmit,
-	.ndo_set_multicast_list = network_set_multicast_list,
+	.ndo_set_rx_mode        = network_set_multicast_list,
 	.ndo_set_mac_address    = xennet_set_mac_address,
 	.ndo_validate_addr      = eth_validate_addr,
 	.ndo_fix_features       = xennet_fix_features,
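The .ndo_set_multicast_list callback was removed from net_device_ops in 3.2 in favour of .ndo_set_rx_mode, which is why netfront renames its hook here while the netback loopback device, earlier in this patch, simply drops its empty implementation.
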
--- 12.2.orig/drivers/xen/pcifront/pci.c	2007-06-12 13:13:45.000000000 +0200
+++ 12.2/drivers/xen/pcifront/pci.c	2011-11-18 15:48:41.000000000 +0100
@@ -3,8 +3,6 @@
  *
  *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
  */
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
 #include "pcifront.h"
--- 12.2.orig/drivers/xen/scsiback/xenbus.c	2011-06-30 17:04:59.000000000 +0200
+++ 12.2/drivers/xen/scsiback/xenbus.c	2011-11-18 15:49:42.000000000 +0100
@@ -31,7 +31,6 @@
  */
 
 #include <stdarg.h>
-#include <linux/module.h>
 #include <linux/kthread.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
--- 12.2.orig/drivers/xen/sfc_netfront/accel.h	2011-02-01 15:03:03.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel.h	2011-11-21 17:11:02.000000000 +0100
@@ -445,8 +445,8 @@ int netfront_accel_vi_poll(netfront_acce
 			} else {					\
 				skb_frag_t *fragment;			\
 				fragment = &skb_shinfo(skb)->frags[frag_idx]; \
-				frag_len = fragment->size;		\
-				frag_data = ((void*)page_address(fragment->page) \
+				frag_len = skb_frag_size(fragment);	\
+				frag_data = ((void*)page_address(skb_frag_page(fragment)) \
 					     + fragment->page_offset);	\
 			};						\
 			frag_idx++;					\
--- 12.2.orig/drivers/xen/sfc_netfront/accel_netfront.c	2011-09-12 11:46:42.000000000 +0200
+++ 12.2/drivers/xen/sfc_netfront/accel_netfront.c	2011-11-21 16:35:49.000000000 +0100
@@ -22,6 +22,7 @@
  ****************************************************************************
  */
 
+#include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
--- 12.2.orig/drivers/xen/sfc_netfront/accel_tso.c	2011-01-31 17:32:29.000000000 +0100
+++ 12.2/drivers/xen/sfc_netfront/accel_tso.c	2011-11-21 17:12:21.000000000 +0100
@@ -376,8 +376,8 @@ int netfront_accel_enqueue_skb_tso(netfr
 		BUG_ON(skb_shinfo(skb)->nr_frags < 1);
 		frag_i = 0;
 		f = &skb_shinfo(skb)->frags[frag_i];
-		tso_get_fragment(&state, f->size, 
-				 page_address(f->page) + f->page_offset);
+		tso_get_fragment(&state, skb_frag_size(f),
+				 page_address(skb_frag_page(f)) + f->page_offset);
 	} else {
 		int hl = state.p.header_length;
 		tso_get_fragment(&state,  skb_headlen(skb) - hl, 
@@ -400,8 +400,8 @@ int netfront_accel_enqueue_skb_tso(netfr
 				/* End of payload reached. */
 				break;
 			f = &skb_shinfo(skb)->frags[frag_i];
-			tso_get_fragment(&state, f->size,
-					 page_address(f->page) +
+			tso_get_fragment(&state, skb_frag_size(f),
+					 page_address(skb_frag_page(f)) +
 					 f->page_offset);
 		}
 
--- 12.2.orig/drivers/xen/sfc_netfront/accel_vi.c	2011-06-30 17:09:25.000000000 +0200
+++ 12.2/drivers/xen/sfc_netfront/accel_vi.c	2011-11-21 17:10:44.000000000 +0100
@@ -482,8 +482,9 @@ netfront_accel_enqueue_skb_multi(netfron
 				/* End of payload reached. */
 				break;
 			f = &skb_shinfo(skb)->frags[frag_i];
-			state.ifc.len = f->size;
-			state.ifc.addr = page_address(f->page) + f->page_offset;
+			state.ifc.len = skb_frag_size(f);
+			state.ifc.addr = page_address(skb_frag_page(f))
+					 + f->page_offset;
 		}
 
 		/* Start a new buffer? */
--- 12.2.orig/drivers/xen/sfc_netutil/accel_util.c	2011-02-01 15:03:03.000000000 +0100
+++ 12.2/drivers/xen/sfc_netutil/accel_util.c	2011-11-28 10:44:58.000000000 +0100
@@ -24,7 +24,7 @@
 
 #include <linux/slab.h>
 #include <linux/if_ether.h>
-#include <linux/delay.h>
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/hypercall.h>
@@ -154,7 +154,7 @@ static void *net_accel_map_grants_valloc
 	void *addr;
 	int i, j, rc;
 
-	vm  = alloc_vm_area(PAGE_SIZE * npages);
+	vm  = alloc_vm_area(PAGE_SIZE * npages, NULL);
 	if (vm == NULL) {
 		EPRINTK("No memory from alloc_vm_area.\n");
 		return NULL;
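
alloc_vm_area() grew a second parameter in 3.2: when non-NULL it hands back one pte_t pointer per allocated page so the caller can populate the PTEs itself, while passing NULL preserves the old behaviour (and, per the mm/vmalloc.c hunk at the end of this patch, triggers a page-table sync under CONFIG_XEN). A hedged caller sketch, assuming the two-argument prototype from 3.2's <linux/vmalloc.h>:

#include <linux/vmalloc.h>
#include <linux/mm.h>

/* ptes, when non-NULL, must have room for npages entries. */
static struct vm_struct *sketch_alloc_area(unsigned int npages, pte_t **ptes)
{
	struct vm_struct *vm = alloc_vm_area(PAGE_SIZE * npages, ptes);

	if (!vm)
		return NULL;
	/* ... install real mappings (hypercall or direct PTE writes) ... */
	return vm;		/* pair with free_vm_area(vm) */
}
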
--- 12.2.orig/drivers/xen/tpmback/xenbus.c	2011-04-11 15:04:27.000000000 +0200
+++ 12.2/drivers/xen/tpmback/xenbus.c	2011-11-18 15:50:14.000000000 +0100
@@ -17,7 +17,6 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 #include <stdarg.h>
-#include <linux/module.h>
 #include <xen/xenbus.h>
 #include "common.h"
 
--- 12.2.orig/drivers/xen/xenbus/xenbus_backend_client.c	2011-01-31 17:49:31.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_backend_client.c	2011-11-28 10:42:02.000000000 +0100
@@ -31,7 +31,7 @@
  */
 
 #include <linux/err.h>
-#include <linux/delay.h>
+#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <xen/gnttab.h>
 #include <xen/xenbus.h>
@@ -42,7 +42,7 @@ struct vm_struct *xenbus_map_ring_valloc
 	struct gnttab_map_grant_ref op;
 	struct vm_struct *area;
 
-	area = alloc_vm_area(PAGE_SIZE);
+	area = alloc_vm_area(PAGE_SIZE, NULL);
 	if (!area)
 		return ERR_PTR(-ENOMEM);
 
--- 12.2.orig/drivers/xen/xenbus/xenbus_client.c	2011-09-12 12:00:32.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_client.c	2011-11-28 10:14:06.000000000 +0100
@@ -37,7 +37,9 @@
 #else
 #include <linux/types.h>
 #include <linux/vmalloc.h>
+#include <linux/export.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
 #include <xen/events.h>
@@ -449,25 +451,26 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t gnt_ref, void **vaddr)
 {
 	struct gnttab_map_grant_ref op = {
-		.flags = GNTMAP_host_map,
+		.flags = GNTMAP_host_map | GNTMAP_contains_pte,
 		.ref   = gnt_ref,
 		.dom   = dev->otherend_id,
 	};
 	struct vm_struct *area;
+	pte_t *pte;
 
 	*vaddr = NULL;
 
-	area = xen_alloc_vm_area(PAGE_SIZE);
+	area = alloc_vm_area(PAGE_SIZE, &pte);
 	if (!area)
 		return -ENOMEM;
 
-	op.host_addr = (unsigned long)area->addr;
+	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
 		BUG();
 
 	if (op.status != GNTST_okay) {
-		xen_free_vm_area(area);
+		free_vm_area(area);
 		xenbus_dev_fatal(dev, op.status,
 				 "mapping in shared page %d from domain %d",
 				 gnt_ref, dev->otherend_id);
@@ -540,6 +543,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
 	struct gnttab_unmap_grant_ref op = {
 		.host_addr = (unsigned long)vaddr,
 	};
+	unsigned int level;
 
 	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
 	 * method so that we don't have to muck with vmalloc internals here.
@@ -561,12 +565,14 @@ int xenbus_unmap_ring_vfree(struct xenbu
 	}
 
 	op.handle = (grant_handle_t)area->phys_addr;
+	op.host_addr = arbitrary_virt_to_machine(
+		lookup_address((unsigned long)vaddr, &level)).maddr;
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
 		BUG();
 
 	if (op.status == GNTST_okay)
-		xen_free_vm_area(area);
+		free_vm_area(area);
 	else
 		xenbus_dev_error(dev, op.status,
 				 "unmapping page at handle %d error %d",
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe.c	2012-03-22 14:23:46.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_probe.c	2012-03-12 13:54:30.000000000 +0100
@@ -48,6 +48,7 @@
 #include <linux/mutex.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -1215,6 +1216,45 @@ fail0:
 #endif
 #endif /* CONFIG_XEN_PRIVILEGED_GUEST */
 
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
+{
+	int err = -ENOMEM;
+	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
+
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
+
+	xen_store_mfn = xen_start_info->store_mfn =
+		pfn_to_mfn(virt_to_phys((void *)page) >>
+			   PAGE_SHIFT);
+
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
+
+	BUG_ON(err);
+	xen_store_evtchn = xen_start_info->store_evtchn =
+		alloc_unbound.port;
+
+	return 0;
+
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
+
 #ifndef MODULE
 static int __init
 #else
@@ -1223,7 +1263,6 @@ int __devinit
 xenbus_init(void)
 {
 	int err = 0;
-	unsigned long page = 0;
 
 	DPRINTK("");
 
@@ -1237,35 +1276,14 @@ xenbus_init(void)
 		pr_warning("XENBUS: Error registering frontend bus: %i\n",
 			   xenbus_frontend.error);
 	xenbus_backend_bus_register();
-#endif
 
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
 	if (is_initial_xendomain()) {
-		struct evtchn_alloc_unbound alloc_unbound;
-
-		/* Allocate Xenstore page */
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-
-		xen_store_mfn = xen_start_info->store_mfn =
-			pfn_to_mfn(virt_to_phys((void *)page) >>
-				   PAGE_SHIFT);
-
-		/* Next allocate a local port which xenstored can bind to */
-		alloc_unbound.dom        = DOMID_SELF;
-		alloc_unbound.remote_dom = DOMID_SELF;
-
-		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-						  &alloc_unbound);
-		if (err == -ENOSYS)
-			goto err;
-
-		BUG_ON(err);
-		xen_store_evtchn = xen_start_info->store_evtchn =
-			alloc_unbound.port;
+		err = xenstored_local_init();
+		if (err)
+			goto out_error;
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
 		/* And finally publish the above info in /proc/xen */
@@ -1283,44 +1301,58 @@ xenbus_init(void)
 #endif
 		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	} else {
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-		if (xen_hvm_domain()) {
-#endif
 #ifndef CONFIG_XEN
-			uint64_t v = 0;
+		uint64_t v = 0;
 
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto err;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
-			if (err)
-				goto err;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
-						      PAGE_SIZE);
-#endif
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-		} else {
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+					      PAGE_SIZE);
 #endif
 #ifndef MODULE
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
-#endif
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-		}
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 #endif
 		atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
 
 		/* Initialize the shared memory rings to talk to xenstored */
 		err = xb_init_comms();
 		if (err)
-			goto err;
+			goto out_error;
 	}
 
-#if defined(CONFIG_XEN) || defined(MODULE)
 	xenbus_dev_init();
+#else /* !defined(CONFIG_XEN) && !defined(MODULE) */
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+	} else {
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		if (xen_store_evtchn)
+			atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
+		else {
+			err = xenstored_local_init();
+			if (err)
+				goto out_error;
+		}
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
+	}
 #endif
 
 	/* Initialize the interface to xenstore. */
@@ -1328,7 +1360,7 @@ xenbus_init(void)
 	if (err) {
 		pr_warning("XENBUS: Error initializing xenstore comms: %i\n",
 			   err);
-		goto err;
+		goto out_error;
 	}
 
 #if defined(CONFIG_XEN) || defined(MODULE)
@@ -1357,16 +1389,12 @@ xenbus_init(void)
 
 	return 0;
 
- err:
+out_error:
 	/*
 	 * Do not unregister the xenbus front/backend buses here. The buses
 	 * must exist because front/backend drivers will use them when they are
 	 * registered.
 	 */
-
-	if (page != 0)
-		free_page(page);
-
 	return err;
 }
 
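The net effect of the xenbus_probe.c changes: the dom0-local Xenstore bring-up moves into xenstored_local_init(), which owns the store page for its whole lifetime, so xenbus_init()'s error path no longer has a page to free. The allocation/error pattern the helper uses looks roughly like this (hypothetical, simplified sketch):

static int sketch_local_init(unsigned long *store_mfn)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	if (!page)
		return -ENOMEM;	/* nothing to undo yet */

	*store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >> PAGE_SHIFT);
	/* ... bind an unbound event channel; on failure:
	 *	free_page(page); return err;
	 */
	return 0;
}
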
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe_backend.c	2012-03-22 14:23:42.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_probe_backend.c	2011-11-17 15:56:06.000000000 +0100
@@ -44,6 +44,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/notifier.h>
+#include <linux/export.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -116,8 +117,6 @@ static int xenbus_uevent_backend(struct 
 
 	xdev = to_xenbus_device(dev);
 	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-	if (xdev == NULL)
-		return -ENODEV;
 
 	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
 		return -ENOMEM;
--- 12.2.orig/drivers/xen/xenbus/xenbus_xs.c	2012-03-22 14:09:34.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_xs.c	2012-03-12 16:18:49.000000000 +0100
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include "xenbus_comms.h"
 
 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
--- 12.2.orig/include/xen/balloon.h	2012-02-03 13:34:56.000000000 +0100
+++ 12.2/include/xen/balloon.h	2012-02-03 13:44:44.000000000 +0100
@@ -82,8 +82,9 @@ extern struct balloon_stats balloon_stat
 
 void balloon_set_new_target(unsigned long target);
 
-int alloc_xenballooned_pages(int nr_pages, struct page** pages);
-void free_xenballooned_pages(int nr_pages, struct page** pages);
+int alloc_xenballooned_pages(int nr_pages, struct page **pages,
+		bool highmem);
+void free_xenballooned_pages(int nr_pages, struct page **pages);
 
 #endif /* CONFIG_PARAVIRT_XEN */
 
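alloc_xenballooned_pages() gains a highmem flag in 3.2; callers that need page_address() to work on the result must pass false. A hypothetical caller sketch:

#include <xen/balloon.h>

static int sketch_grab_lowmem(struct page **pages, int nr)
{
	/* false: every page must have a kernel mapping */
	return alloc_xenballooned_pages(nr, pages, false);
}
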
--- 12.2.orig/include/xen/blkif.h	2012-04-04 08:57:09.000000000 +0200
+++ 12.2/include/xen/blkif.h	2012-04-04 10:27:25.000000000 +0200
@@ -46,12 +46,21 @@ struct blkif_x86_32_request {
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
 	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
+struct blkif_x86_32_discard {
+	uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
+	uint8_t        reserved;     /*                                      */
+	blkif_vdev_t   handle;       /* same as for read/write requests      */
+	uint64_t       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk             */
+	uint64_t       nr_sectors;   /* number of contiguous sectors         */
+};
 struct blkif_x86_32_response {
 	uint64_t        id;              /* copied from request */
 	uint8_t         operation;       /* copied from request */
 	int16_t         status;          /* BLKIF_RSP_???       */
 };
 typedef struct blkif_x86_32_request blkif_x86_32_request_t;
+typedef struct blkif_x86_32_discard blkif_x86_32_discard_t;
 typedef struct blkif_x86_32_response blkif_x86_32_response_t;
 #pragma pack(pop)
 
@@ -64,12 +73,21 @@ struct blkif_x86_64_request {
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
 	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
+struct blkif_x86_64_discard {
+	uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
+	uint8_t        reserved;     /*                                      */
+	blkif_vdev_t   handle;       /* same as for read/write requests      */
+	uint64_t       __attribute__((__aligned__(8))) id;
+	blkif_sector_t sector_number;/* start sector idx on disk             */
+	uint64_t       nr_sectors;   /* number of contiguous sectors         */
+};
 struct blkif_x86_64_response {
 	uint64_t       __attribute__((__aligned__(8))) id;
 	uint8_t         operation;       /* copied from request */
 	int16_t         status;          /* BLKIF_RSP_???       */
 };
 typedef struct blkif_x86_64_request blkif_x86_64_request_t;
+typedef struct blkif_x86_64_discard blkif_x86_64_discard_t;
 typedef struct blkif_x86_64_response blkif_x86_64_response_t;
 
 #define blkif_native_sring blkif_sring
@@ -100,6 +118,13 @@ static void inline blkif_get_x86_32_req(
 	dst->id = src->id;
 	dst->sector_number = src->sector_number;
 	barrier();
+	if (unlikely(dst->operation == BLKIF_OP_DISCARD)) {
+		blkif_request_discard_t *d = (void *)dst;
+		const blkif_x86_32_discard_t *s = (const void *)src;
+
+		d->nr_sectors = s->nr_sectors;
+		return;
+	}
 	if (n > dst->nr_segments)
 		n = dst->nr_segments;
 	for (i = 0; i < n; i++)
@@ -115,6 +140,13 @@ static void inline blkif_get_x86_64_req(
 	dst->id = src->id;
 	dst->sector_number = src->sector_number;
 	barrier();
+	if (unlikely(dst->operation == BLKIF_OP_DISCARD)) {
+		blkif_request_discard_t *d = (void *)dst;
+		const blkif_x86_64_discard_t *s = (const void *)src;
+
+		d->nr_sectors = s->nr_sectors;
+		return;
+	}
 	if (n > dst->nr_segments)
 		n = dst->nr_segments;
 	for (i = 0; i < n; i++)
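
Both translators bail out to a plain field copy for BLKIF_OP_DISCARD because the discard layout carries a sector range where the segment array normally sits. On the consuming side that implies a dispatch along these lines (hypothetical sketch, not from this patch):

static void sketch_dispatch(const blkif_request_t *req)
{
	if (unlikely(req->operation == BLKIF_OP_DISCARD)) {
		const blkif_request_discard_t *d = (const void *)req;

		/* hand d->sector_number / d->nr_sectors to the discard path */
		return;
	}
	/* read/write: walk req->nr_segments entries of req->seg[] */
}
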
--- 12.2.orig/include/xen/interface/io/blkif.h	2012-04-04 10:26:18.000000000 +0200
+++ 12.2/include/xen/interface/io/blkif.h	2012-04-04 10:28:27.000000000 +0200
@@ -440,11 +440,16 @@ struct blkif_request {
             blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
             struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
         } rw;
+        struct blkif_request_discard {
+            blkif_sector_t sector_number;
+            uint64_t nr_sectors;
+        } discard;
     } u;
 #endif
 };
 typedef struct blkif_request blkif_request_t;
 
+#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
 /*
  * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
  * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
@@ -459,6 +464,7 @@ struct blkif_request_discard {
     uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
 };
 typedef struct blkif_request_discard blkif_request_discard_t;
+#endif
 
 struct blkif_response {
     uint64_t        id;              /* copied from request */
--- 12.2.orig/include/xen/interface/platform.h	2012-04-10 15:59:27.000000000 +0200
+++ 12.2/include/xen/interface/platform.h	2011-11-17 17:36:32.000000000 +0100
@@ -349,6 +349,7 @@ struct xen_processor_csd {
 	uint32_t    coord_type;  /* coordination type */
 	uint32_t    num;         /* number of processors in same domain */
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_csd);
 typedef struct xen_processor_csd xen_processor_csd_t;
 DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
 
@@ -360,6 +361,7 @@ struct xen_processor_cx {
 	uint32_t    dpcnt;    /* number of dependency entries */
 	XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_cx);
 typedef struct xen_processor_cx xen_processor_cx_t;
 DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
 
@@ -395,6 +397,7 @@ struct xen_processor_px {
    	uint64_t control;        /* control value */
    	uint64_t status;     /* success indicator */
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_px);
 typedef struct xen_processor_px xen_processor_px_t;
 DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
 
@@ -416,6 +419,7 @@ struct xen_processor_performance {
 	struct xen_psd_package domain_info;
 	uint32_t shared_type;     /* coordination type of this processor */
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
 typedef struct xen_processor_performance xen_processor_performance_t;
 DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
 
@@ -427,8 +431,13 @@ struct xenpf_set_processor_pminfo {
 		struct xen_processor_power          power;/* Cx: _CST/_CSD */
 		struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
 		XEN_GUEST_HANDLE(uint32)            pdc;  /* _PDC */
+#ifdef CONFIG_XEN
 	} u;
+#else
+	};
+#endif
 };
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
 typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
 
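The DEFINE_GUEST_HANDLE_STRUCT() additions give the pv-ops build handles keyed on the struct tag, complementing the _t-typedef handles the CONFIG_XEN tree declares. For reference, the macro in 3.2's x86 interface header expands roughly to:

#define __DEFINE_GUEST_HANDLE(name, type) \
	typedef struct { type *p; } __guest_handle_ ## name
#define DEFINE_GUEST_HANDLE_STRUCT(name) \
	__DEFINE_GUEST_HANDLE(name, struct name)
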
--- 12.2.orig/include/xen/xenbus.h	2011-04-13 15:43:04.000000000 +0200
+++ 12.2/include/xen/xenbus.h	2011-12-21 11:37:00.000000000 +0100
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
+#include <linux/export.h>
 #include <linux/completion.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -167,9 +168,9 @@ int xenbus_scanf(struct xenbus_transacti
 	__attribute__((format(scanf, 4, 5)));
 
 /* Single printf and write: returns -errno or 0. */
+__printf(4, 5)
 int xenbus_printf(struct xenbus_transaction t,
-		  const char *dir, const char *node, const char *fmt, ...)
-	__attribute__((format(printf, 4, 5)));
+		  const char *dir, const char *node, const char *fmt, ...);
 
 /* Generic read function: NULL-terminated triples of name,
  * sprintf-style type string, and pointer. Returns 0 or errno.*/
@@ -236,11 +237,11 @@ int xenbus_watch_path2(struct xenbus_dev
 		       void (*callback)(struct xenbus_watch *,
 					const char **, unsigned int));
 #else
+__printf(4, 5)
 int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
 			 void (*callback)(struct xenbus_watch *,
 					  const char **, unsigned int),
-			 const char *pathfmt, ...)
-	__attribute__ ((format (printf, 4, 5)));
+			 const char *pathfmt, ...);
 #endif
 
 /**
@@ -316,8 +317,7 @@ enum xenbus_state xenbus_read_driver_sta
  * formatted message.
  */
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
-		      ...) __attribute__((__format__(__printf__, 3, 4)));
-
+		      ...) __printf(3, 4);
 
 /***
  * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
@@ -325,7 +325,7 @@ void xenbus_dev_error(struct xenbus_devi
  * closedown of this driver and its peer.
  */
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
-		      ...) __attribute__((__format__(__printf__, 3, 4)));
+		      ...) __printf(3, 4);
 
 #if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
 int xenbus_dev_init(void);
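
The attribute churn in this header is cosmetic: __printf() is the kernel's established shorthand from <linux/compiler.h>, essentially

	#define __printf(a, b)	__attribute__((format(printf, a, b)))

so the rewritten declarations are annotation-for-annotation equivalent, just shorter.
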
--- 12.2.orig/mm/vmalloc.c	2011-09-07 16:06:16.000000000 +0200
+++ 12.2/mm/vmalloc.c	2012-06-20 12:18:25.000000000 +0200
@@ -2230,6 +2230,17 @@ struct vm_struct *alloc_vm_area(size_t s
 		return NULL;
 	}
 
+#ifdef CONFIG_XEN
+	/*
+	 * If the allocated address space is passed to a hypercall before
+	 * being used then we cannot rely on a page fault to trigger an update
+	 * of the page tables.  So sync all the page tables here unless the
+	 * caller is going to have the affected PTEs updated directly.
+	 */
+	if (!ptes)
+		vmalloc_sync_all();
+#endif
+
 	return area;
 }
 EXPORT_SYMBOL_GPL(alloc_vm_area);